aboutsummaryrefslogtreecommitdiff
path: root/include/quickmedia/HtmlSearch.h
blob: 8ce7ff4199500c9f6de23cd8fe2e7c49bd53de58 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#ifndef QUICKMEDIA_HTML_SEARCH_H
#define QUICKMEDIA_HTML_SEARCH_H

#include "NodeSearch.h"
#include <HtmlParser.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
    Forward declarations of the tree types.
    The structures below refer to each other (and to themselves) by pointer,
    so the type names have to exist before the full definitions.
*/
typedef struct QuickMediaHtmlAttribute QuickMediaHtmlAttribute;
typedef struct QuickMediaHtmlNode QuickMediaHtmlNode;
typedef struct QuickMediaHtmlChildNode QuickMediaHtmlChildNode;

/*
    String buffer described by a data pointer, a size and a capacity.
    NOTE(review): presumably an owned, growable buffer where |size| is the number of
    bytes in use and |capacity| the number of bytes allocated - confirm against the implementation.
*/
typedef struct {
    char *data;
    size_t size;
    size_t capacity;
} QuickMediaString;

/*
    A single key/value attribute of a tag node.
    Attributes of one node are chained together through |next| (singly linked list).
    NOTE(review): QuickMediaStringView is declared outside this header; the views presumably
    point into the html source instead of owning their data - confirm.
*/
struct QuickMediaHtmlAttribute {
    QuickMediaStringView key;
    QuickMediaStringView value;
    QuickMediaHtmlAttribute *next; /* next attribute in the list */
};

/*
    A node in the html tree. A node is either a tag or a piece of text, see |is_tag|.
    Attributes and children are stored as singly linked lists, each tracked by a first/last pointer pair.
*/
struct QuickMediaHtmlNode {
    int is_tag; /* 0 = text, 1 = tag */
    QuickMediaStringView name; /* tag name if the node is a tag, the text itself if the node is text */
    QuickMediaHtmlAttribute *first_attribute; /* head of the attribute list */
    QuickMediaHtmlAttribute *last_attribute; /* tail of the attribute list */
    QuickMediaHtmlChildNode *first_child; /* head of the child list */
    QuickMediaHtmlChildNode *last_child; /* tail of the child list */
    QuickMediaHtmlNode *parent; /* enclosing node. NOTE(review): presumably NULL for the root node - confirm */
};

/*
    Entry in a node's child list.
    The child node is embedded by value and |next| links to the following sibling.
*/
struct QuickMediaHtmlChildNode {
    QuickMediaHtmlNode node;
    QuickMediaHtmlChildNode *next; /* next sibling in the parent's child list */
};

/*
    The object passed to QuickMediaHtmlSearchResultCallback for every match.
    |node| is the matched node in the html tree.
    NOTE(review): |__str| looks like private scratch storage (presumably the backing buffer for
    quickmedia_html_node_get_text) - confirm, and do not access it from user code.
    NOTE(review): identifiers containing a double underscore are reserved for the implementation
    in C++ (and leading double underscore is reserved in C); consider renaming together with the implementation file.
*/
typedef struct {
    QuickMediaHtmlNode *node;
    QuickMediaString __str;
} QuickMediaMatchNode;

/*
    Search context. Set up with |quickmedia_html_search_init| and released with |quickmedia_html_search_deinit|.
    |root_node| is stored by value and is the top of the html tree.
*/
typedef struct {
    QuickMediaHtmlNode root_node;
} QuickMediaHtmlSearch;

/*
    Looks up the attribute called |attribute_name| on the node |self| and returns its value.
    The attribute name is matched case-insensitively.
    Returns an empty string view if the attribute doesn't exist or if it doesn't have a value.
    The returned value has leading and trailing whitespace stripped.
    The result is only valid within the callback function scope.
*/
QuickMediaStringView quickmedia_html_node_get_attribute_value(QuickMediaHtmlNode *self, const char *attribute_name);

/*
    Searches the children of |self| for a node matching |tag_name|, |attribute_name| and |attribute_value|.
    The search is case-insensitive and not recursive: only the children of |self| are considered,
    not their descendants.
    Returns NULL if no matching child is found.
    The result is only valid within the callback function scope.
    NOTE(review): this header doesn't say whether |attribute_name| / |attribute_value| may be NULL
    to skip the attribute filter - confirm against the implementation before relying on it.
*/
QuickMediaHtmlNode* quickmedia_html_node_find_child(QuickMediaHtmlNode *self, const char *tag_name, const char *attribute_name, const char *attribute_value);

/*
    Returns the text of the matched node |self|.
    Note that this takes the QuickMediaMatchNode given to the callback, not a QuickMediaHtmlNode.
    Returns an empty string if the node doesn't have any text or if there was an error creating the text,
    so these two cases can't be told apart from the return value.
    The returned text has leading and trailing whitespace stripped.
    The result is only valid within the callback function scope.
*/
QuickMediaStringView quickmedia_html_node_get_text(QuickMediaMatchNode *self);

/*
    Callback type used by |quickmedia_html_find_nodes_xpath| to report a match.
    @node is only valid within the callback function scope.
    @userdata is the pointer that was given to |quickmedia_html_find_nodes_xpath|.
    Return 0 to continue.
    NOTE(review): a non-0 return value presumably stops the search - confirm against the implementation.
*/
typedef int (*QuickMediaHtmlSearchResultCallback)(QuickMediaMatchNode *node, void *userdata);

/*
    Initializes |self| from |html_source|, which is |size| bytes long.
    |html_source| should be in utf8 format and may contain utf8 BOM.
    |html_source| has to be valid until |quickmedia_html_search_deinit| is called.
    NOTE(review): the meaning of the return value isn't documented here; presumably 0 on success
    and non-0 on failure - confirm against the implementation.
*/
int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source, size_t size);
/* Counterpart of |quickmedia_html_search_init|. |html_source| no longer has to stay valid after this call. */
void quickmedia_html_search_deinit(QuickMediaHtmlSearch *self);

/*
    Finds nodes matching |xpath| and reports them through |result_callback|, passing |userdata| along.
    The xpath syntax is non-standard: it doesn't use the '@' symbol for accessing properties.
    Returns non-0 value if there is a syntax error in the xpath.
    Nodes given to |result_callback| are only valid within the callback function scope.
*/
int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata);

#ifdef __cplusplus
}
#endif

#endif