aboutsummaryrefslogtreecommitdiff
path: root/include/quickmedia/HtmlSearch.h
blob: 63f21758728e2f26fc9a5b79a2a2dfe8a09b4b7f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#ifndef QUICKMEDIA_HTML_SEARCH_H
#define QUICKMEDIA_HTML_SEARCH_H

#include "NodeSearch.h"
#include <HtmlParser.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
    Forward declarations, so the structs below can refer to each other (and to themselves) by pointer.
    NOTE(review): QuickMediaTextNode is declared here but is not defined or used anywhere else in this header - confirm whether it's still needed.
*/
typedef struct QuickMediaHtmlAttribute QuickMediaHtmlAttribute;
typedef struct QuickMediaHtmlNode QuickMediaHtmlNode;
typedef struct QuickMediaHtmlChildNode QuickMediaHtmlChildNode;
typedef struct QuickMediaTextNode QuickMediaTextNode;

/*
    Character buffer described by a pointer, a size and a capacity.
    NOTE(review): presumably a growable buffer where |size| is the number of bytes in use and
    |capacity| is the number of bytes allocated for |data| - confirm against the implementation.
*/
typedef struct {
    char *data;
    size_t size;
    size_t capacity;
} QuickMediaString;

/*
    One key/value attribute of a tag node. Attributes of a node are chained together through |next|.
    QuickMediaStringView is not declared in this header; it comes from one of the included headers.
    NOTE(review): the views presumably point into the html source rather than owning a copy - confirm.
*/
struct QuickMediaHtmlAttribute {
    QuickMediaStringView key;
    QuickMediaStringView value;
    QuickMediaHtmlAttribute *next; /* Next attribute in the list. NOTE(review): presumably NULL for the last attribute - confirm */
};

/*
    A node in the html tree. A node is either a tag or a text (see |is_tag|).
    Attributes and children are each tracked with a first/last pointer pair.
    NOTE(review): the last pointers presumably exist to allow appending without walking the list - confirm.
*/
struct QuickMediaHtmlNode {
    int is_tag; /* 0 = text, 1 = tag */
    QuickMediaStringView name; /* name if the node is a tag, text if the node is a text */
    QuickMediaHtmlAttribute *first_attribute;
    QuickMediaHtmlAttribute *last_attribute;
    QuickMediaHtmlChildNode *first_child;
    QuickMediaHtmlChildNode *last_child;
    QuickMediaHtmlNode *parent; /* NOTE(review): presumably NULL for the root node - confirm */
};

/*
    List entry for a child of a QuickMediaHtmlNode.
    The child node is stored by value inside the entry (not as a pointer) and |next| links to the next sibling entry.
*/
struct QuickMediaHtmlChildNode {
    QuickMediaHtmlNode node;
    QuickMediaHtmlChildNode *next;
};

/*
    The object passed to QuickMediaHtmlSearchResultCallback for a matching node.
    NOTE(review): |__str| looks like internal scratch storage (presumably the buffer backing the result of
    quickmedia_html_node_get_text) and not something callers should touch - confirm.
    NOTE(review): identifiers containing a double underscore are reserved for the implementation in C and C++;
    renaming the field would change the public struct layout/API, so it's only flagged here.
*/
typedef struct {
    QuickMediaHtmlNode *node;
    QuickMediaString __str;
} QuickMediaMatchNode;

/*
    Search context. Holds the root of the html node tree by value.
    Set up with quickmedia_html_search_init and torn down with quickmedia_html_search_deinit.
*/
typedef struct {
    QuickMediaHtmlNode root_node;
} QuickMediaHtmlSearch;

/*
    Looks up the attribute |attribute_name| on the matched node |self|.
    Returns an empty string view if the attribute doesn't exist or if it doesn't have any value.
    The result is only valid within the callback function scope.
    NOTE(review): whether the attribute name comparison is case sensitive can't be told from this header - confirm.
*/
QuickMediaStringView quickmedia_html_node_get_attribute_value(QuickMediaMatchNode *self, const char *attribute_name);

/*
    Returns the text of the matched node |self|.
    Returns an empty string view if the node doesn't have any text or if there was an error creating the text.
    The result is only valid within the callback function scope.
    NOTE(review): an error can't be told apart from a node without text through the return value alone.
*/
QuickMediaStringView quickmedia_html_node_get_text(QuickMediaMatchNode *self);

/*
    Callback type for search results (see quickmedia_html_find_nodes_xpath).
    @node is only valid within the callback function scope.
    @userdata is the pointer that was passed to the search function.
    Return 0 to continue.
    NOTE(review): a non-0 return value presumably stops the search - confirm against the implementation.
*/
typedef int (*QuickMediaHtmlSearchResultCallback)(QuickMediaMatchNode *node, void *userdata);

/*
    Initializes |self| from |html_source|.
    |html_source| should be in utf8 format and may contain utf8 BOM.
    NOTE(review): |size| is presumably the length of |html_source| in bytes, and the return value is presumably 0 on success - confirm.
    NOTE(review): the node structs store string views, so |html_source| presumably has to stay alive for as long as |self| is used - confirm.
*/
int quickmedia_html_search_init(QuickMediaHtmlSearch *self, const char *html_source, size_t size);
/*
    Counterpart to quickmedia_html_search_init.
    NOTE(review): presumably frees the node tree owned by |self| - confirm against the implementation.
*/
void quickmedia_html_search_deinit(QuickMediaHtmlSearch *self);

/*
    Searches |self| with |xpath| and reports results through |result_callback|, passing |userdata| along to it.
    Non-standard xpath. Doesn't use '@' symbol for accessing properties.
    Returns non-0 value if there is a syntax error in the xpath.
    NOTE(review): what is returned when the callback returns non-0 can't be told from this header - confirm.
*/
int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, QuickMediaHtmlSearchResultCallback result_callback, void *userdata);

#ifdef __cplusplus
}
#endif

#endif