aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/MediaGeneric.cpp
blob: 0b6955672287757c78e4b81caa5cc8fb286d49c4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#include "../../plugins/MediaGeneric.hpp"
#include "../../include/StringUtils.hpp"
#include <quickmedia/HtmlSearch.h>

namespace QuickMedia {
    // Returns true when the NUL-terminated string |sub| occurs somewhere inside the
    // |str.size| bytes starting at |str.data|. |sub| must not be null (it is passed to strlen).
    static bool string_view_contains(const QuickMediaStringView str, const char *sub) {
        const size_t sub_length = strlen(sub);
        const void *match = memmem(str.data, str.size, sub, sub_length);
        return match != nullptr;
    }

    // C++ callable invoked with every node that matches an xpath query.
    using HtmlPathCallback = std::function<void(QuickMediaMatchNode*)>;

    // Convenience overload that accepts a (possibly capturing) C++ callable instead of a
    // function pointer + userdata pair. The four-argument call below can not be this
    // three-argument overload; it resolves to the function pointer/userdata variant.
    // |callback| only has to stay alive for the duration of this call since its address is
    // handed over as userdata.
    // NOTE(review): the trampoline always reports 0 back to the library — presumably "keep going"; confirm.
    static int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, HtmlPathCallback callback) {
        auto trampoline = [](QuickMediaMatchNode *node, void *userdata) {
            auto &user_callback = *static_cast<HtmlPathCallback*>(userdata);
            user_callback(node);
            return 0;
        };
        return quickmedia_html_find_nodes_xpath(self, xpath, trampoline, &callback);
    }

    // Reads |field_name| from |node|: the special name "text" selects the node's text,
    // every other name is looked up as an attribute of the node.
    // |field_name| must not be null (it is passed to strcmp).
    static QuickMediaStringView html_attr_or_inner_text(QuickMediaMatchNode *node, const char *field_name) {
        const bool wants_text = strcmp(field_name, "text") == 0;
        if(!wants_text)
            return quickmedia_html_node_get_attribute_value(node, field_name);
        return quickmedia_html_node_get_text(node);
    }

    // Rewrites the url and the thumbnail url of every item in |body_items|:
    //   "//host/path" -> "https://host/path"
    //   "/path"       -> website_url + "path" (the leading '/' is dropped before prepending)
    // Links with any other prefix are left untouched.
    static void body_items_prepend_website_url(BodyItems &body_items, const std::string &website_url) {
        const auto make_absolute = [&website_url](auto &link) {
            if(string_starts_with(link, "//")) {
                link = "https://" + link.substr(2);
                return;
            }

            if(string_starts_with(link, "/"))
                link = website_url + link.substr(1);
        };

        for(auto &item : body_items) {
            make_absolute(item->url);
            make_absolute(item->thumbnail_url);
        }
    }

    // Downloads |url| and appends one BodyItem per extracted result to |result_items|.
    //
    // url:                page to download.
    // website_url:        when non-empty it is sent as a "referer" header; it is also used to turn
    //                     site-relative item urls into absolute ones.
    // text_queries:       xpath queries that produce the title and url of each item. Every query
    //                     must have html_query, title_field and url_field set.
    // thumbnail_queries:  xpath queries whose n:th accepted match becomes the thumbnail of the
    //                     n:th item. Queries with a missing html_query/field_name are skipped.
    // thumbnail_max_size: stored as thumbnail_size on every item that receives a thumbnail
    //                     (on every item in the custom handler path).
    // custom_handler:     optional. When it points to a non-empty handler the downloaded data is
    //                     handed to that handler and the html queries are not run.
    // cloudflare_bypass:  forwarded to download_to_string.
    // extra_commands:     extra download arguments, forwarded to download_to_string.
    //
    // Returns NET_ERR when the download fails, ERR (with |result_items| cleared) when a text
    // query is invalid or the html search fails, and OK otherwise. An empty response is OK and
    // adds no items.
    static PluginResult fetch_page_results(const std::string &url, const std::string &website_url, const std::vector<MediaTextQuery> &text_queries, const std::vector<MediaThumbnailQuery> &thumbnail_queries, mgl::vec2i thumbnail_max_size, MediaRelatedCustomHandler *custom_handler, BodyItems &result_items, bool cloudflare_bypass, const std::vector<CommandArg> &extra_commands) {
        std::vector<CommandArg> args = extra_commands;
        if(!website_url.empty())
            args.push_back({ "-H", "referer: " + website_url });

        std::string website_data;
        if(download_to_string(url, website_data, args, true, true, cloudflare_bypass) != DownloadResult::OK)
            return PluginResult::NET_ERR;

        if(website_data.empty())
            return PluginResult::OK;

        if(custom_handler && *custom_handler) {
            std::vector<MediaRelatedItem> media_related_items = (*custom_handler)(website_data);
            for(MediaRelatedItem &media_related_item : media_related_items) {
                // The list is local to this branch, so every field can be moved out of it
                auto body_item = BodyItem::create(std::move(media_related_item.title));
                body_item->url = std::move(media_related_item.url);
                body_item->thumbnail_url = std::move(media_related_item.thumbnail_url);
                body_item->thumbnail_size = thumbnail_max_size;
                result_items.push_back(std::move(body_item));
            }
            body_items_prepend_website_url(result_items, website_url);
            return PluginResult::OK;
        }

        QuickMediaHtmlSearch html_search;
        int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
        if(result != 0)
            goto cleanup;

        for(const MediaTextQuery &text_query : text_queries) {
            // url_field is validated as well: it is passed to strcmp by html_attr_or_inner_text,
            // which must never receive a null pointer
            if(!text_query.html_query || !text_query.title_field || !text_query.url_field) {
                assert(false);
                result = -1;
                goto cleanup;
            }

            result = quickmedia_html_find_nodes_xpath(&html_search, text_query.html_query, [&text_query, &result_items](QuickMediaMatchNode *node) {
                QuickMediaStringView title_value = html_attr_or_inner_text(node, text_query.title_field);
                QuickMediaStringView url_value = html_attr_or_inner_text(node, text_query.url_field);
                // A node only becomes an item when it has both a title and a url, and the url passes the optional filter
                if(title_value.data && url_value.data && (!text_query.url_contains || string_view_contains(url_value, text_query.url_contains))) {
                    std::string field1_fixed(title_value.data, title_value.size);
                    html_unescape_sequences(field1_fixed);
                    auto item = BodyItem::create(std::move(field1_fixed));
                    item->url.assign(url_value.data, url_value.size);
                    result_items.push_back(std::move(item));
                }
            });
            if(result != 0)
                goto cleanup;
        }

        for(const MediaThumbnailQuery &thumbnail_query : thumbnail_queries) {
            assert(thumbnail_query.html_query && thumbnail_query.field_name);
            if(thumbnail_query.html_query && thumbnail_query.field_name) {
                // Restarts at 0 for every query: accepted matches are paired with the items in order, starting from the first item
                size_t index = 0;
                result = quickmedia_html_find_nodes_xpath(&html_search, thumbnail_query.html_query, [&thumbnail_query, &result_items, &index, thumbnail_max_size](QuickMediaMatchNode *node) {
                    QuickMediaStringView field_value = html_attr_or_inner_text(node, thumbnail_query.field_name);
                    if(index < result_items.size() && field_value.data && (!thumbnail_query.field_contains || string_view_contains(field_value, thumbnail_query.field_contains))) {
                        result_items[index]->thumbnail_url.assign(field_value.data, field_value.size);
                        result_items[index]->thumbnail_size = thumbnail_max_size;
                        ++index;
                    }
                });
                if(result != 0)
                    goto cleanup;
            }
        }

        body_items_prepend_website_url(result_items, website_url);

        cleanup:
        quickmedia_html_search_deinit(&html_search);
        if(result == 0) {
            return PluginResult::OK;
        } else {
            // Do not hand partially extracted results back to the caller
            result_items.clear();
            return PluginResult::ERR;
        }
    }

    // Stores the page configuration. A null |website_url| is treated as an empty string and a
    // non-empty one is normalized so that it always ends with '/'.
    MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, mgl::vec2i thumbnail_max_size, bool cloudflare_bypass, std::vector<CommandArg> extra_commands) :
        Page(program),
        website_url(website_url ? website_url : ""),
        thumbnail_max_size(thumbnail_max_size),
        cloudflare_bypass(cloudflare_bypass),
        extra_commands(std::move(extra_commands))
    {
        // The constructor parameter shadows the member, so refer to the member explicitly
        auto &base_url = this->website_url;
        if(!base_url.empty() && base_url.back() != '/')
            base_url.push_back('/');
    }

    // Searches for |str| by fetching page index 0 and converting the plugin result to a search result.
    SearchResult MediaGenericSearchPage::search(const std::string &str, BodyItems &result_items) {
        const PluginResult first_page_result = get_page(str, 0, result_items);
        return plugin_result_to_search_result(first_page_result);
    }

    // Builds the request url from the search template — every "%s" becomes the url-encoded
    // search text and every "%p" becomes page_start + |page| — and then fetches and parses it
    // with the search text/thumbnail queries.
    PluginResult MediaGenericSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) {
        const auto page_number = search_query.page_start + page;

        std::string request_url = search_query.search_template;
        string_replace_all(request_url, "%s", url_param_encode(str));
        string_replace_all(request_url, "%p", std::to_string(page_number));

        return fetch_page_results(request_url, website_url, text_queries, thumbnail_queries, thumbnail_max_size, nullptr, result_items, cloudflare_bypass, extra_commands);
    }

    // Handles selection of a search result: adds a tab that holds a video page for |args.url|.
    // The other two Tab fields are left as nullptr.
    PluginResult MediaGenericSearchPage::submit(const SubmitArgs &args, std::vector<Tab> &result_tabs) {
        auto video_page = std::make_unique<MediaGenericVideoPage>(program, this, args.url);
        result_tabs.push_back(Tab{nullptr, std::move(video_page), nullptr});
        return PluginResult::OK;
    }

    // Fetches |url| and extracts the related media from it, using the custom handler when one
    // is set and the related media text/thumbnail queries otherwise.
    PluginResult MediaGenericSearchPage::get_related_media(const std::string &url, BodyItems &result_items) {
        MediaRelatedCustomHandler *custom_handler = &related_custom_handler;
        return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, thumbnail_max_size, custom_handler, result_items, cloudflare_bypass, extra_commands);
    }

    // Configures searching: |search_template| is the url template expanded by get_page ("%s" and
    // "%p" are substituted there) and |page_start| is the value added to the page index.
    // Returns *this so that handler calls can be chained.
    MediaGenericSearchPage& MediaGenericSearchPage::search_handler(const char *search_template, int page_start) {
        auto &query = search_query;
        query.search_template = search_template;
        query.page_start = page_start;
        return *this;
    }

    // Replaces the queries that extract the title and url of each search result.
    // Returns *this so that handler calls can be chained.
    MediaGenericSearchPage& MediaGenericSearchPage::text_handler(std::vector<MediaTextQuery> queries) {
        this->text_queries = std::move(queries);
        return *this;
    }

    // Replaces the queries that extract the thumbnail of each search result.
    // Returns *this so that handler calls can be chained.
    MediaGenericSearchPage& MediaGenericSearchPage::thumbnail_handler(std::vector<MediaThumbnailQuery> queries) {
        this->thumbnail_queries = std::move(queries);
        return *this;
    }

    // Replaces the queries that extract the title and url of related media and resets the
    // custom related media handler to nullptr.
    // Returns *this so that handler calls can be chained.
    MediaGenericSearchPage& MediaGenericSearchPage::related_media_text_handler(std::vector<MediaTextQuery> queries) {
        related_custom_handler = nullptr;
        related_media_text_queries = std::move(queries);
        return *this;
    }

    // Replaces the queries that extract the thumbnail of related media and resets the custom
    // related media handler to nullptr.
    // Returns *this so that handler calls can be chained.
    MediaGenericSearchPage& MediaGenericSearchPage::related_media_thumbnail_handler(std::vector<MediaThumbnailQuery> queries) {
        related_custom_handler = nullptr;
        related_media_thumbnail_queries = std::move(queries);
        return *this;
    }

    // Installs a custom handler for extracting related media and clears the related media
    // text/thumbnail queries.
    // Returns *this so that handler calls can be chained.
    MediaGenericSearchPage& MediaGenericSearchPage::related_media_custom_handler(MediaRelatedCustomHandler handler) {
        // |handler| is a by-value sink parameter; move it instead of copying it a second time
        related_custom_handler = std::move(handler);
        related_media_text_queries.clear();
        related_media_thumbnail_queries.clear();
        return *this;
    }

    // Handles selection of a related item: adds a tab that holds a video page for |args.url|,
    // tied to the same search page. The other two Tab fields are left as nullptr.
    PluginResult MediaGenericRelatedPage::submit(const SubmitArgs &args, std::vector<Tab> &result_tabs) {
        auto video_page = std::make_unique<MediaGenericVideoPage>(program, search_page, args.url);
        result_tabs.push_back(Tab{nullptr, std::move(video_page), nullptr});
        return PluginResult::OK;
    }

    // Asks the search page for the media related to |url| and returns the collected items.
    // The PluginResult reported by the search page is not inspected.
    BodyItems MediaGenericVideoPage::get_related_media(const std::string &url) {
        BodyItems related_items;
        search_page->get_related_media(url, related_items);
        return related_items;
    }

    // Adds one tab that lists |related_videos|: a body filled with those items, a related page
    // tied to the same search page, and a search bar created with SEARCH_DELAY_FILTER.
    // The second parameter is unused.
    PluginResult MediaGenericVideoPage::get_related_pages(const BodyItems &related_videos, const std::string&, std::vector<Tab> &result_tabs) {
        auto body = create_body(false, true);
        body->set_items(related_videos);

        auto related_page = std::make_unique<MediaGenericRelatedPage>(program, search_page);
        auto search_bar = create_search_bar("Search...", SEARCH_DELAY_FILTER);
        result_tabs.push_back(Tab{std::move(body), std::move(related_page), std::move(search_bar)});
        return PluginResult::OK;
    }
}