diff options
author | dec05eba <dec05eba@protonmail.com> | 2021-07-03 16:23:36 +0200 |
---|---|---|
committer | dec05eba <dec05eba@protonmail.com> | 2021-07-03 18:34:37 +0200 |
commit | 611d22bf269672ba56f98e12eb6b2a40efdaa5b9 (patch) | |
tree | 5dfd3e98fd08fa7cb6cb82c565b538cc891b6b98 /src/plugins/MediaGeneric.cpp | |
parent | 496f71413df2468a9d3329355ffef08280219808 (diff) |
Remove dependency on tidy, fix ph, support all 4chan markup
Go back to previous page when failing to fetch number of pages
Diffstat (limited to 'src/plugins/MediaGeneric.cpp')
-rw-r--r-- | src/plugins/MediaGeneric.cpp | 47 |
1 files changed, 26 insertions, 21 deletions
diff --git a/src/plugins/MediaGeneric.cpp b/src/plugins/MediaGeneric.cpp index 1f2389a..c829a33 100644 --- a/src/plugins/MediaGeneric.cpp +++ b/src/plugins/MediaGeneric.cpp @@ -3,15 +3,20 @@ #include <quickmedia/HtmlSearch.h> namespace QuickMedia { - using HtmlPathCallback = std::function<void(QuickMediaHtmlNode*)>; + static bool string_view_contains(const QuickMediaStringView str, const char *sub) { + return memmem(str.data, str.size, sub, strlen(sub)); + } + + using HtmlPathCallback = std::function<void(QuickMediaMatchNode*)>; static int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, HtmlPathCallback callback) { - return quickmedia_html_find_nodes_xpath(self, xpath, [](QuickMediaHtmlNode *node, void *userdata) { + return quickmedia_html_find_nodes_xpath(self, xpath, [](QuickMediaMatchNode *node, void *userdata) { HtmlPathCallback *callback = (HtmlPathCallback*)userdata; (*callback)(node); + return 0; }, &callback); } - static const char* html_attr_or_inner_text(QuickMediaHtmlNode *node, const char *field_name) { + static QuickMediaStringView html_attr_or_inner_text(QuickMediaMatchNode *node, const char *field_name) { if(strcmp(field_name, "text") == 0) return quickmedia_html_node_get_text(node); else @@ -32,13 +37,13 @@ namespace QuickMedia { } } - static PluginResult fetch_page_results(const std::string &url, const std::string &website_url, const std::vector<MediaTextQuery> &text_queries, const std::vector<MediaThumbnailQuery> &thumbnail_queries, MediaRelatedCustomHandler *custom_handler, BodyItems &result_items) { + static PluginResult fetch_page_results(const std::string &url, const std::string &website_url, const std::vector<MediaTextQuery> &text_queries, const std::vector<MediaThumbnailQuery> &thumbnail_queries, MediaRelatedCustomHandler *custom_handler, BodyItems &result_items, bool cloudflare_bypass) { std::vector<CommandArg> args; if(!website_url.empty()) args.push_back({ "-H", "referer: " + website_url }); std::string website_data; - if(download_to_string(url, website_data, args, true, true, true) != DownloadResult::OK) + if(download_to_string(url, website_data, args, true, true, cloudflare_bypass) != DownloadResult::OK) return PluginResult::NET_ERR; if(website_data.empty()) @@ -47,7 +52,7 @@ namespace QuickMedia { if(custom_handler && *custom_handler) { std::vector<MediaRelatedItem> media_related_items = (*custom_handler)(website_data); for(MediaRelatedItem &media_related_item : media_related_items) { - auto body_item = BodyItem::create(strip(media_related_item.title)); + auto body_item = BodyItem::create(media_related_item.title); body_item->url = std::move(media_related_item.url); body_item->thumbnail_url = std::move(media_related_item.thumbnail_url); result_items.push_back(std::move(body_item)); @@ -57,7 +62,7 @@ namespace QuickMedia { } QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; @@ -68,14 +73,14 @@ namespace QuickMedia { goto cleanup; } - result = quickmedia_html_find_nodes_xpath(&html_search, text_query.html_query, [&text_query, &result_items](QuickMediaHtmlNode *node) { - const char *title_value = html_attr_or_inner_text(node, text_query.title_field); - const char *url_value = html_attr_or_inner_text(node, text_query.url_field); - if(title_value && url_value && (!text_query.url_contains || strstr(url_value, text_query.url_contains))) { - std::string field1_fixed = strip(title_value); + result = quickmedia_html_find_nodes_xpath(&html_search, text_query.html_query, [&text_query, &result_items](QuickMediaMatchNode *node) { + QuickMediaStringView title_value = html_attr_or_inner_text(node, text_query.title_field); + QuickMediaStringView url_value = html_attr_or_inner_text(node, text_query.url_field); + if(title_value.data && url_value.data && (!text_query.url_contains || string_view_contains(url_value, text_query.url_contains))) { + std::string field1_fixed(title_value.data, title_value.size); html_unescape_sequences(field1_fixed); auto item = BodyItem::create(std::move(field1_fixed)); - item->url = strip(url_value); + item->url.assign(url_value.data, url_value.size); result_items.push_back(std::move(item)); } }); @@ -87,10 +92,10 @@ namespace QuickMedia { assert(thumbnail_query.html_query && thumbnail_query.field_name); if(thumbnail_query.html_query && thumbnail_query.field_name) { size_t index = 0; - result = quickmedia_html_find_nodes_xpath(&html_search, thumbnail_query.html_query, [&thumbnail_query, &result_items, &index](QuickMediaHtmlNode *node) { - const char *field_value = html_attr_or_inner_text(node, thumbnail_query.field_name); - if(index < result_items.size() && field_value && (!thumbnail_query.field_contains || strstr(field_value, thumbnail_query.field_contains))) { - result_items[index]->thumbnail_url = strip(field_value); + result = quickmedia_html_find_nodes_xpath(&html_search, thumbnail_query.html_query, [&thumbnail_query, &result_items, &index](QuickMediaMatchNode *node) { + QuickMediaStringView field_value = html_attr_or_inner_text(node, thumbnail_query.field_name); + if(index < result_items.size() && field_value.data && (!thumbnail_query.field_contains || string_view_contains(field_value, thumbnail_query.field_contains))) { + result_items[index]->thumbnail_url.assign(field_value.data, field_value.size); ++index; } }); @@ -111,8 +116,8 @@ namespace QuickMedia { } } - MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size) : - Page(program), website_url(website_url ? website_url : ""), thumbnail_max_size(thumbnail_max_size) + MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size, bool cloudflare_bypass) : + Page(program), website_url(website_url ? website_url : ""), thumbnail_max_size(thumbnail_max_size), cloudflare_bypass(cloudflare_bypass) { if(!this->website_url.empty()) { if(this->website_url.back() != '/') @@ -128,7 +133,7 @@ namespace QuickMedia { std::string url = search_query.search_template; string_replace_all(url, "%s", url_param_encode(str)); string_replace_all(url, "%p", std::to_string(search_query.page_start + page)); - return fetch_page_results(url, website_url, text_queries, thumbnail_queries, nullptr, result_items); + return fetch_page_results(url, website_url, text_queries, thumbnail_queries, nullptr, result_items, cloudflare_bypass); } PluginResult MediaGenericSearchPage::submit(const std::string&, const std::string &url, std::vector<Tab> &result_tabs) { @@ -137,7 +142,7 @@ namespace QuickMedia { } PluginResult MediaGenericSearchPage::get_related_media(const std::string &url, BodyItems &result_items) { - return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, &related_custom_handler, result_items); + return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, &related_custom_handler, result_items, cloudflare_bypass); } MediaGenericSearchPage& MediaGenericSearchPage::search_handler(const char *search_template, int page_start) { |