From 611d22bf269672ba56f98e12eb6b2a40efdaa5b9 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 3 Jul 2021 16:23:36 +0200 Subject: Remove dependency on tidy, fix ph, support all 4chan markup Go back to previous page when failing to fetch number of pages --- src/plugins/MangaGeneric.cpp | 84 +++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 37 deletions(-) (limited to 'src/plugins/MangaGeneric.cpp') diff --git a/src/plugins/MangaGeneric.cpp b/src/plugins/MangaGeneric.cpp index 2d0df6a..a2608ab 100644 --- a/src/plugins/MangaGeneric.cpp +++ b/src/plugins/MangaGeneric.cpp @@ -43,7 +43,11 @@ namespace QuickMedia { const char *field_contains = nullptr; }; - static const char* html_attr_or_inner_text(QuickMediaHtmlNode *node, const char *field_name) { + static bool string_view_contains(const QuickMediaStringView str, const char *sub) { + return memmem(str.data, str.size, sub, strlen(sub)); + } + + static QuickMediaStringView html_attr_or_inner_text(QuickMediaMatchNode *node, const char *field_name) { if(strcmp(field_name, "text") == 0) return quickmedia_html_node_get_text(node); else @@ -66,59 +70,62 @@ namespace QuickMedia { static int html_append_search(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlSearchUserdata *search_userdata) { return quickmedia_html_find_nodes_xpath(html_search, html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlSearchUserdata *search_userdata = (HtmlSearchUserdata*)userdata; - const char *field1_value = html_attr_or_inner_text(node, search_userdata->field1); + QuickMediaStringView field1_value = html_attr_or_inner_text(node, search_userdata->field1); if(search_userdata->field2) { - const char *field2_value = html_attr_or_inner_text(node, search_userdata->field2); - if(field1_value && field2_value && (!search_userdata->field2_contains || strstr(field2_value, search_userdata->field2_contains))) { - std::string field1_fixed = strip(field1_value); + QuickMediaStringView field2_value = html_attr_or_inner_text(node, search_userdata->field2); + if(field1_value.data && field2_value.data && (!search_userdata->field2_contains || string_view_contains(field2_value, search_userdata->field2_contains))) { + std::string field1_fixed(field1_value.data, field1_value.size); html_unescape_sequences(field1_fixed); auto item = BodyItem::create(std::move(field1_fixed)); - item->url = strip(field2_value); + item->url = std::string(field2_value.data, field2_value.size); search_userdata->body_items->push_back(std::move(item)); } } else { - if(field1_value) { - std::string field1_fixed = strip(field1_value); + if(field1_value.data) { + std::string field1_fixed(field1_value.data, field1_value.size); html_unescape_sequences(field1_fixed); auto item = BodyItem::create(std::move(field1_fixed)); search_userdata->body_items->push_back(std::move(item)); } } + return 0; }, search_userdata); } static int html_body_item_merge(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlMergeUserdata *merge_userdata) { return quickmedia_html_find_nodes_xpath(html_search, html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlMergeUserdata *merge_userdata = (HtmlMergeUserdata*)userdata; BodyItemContext &body_item_image_context = merge_userdata->body_item_image_context; - const char *field_value = html_attr_or_inner_text(node, merge_userdata->field_name); + QuickMediaStringView field_value = html_attr_or_inner_text(node, merge_userdata->field_name); if(body_item_image_context.index < body_item_image_context.body_items->size() - && field_value && (!merge_userdata->field_contains || strstr(field_value, merge_userdata->field_contains))) + && field_value.data && (!merge_userdata->field_contains || string_view_contains(field_value, merge_userdata->field_contains))) { + std::string field_stripped(field_value.data, field_value.size); if(merge_userdata->type == MergeType::THUMBNAIL) { - (*body_item_image_context.body_items)[body_item_image_context.index]->thumbnail_url = strip(field_value); + (*body_item_image_context.body_items)[body_item_image_context.index]->thumbnail_url = std::move(field_stripped); } else if(merge_userdata->type == MergeType::DESCRIPTION) { - std::string field_stripped = strip(field_value); const char *prefix = merge_userdata->desc_prefix ? merge_userdata->desc_prefix : ""; - (*body_item_image_context.body_items)[body_item_image_context.index]->set_description(prefix + field_stripped); + (*body_item_image_context.body_items)[body_item_image_context.index]->set_description(prefix + std::move(field_stripped)); (*body_item_image_context.body_items)[body_item_image_context.index]->set_description_color(get_current_theme().faded_text_color); } body_item_image_context.index++; } + return 0; }, merge_userdata); } static int html_get_page_url(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlPageImageUserdata *page_image_userdata) { return quickmedia_html_find_nodes_xpath(html_search, html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlPageImageUserdata *page_image_userdata = (HtmlPageImageUserdata*)userdata; - const char *field1_value = html_attr_or_inner_text(node, page_image_userdata->field_name); - if(page_image_userdata->url->empty() && field1_value && (!page_image_userdata->field_contains || strstr(field1_value, page_image_userdata->field_contains))) { - *page_image_userdata->url = strip(field1_value); + QuickMediaStringView field1_value = html_attr_or_inner_text(node, page_image_userdata->field_name); + if(page_image_userdata->url->empty() && field1_value.data && (!page_image_userdata->field_contains || string_view_contains(field1_value, page_image_userdata->field_contains))) { + *page_image_userdata->url = std::string(field1_value.data, field1_value.size); } + return 0; }, page_image_userdata); } @@ -170,7 +177,7 @@ namespace QuickMedia { return PluginResult::OK; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; @@ -306,7 +313,7 @@ namespace QuickMedia { return PluginResult::NET_ERR; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; @@ -338,12 +345,13 @@ namespace QuickMedia { authors_userdata.authors_query = &authors_query; quickmedia_html_find_nodes_xpath(&html_search, authors_query.html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlAuthorsUserdata *authors_userdata = (HtmlAuthorsUserdata*)userdata; - const char *title_value = html_attr_or_inner_text(node, authors_userdata->authors_query->title_field); - const char *url_value = html_attr_or_inner_text(node, authors_userdata->authors_query->url_field); - if(title_value && url_value && (!authors_userdata->authors_query->url_contains || strstr(url_value, authors_userdata->authors_query->url_contains))) - (*authors_userdata->creators)[strip(title_value)] = strip(url_value); + QuickMediaStringView title_value = html_attr_or_inner_text(node, authors_userdata->authors_query->title_field); + QuickMediaStringView url_value = html_attr_or_inner_text(node, authors_userdata->authors_query->url_field); + if(title_value.data && url_value.data && (!authors_userdata->authors_query->url_contains || string_view_contains(url_value, authors_userdata->authors_query->url_contains))) + (*authors_userdata->creators)[std::string(title_value.data, title_value.size)] = std::string(url_value.data, url_value.size); + return 0; }, &authors_userdata); } } @@ -471,19 +479,20 @@ namespace QuickMedia { return ImageResult::NET_ERR; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; result = quickmedia_html_find_nodes_xpath(&html_search, list_page_pagination_query->pages_html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlPageCountUserdata *page_count_userdata = (HtmlPageCountUserdata*)userdata; - const char *field1_value = html_attr_or_inner_text(node, page_count_userdata->field_name); - if(field1_value) { - std::string field_value_stripped = strip(field1_value); + QuickMediaStringView field1_value = html_attr_or_inner_text(node, page_count_userdata->field_name); + if(field1_value.data) { + std::string field_value_stripped(field1_value.data, field1_value.size); if(is_number(field_value_stripped.c_str())) page_count_userdata->num_pages = strtol(field_value_stripped.c_str(), nullptr, 10); } + return 0; }, &page_count_userdata); if(result != 0 || page_count_userdata.num_pages == 0) { @@ -583,7 +592,7 @@ namespace QuickMedia { return ImageResult::ERR; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; @@ -642,17 +651,18 @@ namespace QuickMedia { list_page_images_userdata.field_contains = list_page_images_query->field_contains; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; result = quickmedia_html_find_nodes_xpath(&html_search, list_page_images_query->html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlListPageImagesUserdata *list_page_images_userdata = (HtmlListPageImagesUserdata*)userdata; - const char *field1_value = html_attr_or_inner_text(node, list_page_images_userdata->field_name); - if(field1_value && (!list_page_images_userdata->field_contains || strstr(field1_value, list_page_images_userdata->field_contains))) { - list_page_images_userdata->urls->push_back(strip(field1_value)); + QuickMediaStringView field1_value = html_attr_or_inner_text(node, list_page_images_userdata->field_name); + if(field1_value.data && (!list_page_images_userdata->field_contains || string_view_contains(field1_value, list_page_images_userdata->field_contains))) { + list_page_images_userdata->urls->push_back(std::string(field1_value.data, field1_value.size)); } + return 0; }, &list_page_images_userdata); if(result == 0 && !chapter_image_urls.empty() && list_page_images_query->post_handler) -- cgit v1.2.3