From 611d22bf269672ba56f98e12eb6b2a40efdaa5b9 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Sat, 3 Jul 2021 16:23:36 +0200 Subject: Remove dependency on tidy, fix ph, support all 4chan markup Go back to previous page when failing to fetch number of pages --- src/DownloadUtils.cpp | 3 +- src/NetUtils.cpp | 3 +- src/QuickMedia.cpp | 26 ++- src/plugins/Fourchan.cpp | 389 +++++++++++++++++++------------------------ src/plugins/MangaGeneric.cpp | 84 ++++++---- src/plugins/Manganelo.cpp | 83 +++++---- src/plugins/MediaGeneric.cpp | 47 +++--- src/plugins/NyaaSi.cpp | 73 ++++---- src/plugins/Saucenao.cpp | 27 +-- src/plugins/Soundcloud.cpp | 11 +- src/plugins/Youtube.cpp | 12 +- 11 files changed, 384 insertions(+), 374 deletions(-) (limited to 'src') diff --git a/src/DownloadUtils.cpp b/src/DownloadUtils.cpp index 7660cee..dff0ecb 100644 --- a/src/DownloadUtils.cpp +++ b/src/DownloadUtils.cpp @@ -19,8 +19,6 @@ namespace QuickMedia { int total_downloaded_size = 0; }; - static const bool debug_download = false; - static int accumulate_string(char *data, int size, void *userdata) { std::string *str = (std::string*)userdata; if(str->size() + size > 1024 * 1024 * 100) // 100mb sane limit, TODO: make configurable @@ -75,6 +73,7 @@ namespace QuickMedia { return 0; } + static const bool debug_download = false; static const char *useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"; DownloadResult download_head_to_string(const std::string &url, std::string &result, bool use_browser_useragent, bool fail_on_error) { diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp index d6b6cf2..cc19094 100644 --- a/src/NetUtils.cpp +++ b/src/NetUtils.cpp @@ -35,10 +35,11 @@ namespace QuickMedia { }; void html_unescape_sequences(std::string &str) { - const std::array unescape_sequences = { + const std::array unescape_sequences = { HtmlUnescapeSequence { """, "\"" }, HtmlUnescapeSequence { "'", "'" }, HtmlUnescapeSequence { "'", "'" }, + HtmlUnescapeSequence { " ", "\n" }, HtmlUnescapeSequence { "<", "<" }, HtmlUnescapeSequence { ">", ">" }, HtmlUnescapeSequence { "&", "&" } // This should be last, to not accidentally replace a new sequence caused by replacing this diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp index 9788c0a..e21e591 100644 --- a/src/QuickMedia.cpp +++ b/src/QuickMedia.cpp @@ -1217,22 +1217,22 @@ namespace QuickMedia { } } else if(strcmp(plugin_name, "pornhub") == 0) { check_youtube_dl_installed(plugin_name); - auto search_page = std::make_unique(this, "https://www.pornhub.com/", sf::Vector2i(320/1.5f, 180/1.5f)); + auto search_page = std::make_unique(this, "https://www.pornhub.com/", sf::Vector2i(320/1.5f, 180/1.5f), false); add_pornhub_handlers(search_page.get()); tabs.push_back(Tab{create_body(false, true), std::move(search_page), create_search_bar("Search...", 500)}); } else if(strcmp(plugin_name, "spankbang") == 0) { check_youtube_dl_installed(plugin_name); - auto search_page = std::make_unique(this, "https://spankbang.com/", sf::Vector2i(500/2.5f, 281/2.5f)); + auto search_page = std::make_unique(this, "https://spankbang.com/", sf::Vector2i(500/2.5f, 281/2.5f), true); add_spankbang_handlers(search_page.get()); tabs.push_back(Tab{create_body(false, true), std::move(search_page), create_search_bar("Search...", 500)}); } else if(strcmp(plugin_name, "xvideos") == 0) { check_youtube_dl_installed(plugin_name); - auto search_page = std::make_unique(this, "https://www.xvideos.com/", sf::Vector2i(352/1.5f, 198/1.5f)); + auto search_page = std::make_unique(this, "https://www.xvideos.com/", sf::Vector2i(352/1.5f, 198/1.5f), false); add_xvideos_handlers(search_page.get()); tabs.push_back(Tab{create_body(false, true), std::move(search_page), create_search_bar("Search...", 500)}); } else if(strcmp(plugin_name, "xhamster") == 0) { check_youtube_dl_installed(plugin_name); - auto search_page = std::make_unique(this, "https://xhamster.com/", sf::Vector2i(240, 135)); + auto search_page = std::make_unique(this, "https://xhamster.com/", sf::Vector2i(240, 135), false); add_xhamster_handlers(search_page.get()); tabs.push_back(Tab{create_body(false, true), std::move(search_page), create_search_bar("Search...", 500)}); } else if(strcmp(plugin_name, "soundcloud") == 0) { @@ -1972,7 +1972,9 @@ namespace QuickMedia { BodyItem *selected_item = tabs[selected_tab].body->get_selected(); if(selected_item && tabs[selected_tab].page->is_trackable()) { TrackablePage *trackable_page = dynamic_cast(tabs[selected_tab].page.get()); - trackable_page->track(selected_item->get_title()); + run_task_with_loading_screen([trackable_page, selected_item](){ + return trackable_page->track(selected_item->get_title()) == TrackResult::OK; + }); } } else if(event.key.code == sf::Keyboard::C && event.key.control) { BodyItem *selected_item = tabs[selected_tab].body->get_selected(); @@ -3183,6 +3185,7 @@ namespace QuickMedia { image_download_future.cancel(); image_download_cancel = false; + num_manga_pages = 0; std::promise num_manga_pages_promise; num_manga_pages_future = num_manga_pages_promise.get_future(); @@ -3346,6 +3349,11 @@ namespace QuickMedia { sf::Event event; download_chapter_images_if_needed(images_page); + if(num_manga_pages == 0) { + current_page = pop_page_stack(); + return 0; + } + if(current_page != PageType::IMAGES || !window.isOpen()) return 0; @@ -3541,6 +3549,11 @@ namespace QuickMedia { } download_chapter_images_if_needed(images_page); + if(num_manga_pages == 0) { + current_page = pop_page_stack(); + return; + } + if(current_page != PageType::IMAGES_CONTINUOUS || !window.isOpen()) return; @@ -4281,6 +4294,9 @@ namespace QuickMedia { update_idle_state(); handle_window_close(); + if(current_page != PageType::CHAT_LOGIN) + break; + if(redraw) { redraw = false; get_body_dimensions(window_size, nullptr, body_pos, body_size); diff --git a/src/plugins/Fourchan.cpp b/src/plugins/Fourchan.cpp index 52024e1..4b2ca61 100644 --- a/src/plugins/Fourchan.cpp +++ b/src/plugins/Fourchan.cpp @@ -3,10 +3,9 @@ #include "../../include/Storage.hpp" #include "../../include/StringUtils.hpp" #include "../../include/NetUtils.hpp" +#include #include #include -#include -#include // API documentation: https://github.com/4chan/4chan-API @@ -37,133 +36,195 @@ namespace QuickMedia { struct CommentPiece { enum class Type { TEXT, - QUOTE, // > - QUOTELINK, // >>POSTNO, - LINE_CONTINUE + QUOTE, // >, Set for span + QUOTE_CONTINUE, // Set for span + QUOTELINK, // >>POSTNO, Set for a + DEADLINK, // Set for span + CROSSBOARD_LINK, // Set for a + CODEBLOCK // Set for pre }; - DataView text; // Set when type is TEXT, QUOTE or QUOTELINK + std::string text; int64_t quote_postnumber = 0; // Set when type is QUOTELINK Type type; }; - static TidyAttr get_attribute_by_name(TidyNode node, const char *name) { - for(TidyAttr attr = tidyAttrFirst(node); attr; attr = tidyAttrNext(attr)) { - const char *attr_name = tidyAttrName(attr); - if(attr_name && strcmp(name, attr_name) == 0) - return attr; - } - return nullptr; - } - - static const char* get_attribute_value(TidyNode node, const char *name) { - TidyAttr attr = get_attribute_by_name(node, name); - if(attr) - return tidyAttrValue(attr); - return nullptr; - } - - static void lstrip_newline(const char *str, size_t size, const char **output_str, size_t *output_size) { - size_t i = 0; - while(i < size && str[i] == '\n') { - ++i; - } - *output_str = str + i; - *output_size = size - i; - } + enum class NodeType { + A, + SPAN, + PRE + }; - static void rstrip_newline(const char *str, size_t size, size_t *output_size) { - ssize_t i = size - 1; - while(i >= 0 && str[i] == '\n') { - --i; + // Returns -1 if no match + static NodeType tag_name_to_node_type(HtmlStringView str) { + if(str.size == 1 && str.data[0] == 'a') { + return NodeType::A; + } else if(str.size == 4 && memcmp(str.data, "span", 4) == 0) { + return NodeType::SPAN; + } else if(str.size == 3 && memcmp(str.data, "pre", 3) == 0) { + return NodeType::PRE; + } else { + return (NodeType)-1; } - *output_size = i + 1; } - static void strip_newline(const char *str, size_t size, const char **output_str, size_t *output_size) { - lstrip_newline(str, size, output_str, output_size); - rstrip_newline(*output_str, *output_size, output_size); - } + struct HtmlNode { + NodeType node_type; + std::string klass; + std::string href; + int output_count = 0; + }; using CommentPieceCallback = std::function; - static int extract_comment_pieces(TidyDoc doc, TidyNode node, CommentPieceCallback &callback) { - for(TidyNode child = tidyGetChild(node); child; child = tidyGetNext(child)) { - const char *node_name = tidyNodeGetName(child); - if(node_name) { - if(strcmp(node_name, "br") == 0) { + struct HtmlParseUserdata { + CommentPieceCallback callback; + std::stack html_node; + }; + + static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) { + HtmlParseUserdata *parse_userdata = (HtmlParseUserdata*)userdata; + + switch(parse_type) { + case HTML_PARSE_TAG_START: { + if(html_parser->tag_name.size == 2 && memcmp(html_parser->tag_name.data, "br", 2) == 0) { CommentPiece comment_piece; comment_piece.type = CommentPiece::Type::TEXT; - // Warning: Cast from const char* to char* ... - comment_piece.text = { (char*)"\n", 1 }; - callback(comment_piece); - /*} else if(strcmp(node_name, "span") == 0) { - const char *span_class = get_attribute_value(child, "class"); - //fprintf(stderr, "span class: %s\n", span_class); - if(span_class && strcmp(span_class, "quote") == 0) { - CommentPiece comment_piece; - comment_piece.type = CommentPiece::Type::QUOTE; - // Warning: Cast from const char* to char* ... - comment_piece.text = { (char*)"\n", 1 }; - callback(comment_piece); - }*/ + comment_piece.text = "\n"; + parse_userdata->callback(comment_piece); } - } - if(tidyNodeGetType(child) == TidyNode_Text) { - TidyBuffer tidy_buffer; - tidyBufInit(&tidy_buffer); - if(tidyNodeGetText(doc, child, &tidy_buffer)) { - const char *inner_text = (const char*)tidy_buffer.bp; - size_t inner_text_size = tidy_buffer.size; - strip_newline(inner_text, inner_text_size, &inner_text, &inner_text_size); - - const char *node_name = tidyNodeGetName(node); - if(node_name && strcmp(node_name, "a") == 0) { - const char *a_class = get_attribute_value(node, "class"); - const char *a_href = get_attribute_value(node, "href"); - if(a_class && a_href && strcmp(a_class, "quotelink") == 0 && strncmp(a_href, "#p", 2) == 0) { - CommentPiece comment_piece; - comment_piece.type = CommentPiece::Type::QUOTELINK; - comment_piece.quote_postnumber = strtoll(a_href + 2, nullptr, 10); - // Warning: Cast from const char* to char* ... - comment_piece.text = { (char*)inner_text, inner_text_size }; - callback(comment_piece); - tidyBufFree(&tidy_buffer); - continue; - } + const NodeType node_type = tag_name_to_node_type(html_parser->tag_name); + if(node_type != (NodeType)-1) + parse_userdata->html_node.push({ node_type, "", "", 0 }); + break; + } + case HTML_PARSE_TAG_END: { + if(!parse_userdata->html_node.empty()) { + const NodeType node_type = tag_name_to_node_type(html_parser->tag_name); + if(node_type != (NodeType)-1) + parse_userdata->html_node.pop(); + } + break; + } + case HTML_PARSE_ATTRIBUTE: { + if(!parse_userdata->html_node.empty()) { + HtmlNode &html_node = parse_userdata->html_node.top(); + if(html_parser->attribute_key.size == 5 && memcmp(html_parser->attribute_key.data, "class", 5) == 0) { + html_node.klass.assign(html_parser->attribute_value.data, html_parser->attribute_value.size); + } else if(html_parser->attribute_key.size == 4 && memcmp(html_parser->attribute_key.data, "href", 4) == 0) { + html_node.href.assign(html_parser->attribute_value.data, html_parser->attribute_value.size); } + } + break; + } + case HTML_PARSE_TEXT: { + std::string text(html_parser->text.data, html_parser->text.size); + html_unescape_sequences(text); - CommentPiece comment_piece; + CommentPiece comment_piece; + comment_piece.type = CommentPiece::Type::TEXT; + comment_piece.text = std::move(text); + + if(parse_userdata->html_node.empty()) { comment_piece.type = CommentPiece::Type::TEXT; - // Warning: Cast from const char* to char* ... - comment_piece.text = { (char*)inner_text, inner_text_size }; - callback(comment_piece); + } else { + HtmlNode &html_node = parse_userdata->html_node.top(); + switch(html_node.node_type) { + case NodeType::A: { + if(html_node.klass == "quotelink") { + if(string_starts_with(html_node.href, "#p")) { + comment_piece.type = CommentPiece::Type::QUOTELINK; + comment_piece.quote_postnumber = strtoll(html_node.href.c_str() + 2, nullptr, 10); + } else if(string_starts_with(html_node.href, "/")) { + comment_piece.type = CommentPiece::Type::CROSSBOARD_LINK; + } else { + fprintf(stderr, "Unexpected href for quotelink: %s\n", html_node.href.c_str()); + } + } else { + fprintf(stderr, "Unexpected class for a: %s\n", html_node.klass.c_str()); + } + break; + } + case NodeType::SPAN: { + if(html_node.klass == "quote") { + comment_piece.type = html_node.output_count ? CommentPiece::Type::QUOTE : CommentPiece::Type::QUOTE_CONTINUE; + } else if(html_node.klass == "deadlink") { + comment_piece.type = CommentPiece::Type::DEADLINK; + } else { + fprintf(stderr, "Unexpected class for span: %s\n", html_node.klass.c_str()); + } + break; + } + case NodeType::PRE: { + if(html_node.klass == "prettyprint") { + comment_piece.type = CommentPiece::Type::CODEBLOCK; + } else { + fprintf(stderr, "Unexpected class for pre: %s\n", html_node.klass.c_str()); + } + break; + } + } + html_node.output_count++; } - tidyBufFree(&tidy_buffer); - } else { - int res = extract_comment_pieces(doc, child, callback); - if(res != 0) - return res; + + parse_userdata->callback(comment_piece); + break; + } + case HTML_PARSE_JAVASCRIPT_CODE: { + break; } } + return 0; } static void extract_comment_pieces(const char *html_source, size_t size, CommentPieceCallback callback) { - TidyDoc doc = tidyCreate(); - tidyOptSetBool(doc, TidyShowWarnings, no); - tidyOptSetInt(doc, TidyUseCustomTags, 1); - tidyOptSetInt(doc, TidyWrapLen, 0); - if(tidyParseString(doc, html_source) < 0) { - CommentPiece comment_piece; - comment_piece.type = CommentPiece::Type::TEXT; - // Warning: Cast from const char* to char* ... - comment_piece.text = { (char*)html_source, size }; - callback(comment_piece); - } else { - extract_comment_pieces(doc, tidyGetRoot(doc), callback); - } - tidyRelease(doc); + HtmlParseUserdata parse_userdata; + parse_userdata.callback = std::move(callback); + html_parser_parse(html_source, size, html_parse_callback, &parse_userdata); + } + + static std::string html_to_text(const char *html_source, size_t size, std::unordered_map &comment_by_postno, BodyItems &result_items, size_t body_item_index) { + std::string comment_text; + extract_comment_pieces(html_source, size, + [&comment_text, &comment_by_postno, &result_items, body_item_index](const CommentPiece &cp) { + switch(cp.type) { + case CommentPiece::Type::TEXT: + comment_text += std::move(cp.text); + break; + case CommentPiece::Type::QUOTE: + comment_text += std::move(cp.text); + break; + case CommentPiece::Type::QUOTE_CONTINUE: + comment_text += std::move(cp.text); + break; + case CommentPiece::Type::QUOTELINK: { + comment_text += std::move(cp.text); + auto it = comment_by_postno.find(cp.quote_postnumber); + if(it == comment_by_postno.end()) { + // TODO: Link this quote to a 4chan archive that still has the quoted comment (if available) + comment_text += " (Dead)"; + } else { + result_items[body_item_index]->replies_to.push_back(it->second); + result_items[it->second]->replies.push_back(body_item_index); + } + break; + } + case CommentPiece::Type::DEADLINK: + // TODO: Link this quote to a 4chan archive that still has the quoted comment (if available) + comment_text += std::move(cp.text) + " (Dead)"; + break; + case CommentPiece::Type::CROSSBOARD_LINK: + // TODO: Link this to another thread and allow navigating to it + comment_text += std::move(cp.text) + " (Cross-thread)"; + break; + case CommentPiece::Type::CODEBLOCK: + // TODO: Use a different colored background and use a monospace font + comment_text += std::move(cp.text); + break; + } + }); + return comment_text; } PluginResult FourchanBoardsPage::submit(const std::string &title, const std::string &url, std::vector &result_tabs) { @@ -264,68 +325,14 @@ namespace QuickMedia { author_str += " #" + std::to_string(post_num.asInt64()); - std::string comment_text; - extract_comment_pieces(sub_begin, sub_end - sub_begin, - [&comment_text](const CommentPiece &cp) { - switch(cp.type) { - case CommentPiece::Type::TEXT: - comment_text.append(cp.text.data, cp.text.size); - break; - case CommentPiece::Type::QUOTE: - //comment_text += '>'; - //comment_text.append(cp.text.data, cp.text.size); - //comment_text += '\n'; - break; - case CommentPiece::Type::QUOTELINK: { - comment_text.append(cp.text.data, cp.text.size); - break; - } - case CommentPiece::Type::LINE_CONTINUE: { - if(!comment_text.empty() && comment_text.back() == '\n') { - comment_text.pop_back(); - } - break; - } - } - } - ); + std::string comment_text = html_to_text(sub_begin, sub_end - sub_begin, comment_by_postno, result_items, body_item_index); if(!comment_text.empty()) comment_text += '\n'; - extract_comment_pieces(comment_begin, comment_end - comment_begin, - [&comment_text, &comment_by_postno, &result_items, body_item_index](const CommentPiece &cp) { - switch(cp.type) { - case CommentPiece::Type::TEXT: - comment_text.append(cp.text.data, cp.text.size); - break; - case CommentPiece::Type::QUOTE: - //comment_text += '>'; - //comment_text.append(cp.text.data, cp.text.size); - //comment_text += '\n'; - break; - case CommentPiece::Type::QUOTELINK: { - comment_text.append(cp.text.data, cp.text.size); - auto it = comment_by_postno.find(cp.quote_postnumber); - if(it == comment_by_postno.end()) { - // TODO: Link this quote to a 4chan archive that still has the quoted comment (if available) - comment_text += "(dead)"; - } else { - result_items[body_item_index]->replies_to.push_back(it->second); - result_items[it->second]->replies.push_back(body_item_index); - } - break; - } - case CommentPiece::Type::LINE_CONTINUE: { - if(!comment_text.empty() && comment_text.back() == '\n') { - comment_text.pop_back(); - } - break; - } - } - } - ); + + comment_text += html_to_text(comment_begin, comment_end - comment_begin, comment_by_postno, result_items, body_item_index); if(!comment_text.empty() && comment_text.back() == '\n') - comment_text.back() = ' '; - html_unescape_sequences(comment_text); + comment_text.pop_back(); + BodyItem *body_item = result_items[body_item_index].get(); body_item->set_title(std::move(comment_text)); body_item->set_author(std::move(author_str)); @@ -369,6 +376,7 @@ namespace QuickMedia { if(!json_root.isArray()) return PluginResult::ERR; + std::unordered_map comment_by_postno; for(const Json::Value &page_data : json_root) { if(!page_data.isObject()) continue; @@ -395,61 +403,11 @@ namespace QuickMedia { if(!thread_num.isNumeric()) continue; - std::string title_text; - extract_comment_pieces(sub_begin, sub_end - sub_begin, - [&title_text](const CommentPiece &cp) { - switch(cp.type) { - case CommentPiece::Type::TEXT: - title_text.append(cp.text.data, cp.text.size); - break; - case CommentPiece::Type::QUOTE: - //title_text += '>'; - //title_text.append(cp.text.data, cp.text.size); - //comment_text += '\n'; - break; - case CommentPiece::Type::QUOTELINK: { - title_text.append(cp.text.data, cp.text.size); - break; - } - case CommentPiece::Type::LINE_CONTINUE: { - if(!title_text.empty() && title_text.back() == '\n') { - title_text.pop_back(); - } - break; - } - } - } - ); + std::string title_text = html_to_text(sub_begin, sub_end - sub_begin, comment_by_postno, result_items, 0); if(!title_text.empty() && title_text.back() == '\n') title_text.back() = ' '; - html_unescape_sequences(title_text); - - std::string comment_text; - extract_comment_pieces(comment_begin, comment_end - comment_begin, - [&comment_text](const CommentPiece &cp) { - switch(cp.type) { - case CommentPiece::Type::TEXT: - comment_text.append(cp.text.data, cp.text.size); - break; - case CommentPiece::Type::QUOTE: - //comment_text += '>'; - //comment_text.append(cp.text.data, cp.text.size); - //comment_text += '\n'; - break; - case CommentPiece::Type::QUOTELINK: { - comment_text.append(cp.text.data, cp.text.size); - break; - } - case CommentPiece::Type::LINE_CONTINUE: { - if(!comment_text.empty() && comment_text.back() == '\n') { - comment_text.pop_back(); - } - break; - } - } - } - ); - html_unescape_sequences(comment_text); + + std::string comment_text = html_to_text(comment_begin, comment_end - comment_begin, comment_by_postno, result_items, 0); // TODO: Do the same when wrapping is implemented // TODO: Remove this int num_lines = 0; @@ -462,6 +420,7 @@ namespace QuickMedia { } } } + auto body_item = BodyItem::create(std::move(comment_text)); body_item->set_author(std::move(title_text)); body_item->url = std::to_string(thread_num.asInt64()); diff --git a/src/plugins/MangaGeneric.cpp b/src/plugins/MangaGeneric.cpp index 2d0df6a..a2608ab 100644 --- a/src/plugins/MangaGeneric.cpp +++ b/src/plugins/MangaGeneric.cpp @@ -43,7 +43,11 @@ namespace QuickMedia { const char *field_contains = nullptr; }; - static const char* html_attr_or_inner_text(QuickMediaHtmlNode *node, const char *field_name) { + static bool string_view_contains(const QuickMediaStringView str, const char *sub) { + return memmem(str.data, str.size, sub, strlen(sub)); + } + + static QuickMediaStringView html_attr_or_inner_text(QuickMediaMatchNode *node, const char *field_name) { if(strcmp(field_name, "text") == 0) return quickmedia_html_node_get_text(node); else @@ -66,59 +70,62 @@ namespace QuickMedia { static int html_append_search(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlSearchUserdata *search_userdata) { return quickmedia_html_find_nodes_xpath(html_search, html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlSearchUserdata *search_userdata = (HtmlSearchUserdata*)userdata; - const char *field1_value = html_attr_or_inner_text(node, search_userdata->field1); + QuickMediaStringView field1_value = html_attr_or_inner_text(node, search_userdata->field1); if(search_userdata->field2) { - const char *field2_value = html_attr_or_inner_text(node, search_userdata->field2); - if(field1_value && field2_value && (!search_userdata->field2_contains || strstr(field2_value, search_userdata->field2_contains))) { - std::string field1_fixed = strip(field1_value); + QuickMediaStringView field2_value = html_attr_or_inner_text(node, search_userdata->field2); + if(field1_value.data && field2_value.data && (!search_userdata->field2_contains || string_view_contains(field2_value, search_userdata->field2_contains))) { + std::string field1_fixed(field1_value.data, field1_value.size); html_unescape_sequences(field1_fixed); auto item = BodyItem::create(std::move(field1_fixed)); - item->url = strip(field2_value); + item->url = std::string(field2_value.data, field2_value.size); search_userdata->body_items->push_back(std::move(item)); } } else { - if(field1_value) { - std::string field1_fixed = strip(field1_value); + if(field1_value.data) { + std::string field1_fixed(field1_value.data, field1_value.size); html_unescape_sequences(field1_fixed); auto item = BodyItem::create(std::move(field1_fixed)); search_userdata->body_items->push_back(std::move(item)); } } + return 0; }, search_userdata); } static int html_body_item_merge(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlMergeUserdata *merge_userdata) { return quickmedia_html_find_nodes_xpath(html_search, html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlMergeUserdata *merge_userdata = (HtmlMergeUserdata*)userdata; BodyItemContext &body_item_image_context = merge_userdata->body_item_image_context; - const char *field_value = html_attr_or_inner_text(node, merge_userdata->field_name); + QuickMediaStringView field_value = html_attr_or_inner_text(node, merge_userdata->field_name); if(body_item_image_context.index < body_item_image_context.body_items->size() - && field_value && (!merge_userdata->field_contains || strstr(field_value, merge_userdata->field_contains))) + && field_value.data && (!merge_userdata->field_contains || string_view_contains(field_value, merge_userdata->field_contains))) { + std::string field_stripped(field_value.data, field_value.size); if(merge_userdata->type == MergeType::THUMBNAIL) { - (*body_item_image_context.body_items)[body_item_image_context.index]->thumbnail_url = strip(field_value); + (*body_item_image_context.body_items)[body_item_image_context.index]->thumbnail_url = std::move(field_stripped); } else if(merge_userdata->type == MergeType::DESCRIPTION) { - std::string field_stripped = strip(field_value); const char *prefix = merge_userdata->desc_prefix ? merge_userdata->desc_prefix : ""; - (*body_item_image_context.body_items)[body_item_image_context.index]->set_description(prefix + field_stripped); + (*body_item_image_context.body_items)[body_item_image_context.index]->set_description(prefix + std::move(field_stripped)); (*body_item_image_context.body_items)[body_item_image_context.index]->set_description_color(get_current_theme().faded_text_color); } body_item_image_context.index++; } + return 0; }, merge_userdata); } static int html_get_page_url(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlPageImageUserdata *page_image_userdata) { return quickmedia_html_find_nodes_xpath(html_search, html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlPageImageUserdata *page_image_userdata = (HtmlPageImageUserdata*)userdata; - const char *field1_value = html_attr_or_inner_text(node, page_image_userdata->field_name); - if(page_image_userdata->url->empty() && field1_value && (!page_image_userdata->field_contains || strstr(field1_value, page_image_userdata->field_contains))) { - *page_image_userdata->url = strip(field1_value); + QuickMediaStringView field1_value = html_attr_or_inner_text(node, page_image_userdata->field_name); + if(page_image_userdata->url->empty() && field1_value.data && (!page_image_userdata->field_contains || string_view_contains(field1_value, page_image_userdata->field_contains))) { + *page_image_userdata->url = std::string(field1_value.data, field1_value.size); } + return 0; }, page_image_userdata); } @@ -170,7 +177,7 @@ namespace QuickMedia { return PluginResult::OK; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; @@ -306,7 +313,7 @@ namespace QuickMedia { return PluginResult::NET_ERR; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; @@ -338,12 +345,13 @@ namespace QuickMedia { authors_userdata.authors_query = &authors_query; quickmedia_html_find_nodes_xpath(&html_search, authors_query.html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlAuthorsUserdata *authors_userdata = (HtmlAuthorsUserdata*)userdata; - const char *title_value = html_attr_or_inner_text(node, authors_userdata->authors_query->title_field); - const char *url_value = html_attr_or_inner_text(node, authors_userdata->authors_query->url_field); - if(title_value && url_value && (!authors_userdata->authors_query->url_contains || strstr(url_value, authors_userdata->authors_query->url_contains))) - (*authors_userdata->creators)[strip(title_value)] = strip(url_value); + QuickMediaStringView title_value = html_attr_or_inner_text(node, authors_userdata->authors_query->title_field); + QuickMediaStringView url_value = html_attr_or_inner_text(node, authors_userdata->authors_query->url_field); + if(title_value.data && url_value.data && (!authors_userdata->authors_query->url_contains || string_view_contains(url_value, authors_userdata->authors_query->url_contains))) + (*authors_userdata->creators)[std::string(title_value.data, title_value.size)] = std::string(url_value.data, url_value.size); + return 0; }, &authors_userdata); } } @@ -471,19 +479,20 @@ namespace QuickMedia { return ImageResult::NET_ERR; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; result = quickmedia_html_find_nodes_xpath(&html_search, list_page_pagination_query->pages_html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlPageCountUserdata *page_count_userdata = (HtmlPageCountUserdata*)userdata; - const char *field1_value = html_attr_or_inner_text(node, page_count_userdata->field_name); - if(field1_value) { - std::string field_value_stripped = strip(field1_value); + QuickMediaStringView field1_value = html_attr_or_inner_text(node, page_count_userdata->field_name); + if(field1_value.data) { + std::string field_value_stripped(field1_value.data, field1_value.size); if(is_number(field_value_stripped.c_str())) page_count_userdata->num_pages = strtol(field_value_stripped.c_str(), nullptr, 10); } + return 0; }, &page_count_userdata); if(result != 0 || page_count_userdata.num_pages == 0) { @@ -583,7 +592,7 @@ namespace QuickMedia { return ImageResult::ERR; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; @@ -642,17 +651,18 @@ namespace QuickMedia { list_page_images_userdata.field_contains = list_page_images_query->field_contains; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; result = quickmedia_html_find_nodes_xpath(&html_search, list_page_images_query->html_query, - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { HtmlListPageImagesUserdata *list_page_images_userdata = (HtmlListPageImagesUserdata*)userdata; - const char *field1_value = html_attr_or_inner_text(node, list_page_images_userdata->field_name); - if(field1_value && (!list_page_images_userdata->field_contains || strstr(field1_value, list_page_images_userdata->field_contains))) { - list_page_images_userdata->urls->push_back(strip(field1_value)); + QuickMediaStringView field1_value = html_attr_or_inner_text(node, list_page_images_userdata->field_name); + if(field1_value.data && (!list_page_images_userdata->field_contains || string_view_contains(field1_value, list_page_images_userdata->field_contains))) { + list_page_images_userdata->urls->push_back(std::string(field1_value.data, field1_value.size)); } + return 0; }, &list_page_images_userdata); if(result == 0 && !chapter_image_urls.empty() && list_page_images_query->post_handler) diff --git a/src/plugins/Manganelo.cpp b/src/plugins/Manganelo.cpp index 094d096..e0517dd 100644 --- a/src/plugins/Manganelo.cpp +++ b/src/plugins/Manganelo.cpp @@ -1,11 +1,14 @@ #include "../../plugins/Manganelo.hpp" #include "../../include/Notification.hpp" -#include "../../include/StringUtils.hpp" #include "../../include/NetUtils.hpp" #include "../../include/Theme.hpp" #include namespace QuickMedia { + static bool string_view_contains(const QuickMediaStringView str, const char *sub) { + return memmem(str.data, str.size, sub, strlen(sub)); + } + // Returns true if modified static bool remove_html_span(std::string &str) { size_t open_tag_start = str.find("url = strip(href); + QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href"); + QuickMediaStringView text = quickmedia_html_node_get_text(node); + if(href.data && text.data) { + auto item = BodyItem::create(std::string(text.data, text.size)); + item->url.assign(href.data, href.size); item_data->push_back(std::move(item)); } + return 0; }, &chapters_items); BodyItemContext body_item_context; @@ -56,29 +60,31 @@ namespace QuickMedia { body_item_context.index = 0; quickmedia_html_find_nodes_xpath(&html_search, "//ul[class='row-content-chapter']//span", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { auto *item_data = (BodyItemContext*)userdata; - const char *class_attr = quickmedia_html_node_get_attribute_value(node, "class"); - const char *text = quickmedia_html_node_get_text(node); - if(text && class_attr && strstr(class_attr, "chapter-time") && item_data->index < item_data->body_items->size()) { - std::string uploaded_date = strip(text); - (*item_data->body_items)[item_data->index]->set_description("Uploaded: " + uploaded_date); + QuickMediaStringView class_attr = quickmedia_html_node_get_attribute_value(node, "class"); + QuickMediaStringView text = quickmedia_html_node_get_text(node); + if(text.data && class_attr.data && string_view_contains(class_attr, "chapter-time") && item_data->index < item_data->body_items->size()) { + std::string uploaded_date(text.data, text.size); + (*item_data->body_items)[item_data->index]->set_description("Uploaded: " + std::move(uploaded_date)); (*item_data->body_items)[item_data->index]->set_description_color(get_current_theme().faded_text_color); item_data->index++; } + return 0; }, &body_item_context); quickmedia_html_find_nodes_xpath(&html_search, "//a[class='a-h']", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { std::vector *creators = (std::vector*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *text = quickmedia_html_node_get_text(node); - if(href && text && strstr(href, "/author/story/")) { + QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href"); + QuickMediaStringView text = quickmedia_html_node_get_text(node); + if(href.data && text.data && string_view_contains(href, "/author/story/")) { Creator creator; - creator.name = strip(text); - creator.url = strip(href); + creator.name.assign(text.data, text.size); + creator.url.assign(href.data, href.size); creators->push_back(std::move(creator)); } + return 0; }, &creators); cleanup: @@ -124,7 +130,7 @@ namespace QuickMedia { if(name.isString() && name.asCString()[0] != '\0' && nameunsigned.isString() && nameunsigned.asCString()[0] != '\0') { std::string name_str = name.asString(); while(remove_html_span(name_str)) {} - auto item = BodyItem::create(strip(name_str)); + auto item = BodyItem::create(name_str); item->url = "https://manganelo.com/manga/" + url_param_encode(nameunsigned.asString()); if(lastchapter.isString() && lastchapter.asCString()[0] != '\0') { item->set_description("Latest chapter: " + lastchapter.asString()); @@ -192,20 +198,21 @@ namespace QuickMedia { return PluginResult::NET_ERR; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='search-story-item']//a[class='item-img']", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *title = quickmedia_html_node_get_attribute_value(node, "title"); - if(href && title && strstr(href, "/manga/")) { - auto body_item = BodyItem::create(strip(title)); - body_item->url = strip(href); + QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href"); + QuickMediaStringView title = quickmedia_html_node_get_attribute_value(node, "title"); + if(href.data && title.data && string_view_contains(href, "/manga/")) { + auto body_item = BodyItem::create(std::string(title.data, title.size)); + body_item->url.assign(href.data, href.size); item_data->push_back(std::move(body_item)); } + return 0; }, &result_items); if(result != 0) @@ -216,13 +223,14 @@ namespace QuickMedia { body_item_image_context.index = 0; result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='search-story-item']//a[class='item-img']//img", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { auto *item_data = (BodyItemContext*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src && item_data->index < item_data->body_items->size()) { - (*item_data->body_items)[item_data->index]->thumbnail_url = src; + QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src"); + if(src.data && item_data->index < item_data->body_items->size()) { + (*item_data->body_items)[item_data->index]->thumbnail_url.assign(src.data, src.size); item_data->index++; } + return 0; }, &body_item_image_context); cleanup: @@ -261,18 +269,19 @@ namespace QuickMedia { return ImageResult::NET_ERR; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='container-chapter-reader']/img", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { auto *urls = (std::vector*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src) { - std::string image_url = strip(src); + QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src"); + if(src.data) { + std::string image_url(src.data, src.size); urls->push_back(std::move(image_url)); } + return 0; }, &chapter_image_urls); cleanup: diff --git a/src/plugins/MediaGeneric.cpp b/src/plugins/MediaGeneric.cpp index 1f2389a..c829a33 100644 --- a/src/plugins/MediaGeneric.cpp +++ b/src/plugins/MediaGeneric.cpp @@ -3,15 +3,20 @@ #include namespace QuickMedia { - using HtmlPathCallback = std::function; + static bool string_view_contains(const QuickMediaStringView str, const char *sub) { + return memmem(str.data, str.size, sub, strlen(sub)); + } + + using HtmlPathCallback = std::function; static int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, HtmlPathCallback callback) { - return quickmedia_html_find_nodes_xpath(self, xpath, [](QuickMediaHtmlNode *node, void *userdata) { + return quickmedia_html_find_nodes_xpath(self, xpath, [](QuickMediaMatchNode *node, void *userdata) { HtmlPathCallback *callback = (HtmlPathCallback*)userdata; (*callback)(node); + return 0; }, &callback); } - static const char* html_attr_or_inner_text(QuickMediaHtmlNode *node, const char *field_name) { + static QuickMediaStringView html_attr_or_inner_text(QuickMediaMatchNode *node, const char *field_name) { if(strcmp(field_name, "text") == 0) return quickmedia_html_node_get_text(node); else @@ -32,13 +37,13 @@ namespace QuickMedia { } } - static PluginResult fetch_page_results(const std::string &url, const std::string &website_url, const std::vector &text_queries, const std::vector &thumbnail_queries, MediaRelatedCustomHandler *custom_handler, BodyItems &result_items) { + static PluginResult fetch_page_results(const std::string &url, const std::string &website_url, const std::vector &text_queries, const std::vector &thumbnail_queries, MediaRelatedCustomHandler *custom_handler, BodyItems &result_items, bool cloudflare_bypass) { std::vector args; if(!website_url.empty()) args.push_back({ "-H", "referer: " + website_url }); std::string website_data; - if(download_to_string(url, website_data, args, true, true, true) != DownloadResult::OK) + if(download_to_string(url, website_data, args, true, true, cloudflare_bypass) != DownloadResult::OK) return PluginResult::NET_ERR; if(website_data.empty()) @@ -47,7 +52,7 @@ namespace QuickMedia { if(custom_handler && *custom_handler) { std::vector media_related_items = (*custom_handler)(website_data); for(MediaRelatedItem &media_related_item : media_related_items) { - auto body_item = BodyItem::create(strip(media_related_item.title)); + auto body_item = BodyItem::create(media_related_item.title); body_item->url = std::move(media_related_item.url); body_item->thumbnail_url = std::move(media_related_item.thumbnail_url); result_items.push_back(std::move(body_item)); @@ -57,7 +62,7 @@ namespace QuickMedia { } QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; @@ -68,14 +73,14 @@ namespace QuickMedia { goto cleanup; } - result = quickmedia_html_find_nodes_xpath(&html_search, text_query.html_query, [&text_query, &result_items](QuickMediaHtmlNode *node) { - const char *title_value = html_attr_or_inner_text(node, text_query.title_field); - const char *url_value = html_attr_or_inner_text(node, text_query.url_field); - if(title_value && url_value && (!text_query.url_contains || strstr(url_value, text_query.url_contains))) { - std::string field1_fixed = strip(title_value); + result = quickmedia_html_find_nodes_xpath(&html_search, text_query.html_query, [&text_query, &result_items](QuickMediaMatchNode *node) { + QuickMediaStringView title_value = html_attr_or_inner_text(node, text_query.title_field); + QuickMediaStringView url_value = html_attr_or_inner_text(node, text_query.url_field); + if(title_value.data && url_value.data && (!text_query.url_contains || string_view_contains(url_value, text_query.url_contains))) { + std::string field1_fixed(title_value.data, title_value.size); html_unescape_sequences(field1_fixed); auto item = BodyItem::create(std::move(field1_fixed)); - item->url = strip(url_value); + item->url.assign(url_value.data, url_value.size); result_items.push_back(std::move(item)); } }); @@ -87,10 +92,10 @@ namespace QuickMedia { assert(thumbnail_query.html_query && thumbnail_query.field_name); if(thumbnail_query.html_query && thumbnail_query.field_name) { size_t index = 0; - result = quickmedia_html_find_nodes_xpath(&html_search, thumbnail_query.html_query, [&thumbnail_query, &result_items, &index](QuickMediaHtmlNode *node) { - const char *field_value = html_attr_or_inner_text(node, thumbnail_query.field_name); - if(index < result_items.size() && field_value && (!thumbnail_query.field_contains || strstr(field_value, thumbnail_query.field_contains))) { - result_items[index]->thumbnail_url = strip(field_value); + result = quickmedia_html_find_nodes_xpath(&html_search, thumbnail_query.html_query, [&thumbnail_query, &result_items, &index](QuickMediaMatchNode *node) { + QuickMediaStringView field_value = html_attr_or_inner_text(node, thumbnail_query.field_name); + if(index < result_items.size() && field_value.data && (!thumbnail_query.field_contains || string_view_contains(field_value, thumbnail_query.field_contains))) { + result_items[index]->thumbnail_url.assign(field_value.data, field_value.size); ++index; } }); @@ -111,8 +116,8 @@ namespace QuickMedia { } } - MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size) : - Page(program), website_url(website_url ? website_url : ""), thumbnail_max_size(thumbnail_max_size) + MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size, bool cloudflare_bypass) : + Page(program), website_url(website_url ? website_url : ""), thumbnail_max_size(thumbnail_max_size), cloudflare_bypass(cloudflare_bypass) { if(!this->website_url.empty()) { if(this->website_url.back() != '/') @@ -128,7 +133,7 @@ namespace QuickMedia { std::string url = search_query.search_template; string_replace_all(url, "%s", url_param_encode(str)); string_replace_all(url, "%p", std::to_string(search_query.page_start + page)); - return fetch_page_results(url, website_url, text_queries, thumbnail_queries, nullptr, result_items); + return fetch_page_results(url, website_url, text_queries, thumbnail_queries, nullptr, result_items, cloudflare_bypass); } PluginResult MediaGenericSearchPage::submit(const std::string&, const std::string &url, std::vector &result_tabs) { @@ -137,7 +142,7 @@ namespace QuickMedia { } PluginResult MediaGenericSearchPage::get_related_media(const std::string &url, BodyItems &result_items) { - return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, &related_custom_handler, result_items); + return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, &related_custom_handler, result_items, cloudflare_bypass); } MediaGenericSearchPage& MediaGenericSearchPage::search_handler(const char *search_template, int page_start) { diff --git a/src/plugins/NyaaSi.cpp b/src/plugins/NyaaSi.cpp index 5d9e41b..d4667af 100644 --- a/src/plugins/NyaaSi.cpp +++ b/src/plugins/NyaaSi.cpp @@ -291,17 +291,18 @@ namespace QuickMedia { return PluginResult::NET_ERR; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; result = quickmedia_html_find_nodes_xpath(&html_search, "//h3[class='panel-title']", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { std::string *title = (std::string*)userdata; - const char *text = quickmedia_html_node_get_text(node); - if(title->empty() && text) { - *title = text; + QuickMediaStringView text = quickmedia_html_node_get_text(node); + if(title->empty() && text.data) { + title->assign(text.data, text.size); } + return 0; }, &title); if(result != 0) @@ -314,16 +315,17 @@ namespace QuickMedia { } result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='panel-body']//div[class='row']//a", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { ResultItemExtra *item_data = (ResultItemExtra*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *text = quickmedia_html_node_get_text(node); - if(item_data->result_items->empty() && href && text && strncmp(href, "/user/", 6) == 0) { + QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href"); + QuickMediaStringView text = quickmedia_html_node_get_text(node); + if(item_data->result_items->empty() && href.data && text.data && href.size >= 6 && memcmp(href.data, "/user/", 6) == 0) { auto body_item = BodyItem::create(""); - body_item->set_description("Submitter: " + strip(text)); - body_item->url = "https://" + *item_data->domain + "/" + std::string(href); + body_item->set_description("Submitter: " + std::string(text.data, text.size)); + body_item->url = "https://" + *item_data->domain + "/" + std::string(href.data, href.size); item_data->result_items->push_back(std::move(body_item)); } + return 0; }, &result_item_extra); if(result != 0) @@ -335,17 +337,18 @@ namespace QuickMedia { result_items.push_back(std::move(body_item)); } - result_items.front()->set_title(strip(title)); + result_items.front()->set_title(title); result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='torrent-description']", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { std::string *description = (std::string*)userdata; - const char *text = quickmedia_html_node_get_text(node); - if(description->empty() && text) { - std::string desc = strip(text); + QuickMediaStringView text = quickmedia_html_node_get_text(node); + if(description->empty() && text.data) { + std::string desc(text.data, text.size); html_unescape_sequences(desc); *description = std::move(desc); } + return 0; }, &description); if(result != 0) @@ -355,12 +358,13 @@ namespace QuickMedia { result_items.front()->set_description(result_items.front()->get_description() + "\nDescription:\n" + description); result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='container']//a", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { std::string *magnet_url = (std::string*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - if(magnet_url->empty() && href && strncmp(href, "magnet:?", 8) == 0) { - *magnet_url = href; + QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href"); + if(magnet_url->empty() && href.data && href.size >= 8 && memcmp(href.data, "magnet:?", 8) == 0) { + magnet_url->assign(href.data, href.size); } + return 0; }, &magnet_url); if(result != 0) @@ -377,15 +381,16 @@ namespace QuickMedia { comments_start_index = result_items.size(); result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='comments']//a", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { auto *item_data = (BodyItems*)userdata; - const char *href = quickmedia_html_node_get_attribute_value(node, "href"); - const char *text = quickmedia_html_node_get_text(node); - if(href && text && strncmp(href, "/user/", 6) == 0) { - auto body_item = BodyItem::create(strip(text)); + QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href"); + QuickMediaStringView text = quickmedia_html_node_get_text(node); + if(href.data && text.data && href.size >= 6 && memcmp(href.data, "/user/", 6) == 0) { + auto body_item = BodyItem::create(std::string(text.data, text.size)); //body_item->url = "https://nyaa.si/" + std::string(href); item_data->push_back(std::move(body_item)); } + return 0; }, &result_items); if(result != 0 || result_items.size() == comments_start_index) @@ -396,14 +401,15 @@ namespace QuickMedia { body_item_image_context.index = comments_start_index; result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='comments']//img[class='avatar']", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { auto *item_data = (BodyItemContext*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src && item_data->index < item_data->body_items->size()) { - (*item_data->body_items)[item_data->index]->thumbnail_url = src; + QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src"); + if(src.data && item_data->index < item_data->body_items->size()) { + (*item_data->body_items)[item_data->index]->thumbnail_url.assign(src.data, src.size); (*item_data->body_items)[item_data->index]->thumbnail_size = sf::Vector2i(120, 120); item_data->index++; } + return 0; }, &body_item_image_context); if(result != 0) @@ -412,15 +418,16 @@ namespace QuickMedia { body_item_image_context.index = comments_start_index; result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='comments']//div[class='comment-content']", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { auto *item_data = (BodyItemContext*)userdata; - const char *text = quickmedia_html_node_get_text(node); - if(text && item_data->index < item_data->body_items->size()) { - std::string desc = strip(text); + QuickMediaStringView text = quickmedia_html_node_get_text(node); + if(text.data && item_data->index < item_data->body_items->size()) { + std::string desc(text.data, text.size); html_unescape_sequences(desc); (*item_data->body_items)[item_data->index]->set_description(std::move(desc)); item_data->index++; } + return 0; }, &body_item_image_context); cleanup: diff --git a/src/plugins/Saucenao.cpp b/src/plugins/Saucenao.cpp index 1278bed..e8d8357 100644 --- a/src/plugins/Saucenao.cpp +++ b/src/plugins/Saucenao.cpp @@ -1,5 +1,4 @@ #include "../../plugins/Saucenao.hpp" -#include "../../include/StringUtils.hpp" #include namespace QuickMedia { @@ -19,22 +18,23 @@ namespace QuickMedia { if(download_result != DownloadResult::OK) return download_result_to_plugin_result(download_result); QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; result = quickmedia_html_find_nodes_xpath(&html_search, "//td[class='resulttablecontent']", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { BodyItems *item_data = (BodyItems*)userdata; - const char *text = quickmedia_html_node_get_text(node); - if(text) { - std::string title = text; + QuickMediaStringView text = quickmedia_html_node_get_text(node); + if(text.data) { + std::string title(text.data, text.size); size_t p_index = title.find("%"); if(p_index != std::string::npos) title = title.erase(0, p_index + 1); - auto item = BodyItem::create(strip(title)); + auto item = BodyItem::create(title); item_data->push_back(std::move(item)); } + return 0; }, &result_items); BodyItemContext body_item_context; @@ -42,16 +42,17 @@ namespace QuickMedia { body_item_context.index = 0; quickmedia_html_find_nodes_xpath(&html_search, "//td[class='resulttableimage']//img", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { BodyItemContext *item_data = (BodyItemContext*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - const char *data_src = quickmedia_html_node_get_attribute_value(node, "data-src"); - const char *image_url = data_src ? data_src : src; - if(image_url && item_data->index < item_data->body_items->size()) { - (*item_data->body_items)[item_data->index]->thumbnail_url = strip(image_url); + QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src"); + QuickMediaStringView data_src = quickmedia_html_node_get_attribute_value(node, "data-src"); + QuickMediaStringView image_url = data_src.data ? data_src : src; + if(image_url.data && item_data->index < item_data->body_items->size()) { + (*item_data->body_items)[item_data->index]->thumbnail_url.assign(image_url.data, image_url.size); (*item_data->body_items)[item_data->index]->thumbnail_size = sf::Vector2i(150, 147); item_data->index++; } + return 0; }, &body_item_context); cleanup: diff --git a/src/plugins/Soundcloud.cpp b/src/plugins/Soundcloud.cpp index abc8c18..7079e46 100644 --- a/src/plugins/Soundcloud.cpp +++ b/src/plugins/Soundcloud.cpp @@ -345,16 +345,17 @@ namespace QuickMedia { if(client_id.empty()) { std::vector script_sources; QuickMediaHtmlSearch html_search; - int result = quickmedia_html_search_init(&html_search, website_data.c_str()); + int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size()); if(result != 0) goto cleanup; result = quickmedia_html_find_nodes_xpath(&html_search, "//script", - [](QuickMediaHtmlNode *node, void *userdata) { + [](QuickMediaMatchNode *node, void *userdata) { std::vector *script_sources = (std::vector*)userdata; - const char *src = quickmedia_html_node_get_attribute_value(node, "src"); - if(src) - script_sources->push_back(strip(src)); + QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src"); + if(src.data) + script_sources->push_back(std::string(src.data, src.size)); + return 0; }, &script_sources); cleanup: diff --git a/src/plugins/Youtube.cpp b/src/plugins/Youtube.cpp index 24fd448..13655c7 100644 --- a/src/plugins/Youtube.cpp +++ b/src/plugins/Youtube.cpp @@ -1610,25 +1610,25 @@ R"END( if(!subscription_data.inside_entry && subscription_data.author.empty()) { if(parse_type == HTML_PARSE_TAG_START && string_view_equals(&html_parser->tag_name, "title")) { subscription_data.inside_title = true; - return; + return 0; } else if(parse_type == HTML_PARSE_TAG_END && string_view_equals(&html_parser->tag_name, "title")) { subscription_data.inside_title = false; subscription_data.author.assign(html_parser->text_stripped.data, html_parser->text_stripped.size); - return; + return 0; } } if(parse_type == HTML_PARSE_TAG_START && string_view_equals(&html_parser->tag_name, "entry")) { subscription_data.subscription_entry.push_back({}); subscription_data.inside_entry = true; - return; + return 0; } else if(parse_type == HTML_PARSE_TAG_END && string_view_equals(&html_parser->tag_name, "entry")) { subscription_data.inside_entry = false; - return; + return 0; } if(!subscription_data.inside_entry) - return; + return 0; if(string_view_equals(&html_parser->tag_name, "title") && parse_type == HTML_PARSE_TAG_END) { subscription_data.subscription_entry.back().title.assign(html_parser->text_stripped.data, html_parser->text_stripped.size); @@ -1638,6 +1638,8 @@ R"END( std::string published_str(html_parser->text_stripped.data, html_parser->text_stripped.size); subscription_data.subscription_entry.back().published = iso_utc_to_unix_time(published_str.c_str()); } + + return 0; }, &subscription_data); std::vector results; -- cgit v1.2.3