aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/DownloadUtils.cpp3
-rw-r--r--src/NetUtils.cpp3
-rw-r--r--src/QuickMedia.cpp26
-rw-r--r--src/plugins/Fourchan.cpp389
-rw-r--r--src/plugins/MangaGeneric.cpp84
-rw-r--r--src/plugins/Manganelo.cpp83
-rw-r--r--src/plugins/MediaGeneric.cpp47
-rw-r--r--src/plugins/NyaaSi.cpp73
-rw-r--r--src/plugins/Saucenao.cpp27
-rw-r--r--src/plugins/Soundcloud.cpp11
-rw-r--r--src/plugins/Youtube.cpp12
11 files changed, 384 insertions, 374 deletions
diff --git a/src/DownloadUtils.cpp b/src/DownloadUtils.cpp
index 7660cee..dff0ecb 100644
--- a/src/DownloadUtils.cpp
+++ b/src/DownloadUtils.cpp
@@ -19,8 +19,6 @@ namespace QuickMedia {
int total_downloaded_size = 0;
};
- static const bool debug_download = false;
-
static int accumulate_string(char *data, int size, void *userdata) {
std::string *str = (std::string*)userdata;
if(str->size() + size > 1024 * 1024 * 100) // 100mb sane limit, TODO: make configurable
@@ -75,6 +73,7 @@ namespace QuickMedia {
return 0;
}
+ static const bool debug_download = false;
static const char *useragent_str = "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36";
DownloadResult download_head_to_string(const std::string &url, std::string &result, bool use_browser_useragent, bool fail_on_error) {
diff --git a/src/NetUtils.cpp b/src/NetUtils.cpp
index d6b6cf2..cc19094 100644
--- a/src/NetUtils.cpp
+++ b/src/NetUtils.cpp
@@ -35,10 +35,11 @@ namespace QuickMedia {
};
void html_unescape_sequences(std::string &str) {
- const std::array<HtmlUnescapeSequence, 6> unescape_sequences = {
+ const std::array<HtmlUnescapeSequence, 7> unescape_sequences = {
HtmlUnescapeSequence { "&quot;", "\"" },
HtmlUnescapeSequence { "&#039;", "'" },
HtmlUnescapeSequence { "&#39;", "'" },
+ HtmlUnescapeSequence { "&#10;", "\n" },
HtmlUnescapeSequence { "&lt;", "<" },
HtmlUnescapeSequence { "&gt;", ">" },
HtmlUnescapeSequence { "&amp;", "&" } // This should be last, to not accidentally replace a new sequence caused by replacing this
diff --git a/src/QuickMedia.cpp b/src/QuickMedia.cpp
index 9788c0a..e21e591 100644
--- a/src/QuickMedia.cpp
+++ b/src/QuickMedia.cpp
@@ -1217,22 +1217,22 @@ namespace QuickMedia {
}
} else if(strcmp(plugin_name, "pornhub") == 0) {
check_youtube_dl_installed(plugin_name);
- auto search_page = std::make_unique<MediaGenericSearchPage>(this, "https://www.pornhub.com/", sf::Vector2i(320/1.5f, 180/1.5f));
+ auto search_page = std::make_unique<MediaGenericSearchPage>(this, "https://www.pornhub.com/", sf::Vector2i(320/1.5f, 180/1.5f), false);
add_pornhub_handlers(search_page.get());
tabs.push_back(Tab{create_body(false, true), std::move(search_page), create_search_bar("Search...", 500)});
} else if(strcmp(plugin_name, "spankbang") == 0) {
check_youtube_dl_installed(plugin_name);
- auto search_page = std::make_unique<MediaGenericSearchPage>(this, "https://spankbang.com/", sf::Vector2i(500/2.5f, 281/2.5f));
+ auto search_page = std::make_unique<MediaGenericSearchPage>(this, "https://spankbang.com/", sf::Vector2i(500/2.5f, 281/2.5f), true);
add_spankbang_handlers(search_page.get());
tabs.push_back(Tab{create_body(false, true), std::move(search_page), create_search_bar("Search...", 500)});
} else if(strcmp(plugin_name, "xvideos") == 0) {
check_youtube_dl_installed(plugin_name);
- auto search_page = std::make_unique<MediaGenericSearchPage>(this, "https://www.xvideos.com/", sf::Vector2i(352/1.5f, 198/1.5f));
+ auto search_page = std::make_unique<MediaGenericSearchPage>(this, "https://www.xvideos.com/", sf::Vector2i(352/1.5f, 198/1.5f), false);
add_xvideos_handlers(search_page.get());
tabs.push_back(Tab{create_body(false, true), std::move(search_page), create_search_bar("Search...", 500)});
} else if(strcmp(plugin_name, "xhamster") == 0) {
check_youtube_dl_installed(plugin_name);
- auto search_page = std::make_unique<MediaGenericSearchPage>(this, "https://xhamster.com/", sf::Vector2i(240, 135));
+ auto search_page = std::make_unique<MediaGenericSearchPage>(this, "https://xhamster.com/", sf::Vector2i(240, 135), false);
add_xhamster_handlers(search_page.get());
tabs.push_back(Tab{create_body(false, true), std::move(search_page), create_search_bar("Search...", 500)});
} else if(strcmp(plugin_name, "soundcloud") == 0) {
@@ -1972,7 +1972,9 @@ namespace QuickMedia {
BodyItem *selected_item = tabs[selected_tab].body->get_selected();
if(selected_item && tabs[selected_tab].page->is_trackable()) {
TrackablePage *trackable_page = dynamic_cast<TrackablePage*>(tabs[selected_tab].page.get());
- trackable_page->track(selected_item->get_title());
+ run_task_with_loading_screen([trackable_page, selected_item](){
+ return trackable_page->track(selected_item->get_title()) == TrackResult::OK;
+ });
}
} else if(event.key.code == sf::Keyboard::C && event.key.control) {
BodyItem *selected_item = tabs[selected_tab].body->get_selected();
@@ -3183,6 +3185,7 @@ namespace QuickMedia {
image_download_future.cancel();
image_download_cancel = false;
+ num_manga_pages = 0;
std::promise<int> num_manga_pages_promise;
num_manga_pages_future = num_manga_pages_promise.get_future();
@@ -3346,6 +3349,11 @@ namespace QuickMedia {
sf::Event event;
download_chapter_images_if_needed(images_page);
+ if(num_manga_pages == 0) {
+ current_page = pop_page_stack();
+ return 0;
+ }
+
if(current_page != PageType::IMAGES || !window.isOpen())
return 0;
@@ -3541,6 +3549,11 @@ namespace QuickMedia {
}
download_chapter_images_if_needed(images_page);
+ if(num_manga_pages == 0) {
+ current_page = pop_page_stack();
+ return;
+ }
+
if(current_page != PageType::IMAGES_CONTINUOUS || !window.isOpen())
return;
@@ -4281,6 +4294,9 @@ namespace QuickMedia {
update_idle_state();
handle_window_close();
+ if(current_page != PageType::CHAT_LOGIN)
+ break;
+
if(redraw) {
redraw = false;
get_body_dimensions(window_size, nullptr, body_pos, body_size);
diff --git a/src/plugins/Fourchan.cpp b/src/plugins/Fourchan.cpp
index 52024e1..4b2ca61 100644
--- a/src/plugins/Fourchan.cpp
+++ b/src/plugins/Fourchan.cpp
@@ -3,10 +3,9 @@
#include "../../include/Storage.hpp"
#include "../../include/StringUtils.hpp"
#include "../../include/NetUtils.hpp"
+#include <HtmlParser.h>
#include <json/reader.h>
#include <string.h>
-#include <tidy.h>
-#include <tidybuffio.h>
// API documentation: https://github.com/4chan/4chan-API
@@ -37,133 +36,195 @@ namespace QuickMedia {
struct CommentPiece {
enum class Type {
TEXT,
- QUOTE, // >
- QUOTELINK, // >>POSTNO,
- LINE_CONTINUE
+ QUOTE, // >, Set for span
+ QUOTE_CONTINUE, // Set for span
+ QUOTELINK, // >>POSTNO, Set for a
+ DEADLINK, // Set for span
+ CROSSBOARD_LINK, // Set for a
+ CODEBLOCK // Set for pre
};
- DataView text; // Set when type is TEXT, QUOTE or QUOTELINK
+ std::string text;
int64_t quote_postnumber = 0; // Set when type is QUOTELINK
Type type;
};
- static TidyAttr get_attribute_by_name(TidyNode node, const char *name) {
- for(TidyAttr attr = tidyAttrFirst(node); attr; attr = tidyAttrNext(attr)) {
- const char *attr_name = tidyAttrName(attr);
- if(attr_name && strcmp(name, attr_name) == 0)
- return attr;
- }
- return nullptr;
- }
-
- static const char* get_attribute_value(TidyNode node, const char *name) {
- TidyAttr attr = get_attribute_by_name(node, name);
- if(attr)
- return tidyAttrValue(attr);
- return nullptr;
- }
-
- static void lstrip_newline(const char *str, size_t size, const char **output_str, size_t *output_size) {
- size_t i = 0;
- while(i < size && str[i] == '\n') {
- ++i;
- }
- *output_str = str + i;
- *output_size = size - i;
- }
+ enum class NodeType {
+ A,
+ SPAN,
+ PRE
+ };
- static void rstrip_newline(const char *str, size_t size, size_t *output_size) {
- ssize_t i = size - 1;
- while(i >= 0 && str[i] == '\n') {
- --i;
+ // Returns -1 if no match
+ static NodeType tag_name_to_node_type(HtmlStringView str) {
+ if(str.size == 1 && str.data[0] == 'a') {
+ return NodeType::A;
+ } else if(str.size == 4 && memcmp(str.data, "span", 4) == 0) {
+ return NodeType::SPAN;
+ } else if(str.size == 3 && memcmp(str.data, "pre", 3) == 0) {
+ return NodeType::PRE;
+ } else {
+ return (NodeType)-1;
}
- *output_size = i + 1;
}
- static void strip_newline(const char *str, size_t size, const char **output_str, size_t *output_size) {
- lstrip_newline(str, size, output_str, output_size);
- rstrip_newline(*output_str, *output_size, output_size);
- }
+ struct HtmlNode {
+ NodeType node_type;
+ std::string klass;
+ std::string href;
+ int output_count = 0;
+ };
using CommentPieceCallback = std::function<void(const CommentPiece&)>;
- static int extract_comment_pieces(TidyDoc doc, TidyNode node, CommentPieceCallback &callback) {
- for(TidyNode child = tidyGetChild(node); child; child = tidyGetNext(child)) {
- const char *node_name = tidyNodeGetName(child);
- if(node_name) {
- if(strcmp(node_name, "br") == 0) {
+ struct HtmlParseUserdata {
+ CommentPieceCallback callback;
+ std::stack<HtmlNode> html_node;
+ };
+
+ static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) {
+ HtmlParseUserdata *parse_userdata = (HtmlParseUserdata*)userdata;
+
+ switch(parse_type) {
+ case HTML_PARSE_TAG_START: {
+ if(html_parser->tag_name.size == 2 && memcmp(html_parser->tag_name.data, "br", 2) == 0) {
CommentPiece comment_piece;
comment_piece.type = CommentPiece::Type::TEXT;
- // Warning: Cast from const char* to char* ...
- comment_piece.text = { (char*)"\n", 1 };
- callback(comment_piece);
- /*} else if(strcmp(node_name, "span") == 0) {
- const char *span_class = get_attribute_value(child, "class");
- //fprintf(stderr, "span class: %s\n", span_class);
- if(span_class && strcmp(span_class, "quote") == 0) {
- CommentPiece comment_piece;
- comment_piece.type = CommentPiece::Type::QUOTE;
- // Warning: Cast from const char* to char* ...
- comment_piece.text = { (char*)"\n", 1 };
- callback(comment_piece);
- }*/
+ comment_piece.text = "\n";
+ parse_userdata->callback(comment_piece);
}
- }
- if(tidyNodeGetType(child) == TidyNode_Text) {
- TidyBuffer tidy_buffer;
- tidyBufInit(&tidy_buffer);
- if(tidyNodeGetText(doc, child, &tidy_buffer)) {
- const char *inner_text = (const char*)tidy_buffer.bp;
- size_t inner_text_size = tidy_buffer.size;
- strip_newline(inner_text, inner_text_size, &inner_text, &inner_text_size);
-
- const char *node_name = tidyNodeGetName(node);
- if(node_name && strcmp(node_name, "a") == 0) {
- const char *a_class = get_attribute_value(node, "class");
- const char *a_href = get_attribute_value(node, "href");
- if(a_class && a_href && strcmp(a_class, "quotelink") == 0 && strncmp(a_href, "#p", 2) == 0) {
- CommentPiece comment_piece;
- comment_piece.type = CommentPiece::Type::QUOTELINK;
- comment_piece.quote_postnumber = strtoll(a_href + 2, nullptr, 10);
- // Warning: Cast from const char* to char* ...
- comment_piece.text = { (char*)inner_text, inner_text_size };
- callback(comment_piece);
- tidyBufFree(&tidy_buffer);
- continue;
- }
+ const NodeType node_type = tag_name_to_node_type(html_parser->tag_name);
+ if(node_type != (NodeType)-1)
+ parse_userdata->html_node.push({ node_type, "", "", 0 });
+ break;
+ }
+ case HTML_PARSE_TAG_END: {
+ if(!parse_userdata->html_node.empty()) {
+ const NodeType node_type = tag_name_to_node_type(html_parser->tag_name);
+ if(node_type != (NodeType)-1)
+ parse_userdata->html_node.pop();
+ }
+ break;
+ }
+ case HTML_PARSE_ATTRIBUTE: {
+ if(!parse_userdata->html_node.empty()) {
+ HtmlNode &html_node = parse_userdata->html_node.top();
+ if(html_parser->attribute_key.size == 5 && memcmp(html_parser->attribute_key.data, "class", 5) == 0) {
+ html_node.klass.assign(html_parser->attribute_value.data, html_parser->attribute_value.size);
+ } else if(html_parser->attribute_key.size == 4 && memcmp(html_parser->attribute_key.data, "href", 4) == 0) {
+ html_node.href.assign(html_parser->attribute_value.data, html_parser->attribute_value.size);
}
+ }
+ break;
+ }
+ case HTML_PARSE_TEXT: {
+ std::string text(html_parser->text.data, html_parser->text.size);
+ html_unescape_sequences(text);
- CommentPiece comment_piece;
+ CommentPiece comment_piece;
+ comment_piece.type = CommentPiece::Type::TEXT;
+ comment_piece.text = std::move(text);
+
+ if(parse_userdata->html_node.empty()) {
comment_piece.type = CommentPiece::Type::TEXT;
- // Warning: Cast from const char* to char* ...
- comment_piece.text = { (char*)inner_text, inner_text_size };
- callback(comment_piece);
+ } else {
+ HtmlNode &html_node = parse_userdata->html_node.top();
+ switch(html_node.node_type) {
+ case NodeType::A: {
+ if(html_node.klass == "quotelink") {
+ if(string_starts_with(html_node.href, "#p")) {
+ comment_piece.type = CommentPiece::Type::QUOTELINK;
+ comment_piece.quote_postnumber = strtoll(html_node.href.c_str() + 2, nullptr, 10);
+ } else if(string_starts_with(html_node.href, "/")) {
+ comment_piece.type = CommentPiece::Type::CROSSBOARD_LINK;
+ } else {
+ fprintf(stderr, "Unexpected href for quotelink: %s\n", html_node.href.c_str());
+ }
+ } else {
+ fprintf(stderr, "Unexpected class for a: %s\n", html_node.klass.c_str());
+ }
+ break;
+ }
+ case NodeType::SPAN: {
+ if(html_node.klass == "quote") {
+ comment_piece.type = html_node.output_count ? CommentPiece::Type::QUOTE : CommentPiece::Type::QUOTE_CONTINUE;
+ } else if(html_node.klass == "deadlink") {
+ comment_piece.type = CommentPiece::Type::DEADLINK;
+ } else {
+ fprintf(stderr, "Unexpected class for span: %s\n", html_node.klass.c_str());
+ }
+ break;
+ }
+ case NodeType::PRE: {
+ if(html_node.klass == "prettyprint") {
+ comment_piece.type = CommentPiece::Type::CODEBLOCK;
+ } else {
+ fprintf(stderr, "Unexpected class for pre: %s\n", html_node.klass.c_str());
+ }
+ break;
+ }
+ }
+ html_node.output_count++;
}
- tidyBufFree(&tidy_buffer);
- } else {
- int res = extract_comment_pieces(doc, child, callback);
- if(res != 0)
- return res;
+
+ parse_userdata->callback(comment_piece);
+ break;
+ }
+ case HTML_PARSE_JAVASCRIPT_CODE: {
+ break;
}
}
+
return 0;
}
static void extract_comment_pieces(const char *html_source, size_t size, CommentPieceCallback callback) {
- TidyDoc doc = tidyCreate();
- tidyOptSetBool(doc, TidyShowWarnings, no);
- tidyOptSetInt(doc, TidyUseCustomTags, 1);
- tidyOptSetInt(doc, TidyWrapLen, 0);
- if(tidyParseString(doc, html_source) < 0) {
- CommentPiece comment_piece;
- comment_piece.type = CommentPiece::Type::TEXT;
- // Warning: Cast from const char* to char* ...
- comment_piece.text = { (char*)html_source, size };
- callback(comment_piece);
- } else {
- extract_comment_pieces(doc, tidyGetRoot(doc), callback);
- }
- tidyRelease(doc);
+ HtmlParseUserdata parse_userdata;
+ parse_userdata.callback = std::move(callback);
+ html_parser_parse(html_source, size, html_parse_callback, &parse_userdata);
+ }
+
+ static std::string html_to_text(const char *html_source, size_t size, std::unordered_map<int64_t, size_t> &comment_by_postno, BodyItems &result_items, size_t body_item_index) {
+ std::string comment_text;
+ extract_comment_pieces(html_source, size,
+ [&comment_text, &comment_by_postno, &result_items, body_item_index](const CommentPiece &cp) {
+ switch(cp.type) {
+ case CommentPiece::Type::TEXT:
+ comment_text += std::move(cp.text);
+ break;
+ case CommentPiece::Type::QUOTE:
+ comment_text += std::move(cp.text);
+ break;
+ case CommentPiece::Type::QUOTE_CONTINUE:
+ comment_text += std::move(cp.text);
+ break;
+ case CommentPiece::Type::QUOTELINK: {
+ comment_text += std::move(cp.text);
+ auto it = comment_by_postno.find(cp.quote_postnumber);
+ if(it == comment_by_postno.end()) {
+ // TODO: Link this quote to a 4chan archive that still has the quoted comment (if available)
+ comment_text += " (Dead)";
+ } else {
+ result_items[body_item_index]->replies_to.push_back(it->second);
+ result_items[it->second]->replies.push_back(body_item_index);
+ }
+ break;
+ }
+ case CommentPiece::Type::DEADLINK:
+ // TODO: Link this quote to a 4chan archive that still has the quoted comment (if available)
+ comment_text += std::move(cp.text) + " (Dead)";
+ break;
+ case CommentPiece::Type::CROSSBOARD_LINK:
+ // TODO: Link this to another thread and allow navigating to it
+ comment_text += std::move(cp.text) + " (Cross-thread)";
+ break;
+ case CommentPiece::Type::CODEBLOCK:
+ // TODO: Use a different colored background and use a monospace font
+ comment_text += std::move(cp.text);
+ break;
+ }
+ });
+ return comment_text;
}
PluginResult FourchanBoardsPage::submit(const std::string &title, const std::string &url, std::vector<Tab> &result_tabs) {
@@ -264,68 +325,14 @@ namespace QuickMedia {
author_str += " #" + std::to_string(post_num.asInt64());
- std::string comment_text;
- extract_comment_pieces(sub_begin, sub_end - sub_begin,
- [&comment_text](const CommentPiece &cp) {
- switch(cp.type) {
- case CommentPiece::Type::TEXT:
- comment_text.append(cp.text.data, cp.text.size);
- break;
- case CommentPiece::Type::QUOTE:
- //comment_text += '>';
- //comment_text.append(cp.text.data, cp.text.size);
- //comment_text += '\n';
- break;
- case CommentPiece::Type::QUOTELINK: {
- comment_text.append(cp.text.data, cp.text.size);
- break;
- }
- case CommentPiece::Type::LINE_CONTINUE: {
- if(!comment_text.empty() && comment_text.back() == '\n') {
- comment_text.pop_back();
- }
- break;
- }
- }
- }
- );
+ std::string comment_text = html_to_text(sub_begin, sub_end - sub_begin, comment_by_postno, result_items, body_item_index);
if(!comment_text.empty())
comment_text += '\n';
- extract_comment_pieces(comment_begin, comment_end - comment_begin,
- [&comment_text, &comment_by_postno, &result_items, body_item_index](const CommentPiece &cp) {
- switch(cp.type) {
- case CommentPiece::Type::TEXT:
- comment_text.append(cp.text.data, cp.text.size);
- break;
- case CommentPiece::Type::QUOTE:
- //comment_text += '>';
- //comment_text.append(cp.text.data, cp.text.size);
- //comment_text += '\n';
- break;
- case CommentPiece::Type::QUOTELINK: {
- comment_text.append(cp.text.data, cp.text.size);
- auto it = comment_by_postno.find(cp.quote_postnumber);
- if(it == comment_by_postno.end()) {
- // TODO: Link this quote to a 4chan archive that still has the quoted comment (if available)
- comment_text += "(dead)";
- } else {
- result_items[body_item_index]->replies_to.push_back(it->second);
- result_items[it->second]->replies.push_back(body_item_index);
- }
- break;
- }
- case CommentPiece::Type::LINE_CONTINUE: {
- if(!comment_text.empty() && comment_text.back() == '\n') {
- comment_text.pop_back();
- }
- break;
- }
- }
- }
- );
+
+ comment_text += html_to_text(comment_begin, comment_end - comment_begin, comment_by_postno, result_items, body_item_index);
if(!comment_text.empty() && comment_text.back() == '\n')
- comment_text.back() = ' ';
- html_unescape_sequences(comment_text);
+ comment_text.pop_back();
+
BodyItem *body_item = result_items[body_item_index].get();
body_item->set_title(std::move(comment_text));
body_item->set_author(std::move(author_str));
@@ -369,6 +376,7 @@ namespace QuickMedia {
if(!json_root.isArray())
return PluginResult::ERR;
+ std::unordered_map<int64_t, size_t> comment_by_postno;
for(const Json::Value &page_data : json_root) {
if(!page_data.isObject())
continue;
@@ -395,61 +403,11 @@ namespace QuickMedia {
if(!thread_num.isNumeric())
continue;
- std::string title_text;
- extract_comment_pieces(sub_begin, sub_end - sub_begin,
- [&title_text](const CommentPiece &cp) {
- switch(cp.type) {
- case CommentPiece::Type::TEXT:
- title_text.append(cp.text.data, cp.text.size);
- break;
- case CommentPiece::Type::QUOTE:
- //title_text += '>';
- //title_text.append(cp.text.data, cp.text.size);
- //comment_text += '\n';
- break;
- case CommentPiece::Type::QUOTELINK: {
- title_text.append(cp.text.data, cp.text.size);
- break;
- }
- case CommentPiece::Type::LINE_CONTINUE: {
- if(!title_text.empty() && title_text.back() == '\n') {
- title_text.pop_back();
- }
- break;
- }
- }
- }
- );
+ std::string title_text = html_to_text(sub_begin, sub_end - sub_begin, comment_by_postno, result_items, 0);
if(!title_text.empty() && title_text.back() == '\n')
title_text.back() = ' ';
- html_unescape_sequences(title_text);
-
- std::string comment_text;
- extract_comment_pieces(comment_begin, comment_end - comment_begin,
- [&comment_text](const CommentPiece &cp) {
- switch(cp.type) {
- case CommentPiece::Type::TEXT:
- comment_text.append(cp.text.data, cp.text.size);
- break;
- case CommentPiece::Type::QUOTE:
- //comment_text += '>';
- //comment_text.append(cp.text.data, cp.text.size);
- //comment_text += '\n';
- break;
- case CommentPiece::Type::QUOTELINK: {
- comment_text.append(cp.text.data, cp.text.size);
- break;
- }
- case CommentPiece::Type::LINE_CONTINUE: {
- if(!comment_text.empty() && comment_text.back() == '\n') {
- comment_text.pop_back();
- }
- break;
- }
- }
- }
- );
- html_unescape_sequences(comment_text);
+
+ std::string comment_text = html_to_text(comment_begin, comment_end - comment_begin, comment_by_postno, result_items, 0);
// TODO: Do the same when wrapping is implemented
// TODO: Remove this
int num_lines = 0;
@@ -462,6 +420,7 @@ namespace QuickMedia {
}
}
}
+
auto body_item = BodyItem::create(std::move(comment_text));
body_item->set_author(std::move(title_text));
body_item->url = std::to_string(thread_num.asInt64());
diff --git a/src/plugins/MangaGeneric.cpp b/src/plugins/MangaGeneric.cpp
index 2d0df6a..a2608ab 100644
--- a/src/plugins/MangaGeneric.cpp
+++ b/src/plugins/MangaGeneric.cpp
@@ -43,7 +43,11 @@ namespace QuickMedia {
const char *field_contains = nullptr;
};
- static const char* html_attr_or_inner_text(QuickMediaHtmlNode *node, const char *field_name) {
+ static bool string_view_contains(const QuickMediaStringView str, const char *sub) {
+ return memmem(str.data, str.size, sub, strlen(sub));
+ }
+
+ static QuickMediaStringView html_attr_or_inner_text(QuickMediaMatchNode *node, const char *field_name) {
if(strcmp(field_name, "text") == 0)
return quickmedia_html_node_get_text(node);
else
@@ -66,59 +70,62 @@ namespace QuickMedia {
static int html_append_search(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlSearchUserdata *search_userdata) {
return quickmedia_html_find_nodes_xpath(html_search, html_query,
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
HtmlSearchUserdata *search_userdata = (HtmlSearchUserdata*)userdata;
- const char *field1_value = html_attr_or_inner_text(node, search_userdata->field1);
+ QuickMediaStringView field1_value = html_attr_or_inner_text(node, search_userdata->field1);
if(search_userdata->field2) {
- const char *field2_value = html_attr_or_inner_text(node, search_userdata->field2);
- if(field1_value && field2_value && (!search_userdata->field2_contains || strstr(field2_value, search_userdata->field2_contains))) {
- std::string field1_fixed = strip(field1_value);
+ QuickMediaStringView field2_value = html_attr_or_inner_text(node, search_userdata->field2);
+ if(field1_value.data && field2_value.data && (!search_userdata->field2_contains || string_view_contains(field2_value, search_userdata->field2_contains))) {
+ std::string field1_fixed(field1_value.data, field1_value.size);
html_unescape_sequences(field1_fixed);
auto item = BodyItem::create(std::move(field1_fixed));
- item->url = strip(field2_value);
+ item->url = std::string(field2_value.data, field2_value.size);
search_userdata->body_items->push_back(std::move(item));
}
} else {
- if(field1_value) {
- std::string field1_fixed = strip(field1_value);
+ if(field1_value.data) {
+ std::string field1_fixed(field1_value.data, field1_value.size);
html_unescape_sequences(field1_fixed);
auto item = BodyItem::create(std::move(field1_fixed));
search_userdata->body_items->push_back(std::move(item));
}
}
+ return 0;
}, search_userdata);
}
static int html_body_item_merge(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlMergeUserdata *merge_userdata) {
return quickmedia_html_find_nodes_xpath(html_search, html_query,
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
HtmlMergeUserdata *merge_userdata = (HtmlMergeUserdata*)userdata;
BodyItemContext &body_item_image_context = merge_userdata->body_item_image_context;
- const char *field_value = html_attr_or_inner_text(node, merge_userdata->field_name);
+ QuickMediaStringView field_value = html_attr_or_inner_text(node, merge_userdata->field_name);
if(body_item_image_context.index < body_item_image_context.body_items->size()
- && field_value && (!merge_userdata->field_contains || strstr(field_value, merge_userdata->field_contains)))
+ && field_value.data && (!merge_userdata->field_contains || string_view_contains(field_value, merge_userdata->field_contains)))
{
+ std::string field_stripped(field_value.data, field_value.size);
if(merge_userdata->type == MergeType::THUMBNAIL) {
- (*body_item_image_context.body_items)[body_item_image_context.index]->thumbnail_url = strip(field_value);
+ (*body_item_image_context.body_items)[body_item_image_context.index]->thumbnail_url = std::move(field_stripped);
} else if(merge_userdata->type == MergeType::DESCRIPTION) {
- std::string field_stripped = strip(field_value);
const char *prefix = merge_userdata->desc_prefix ? merge_userdata->desc_prefix : "";
- (*body_item_image_context.body_items)[body_item_image_context.index]->set_description(prefix + field_stripped);
+ (*body_item_image_context.body_items)[body_item_image_context.index]->set_description(prefix + std::move(field_stripped));
(*body_item_image_context.body_items)[body_item_image_context.index]->set_description_color(get_current_theme().faded_text_color);
}
body_item_image_context.index++;
}
+ return 0;
}, merge_userdata);
}
static int html_get_page_url(QuickMediaHtmlSearch *html_search, const char *html_query, HtmlPageImageUserdata *page_image_userdata) {
return quickmedia_html_find_nodes_xpath(html_search, html_query,
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
HtmlPageImageUserdata *page_image_userdata = (HtmlPageImageUserdata*)userdata;
- const char *field1_value = html_attr_or_inner_text(node, page_image_userdata->field_name);
- if(page_image_userdata->url->empty() && field1_value && (!page_image_userdata->field_contains || strstr(field1_value, page_image_userdata->field_contains))) {
- *page_image_userdata->url = strip(field1_value);
+ QuickMediaStringView field1_value = html_attr_or_inner_text(node, page_image_userdata->field_name);
+ if(page_image_userdata->url->empty() && field1_value.data && (!page_image_userdata->field_contains || string_view_contains(field1_value, page_image_userdata->field_contains))) {
+ *page_image_userdata->url = std::string(field1_value.data, field1_value.size);
}
+ return 0;
}, page_image_userdata);
}
@@ -170,7 +177,7 @@ namespace QuickMedia {
return PluginResult::OK;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
@@ -306,7 +313,7 @@ namespace QuickMedia {
return PluginResult::NET_ERR;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
@@ -338,12 +345,13 @@ namespace QuickMedia {
authors_userdata.authors_query = &authors_query;
quickmedia_html_find_nodes_xpath(&html_search, authors_query.html_query,
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
HtmlAuthorsUserdata *authors_userdata = (HtmlAuthorsUserdata*)userdata;
- const char *title_value = html_attr_or_inner_text(node, authors_userdata->authors_query->title_field);
- const char *url_value = html_attr_or_inner_text(node, authors_userdata->authors_query->url_field);
- if(title_value && url_value && (!authors_userdata->authors_query->url_contains || strstr(url_value, authors_userdata->authors_query->url_contains)))
- (*authors_userdata->creators)[strip(title_value)] = strip(url_value);
+ QuickMediaStringView title_value = html_attr_or_inner_text(node, authors_userdata->authors_query->title_field);
+ QuickMediaStringView url_value = html_attr_or_inner_text(node, authors_userdata->authors_query->url_field);
+ if(title_value.data && url_value.data && (!authors_userdata->authors_query->url_contains || string_view_contains(url_value, authors_userdata->authors_query->url_contains)))
+ (*authors_userdata->creators)[std::string(title_value.data, title_value.size)] = std::string(url_value.data, url_value.size);
+ return 0;
}, &authors_userdata);
}
}
@@ -471,19 +479,20 @@ namespace QuickMedia {
return ImageResult::NET_ERR;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
result = quickmedia_html_find_nodes_xpath(&html_search, list_page_pagination_query->pages_html_query,
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
HtmlPageCountUserdata *page_count_userdata = (HtmlPageCountUserdata*)userdata;
- const char *field1_value = html_attr_or_inner_text(node, page_count_userdata->field_name);
- if(field1_value) {
- std::string field_value_stripped = strip(field1_value);
+ QuickMediaStringView field1_value = html_attr_or_inner_text(node, page_count_userdata->field_name);
+ if(field1_value.data) {
+ std::string field_value_stripped(field1_value.data, field1_value.size);
if(is_number(field_value_stripped.c_str()))
page_count_userdata->num_pages = strtol(field_value_stripped.c_str(), nullptr, 10);
}
+ return 0;
}, &page_count_userdata);
if(result != 0 || page_count_userdata.num_pages == 0) {
@@ -583,7 +592,7 @@ namespace QuickMedia {
return ImageResult::ERR;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
@@ -642,17 +651,18 @@ namespace QuickMedia {
list_page_images_userdata.field_contains = list_page_images_query->field_contains;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
result = quickmedia_html_find_nodes_xpath(&html_search, list_page_images_query->html_query,
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
HtmlListPageImagesUserdata *list_page_images_userdata = (HtmlListPageImagesUserdata*)userdata;
- const char *field1_value = html_attr_or_inner_text(node, list_page_images_userdata->field_name);
- if(field1_value && (!list_page_images_userdata->field_contains || strstr(field1_value, list_page_images_userdata->field_contains))) {
- list_page_images_userdata->urls->push_back(strip(field1_value));
+ QuickMediaStringView field1_value = html_attr_or_inner_text(node, list_page_images_userdata->field_name);
+ if(field1_value.data && (!list_page_images_userdata->field_contains || string_view_contains(field1_value, list_page_images_userdata->field_contains))) {
+ list_page_images_userdata->urls->push_back(std::string(field1_value.data, field1_value.size));
}
+ return 0;
}, &list_page_images_userdata);
if(result == 0 && !chapter_image_urls.empty() && list_page_images_query->post_handler)
diff --git a/src/plugins/Manganelo.cpp b/src/plugins/Manganelo.cpp
index 094d096..e0517dd 100644
--- a/src/plugins/Manganelo.cpp
+++ b/src/plugins/Manganelo.cpp
@@ -1,11 +1,14 @@
#include "../../plugins/Manganelo.hpp"
#include "../../include/Notification.hpp"
-#include "../../include/StringUtils.hpp"
#include "../../include/NetUtils.hpp"
#include "../../include/Theme.hpp"
#include <quickmedia/HtmlSearch.h>
namespace QuickMedia {
+ // Returns true when the bytes of |sub| occur anywhere inside the view |str|.
+ // |str| is described by a pointer plus a length, so the length-bounded memmem is used rather than strstr.
+ // |sub| must be a NUL-terminated string (its length is taken with strlen).
+ // A view whose data pointer is null never matches; checking that here keeps a null haystack away from memmem
+ // instead of relying on every caller to test |str.data| first.
+ static bool string_view_contains(const QuickMediaStringView str, const char *sub) {
+ return str.data && memmem(str.data, str.size, sub, strlen(sub)) != nullptr;
+ }
+
// Returns true if modified
static bool remove_html_span(std::string &str) {
size_t open_tag_start = str.find("<span");
@@ -35,20 +38,21 @@ namespace QuickMedia {
return PluginResult::NET_ERR;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
result = quickmedia_html_find_nodes_xpath(&html_search, "//ul[class='row-content-chapter']//a",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
auto *item_data = (BodyItems*)userdata;
- const char *href = quickmedia_html_node_get_attribute_value(node, "href");
- const char *text = quickmedia_html_node_get_text(node);
- if(href && text) {
- auto item = BodyItem::create(strip(text));
- item->url = strip(href);
+ QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href");
+ QuickMediaStringView text = quickmedia_html_node_get_text(node);
+ if(href.data && text.data) {
+ auto item = BodyItem::create(std::string(text.data, text.size));
+ item->url.assign(href.data, href.size);
item_data->push_back(std::move(item));
}
+ return 0;
}, &chapters_items);
BodyItemContext body_item_context;
@@ -56,29 +60,31 @@ namespace QuickMedia {
body_item_context.index = 0;
quickmedia_html_find_nodes_xpath(&html_search, "//ul[class='row-content-chapter']//span",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
auto *item_data = (BodyItemContext*)userdata;
- const char *class_attr = quickmedia_html_node_get_attribute_value(node, "class");
- const char *text = quickmedia_html_node_get_text(node);
- if(text && class_attr && strstr(class_attr, "chapter-time") && item_data->index < item_data->body_items->size()) {
- std::string uploaded_date = strip(text);
- (*item_data->body_items)[item_data->index]->set_description("Uploaded: " + uploaded_date);
+ QuickMediaStringView class_attr = quickmedia_html_node_get_attribute_value(node, "class");
+ QuickMediaStringView text = quickmedia_html_node_get_text(node);
+ if(text.data && class_attr.data && string_view_contains(class_attr, "chapter-time") && item_data->index < item_data->body_items->size()) {
+ std::string uploaded_date(text.data, text.size);
+ (*item_data->body_items)[item_data->index]->set_description("Uploaded: " + std::move(uploaded_date));
(*item_data->body_items)[item_data->index]->set_description_color(get_current_theme().faded_text_color);
item_data->index++;
}
+ return 0;
}, &body_item_context);
quickmedia_html_find_nodes_xpath(&html_search, "//a[class='a-h']",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
std::vector<Creator> *creators = (std::vector<Creator>*)userdata;
- const char *href = quickmedia_html_node_get_attribute_value(node, "href");
- const char *text = quickmedia_html_node_get_text(node);
- if(href && text && strstr(href, "/author/story/")) {
+ QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href");
+ QuickMediaStringView text = quickmedia_html_node_get_text(node);
+ if(href.data && text.data && string_view_contains(href, "/author/story/")) {
Creator creator;
- creator.name = strip(text);
- creator.url = strip(href);
+ creator.name.assign(text.data, text.size);
+ creator.url.assign(href.data, href.size);
creators->push_back(std::move(creator));
}
+ return 0;
}, &creators);
cleanup:
@@ -124,7 +130,7 @@ namespace QuickMedia {
if(name.isString() && name.asCString()[0] != '\0' && nameunsigned.isString() && nameunsigned.asCString()[0] != '\0') {
std::string name_str = name.asString();
while(remove_html_span(name_str)) {}
- auto item = BodyItem::create(strip(name_str));
+ auto item = BodyItem::create(name_str);
item->url = "https://manganelo.com/manga/" + url_param_encode(nameunsigned.asString());
if(lastchapter.isString() && lastchapter.asCString()[0] != '\0') {
item->set_description("Latest chapter: " + lastchapter.asString());
@@ -192,20 +198,21 @@ namespace QuickMedia {
return PluginResult::NET_ERR;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='search-story-item']//a[class='item-img']",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
auto *item_data = (BodyItems*)userdata;
- const char *href = quickmedia_html_node_get_attribute_value(node, "href");
- const char *title = quickmedia_html_node_get_attribute_value(node, "title");
- if(href && title && strstr(href, "/manga/")) {
- auto body_item = BodyItem::create(strip(title));
- body_item->url = strip(href);
+ QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href");
+ QuickMediaStringView title = quickmedia_html_node_get_attribute_value(node, "title");
+ if(href.data && title.data && string_view_contains(href, "/manga/")) {
+ auto body_item = BodyItem::create(std::string(title.data, title.size));
+ body_item->url.assign(href.data, href.size);
item_data->push_back(std::move(body_item));
}
+ return 0;
}, &result_items);
if(result != 0)
@@ -216,13 +223,14 @@ namespace QuickMedia {
body_item_image_context.index = 0;
result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='search-story-item']//a[class='item-img']//img",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
auto *item_data = (BodyItemContext*)userdata;
- const char *src = quickmedia_html_node_get_attribute_value(node, "src");
- if(src && item_data->index < item_data->body_items->size()) {
- (*item_data->body_items)[item_data->index]->thumbnail_url = src;
+ QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src");
+ if(src.data && item_data->index < item_data->body_items->size()) {
+ (*item_data->body_items)[item_data->index]->thumbnail_url.assign(src.data, src.size);
item_data->index++;
}
+ return 0;
}, &body_item_image_context);
cleanup:
@@ -261,18 +269,19 @@ namespace QuickMedia {
return ImageResult::NET_ERR;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='container-chapter-reader']/img",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
auto *urls = (std::vector<std::string>*)userdata;
- const char *src = quickmedia_html_node_get_attribute_value(node, "src");
- if(src) {
- std::string image_url = strip(src);
+ QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src");
+ if(src.data) {
+ std::string image_url(src.data, src.size);
urls->push_back(std::move(image_url));
}
+ return 0;
}, &chapter_image_urls);
cleanup:
diff --git a/src/plugins/MediaGeneric.cpp b/src/plugins/MediaGeneric.cpp
index 1f2389a..c829a33 100644
--- a/src/plugins/MediaGeneric.cpp
+++ b/src/plugins/MediaGeneric.cpp
@@ -3,15 +3,20 @@
#include <quickmedia/HtmlSearch.h>
namespace QuickMedia {
- using HtmlPathCallback = std::function<void(QuickMediaHtmlNode*)>;
+ // Returns true when the bytes of |sub| occur anywhere inside the view |str|.
+ // |str| is described by a pointer plus a length, so the length-bounded memmem is used rather than strstr.
+ // |sub| must be a NUL-terminated string (its length is taken with strlen).
+ // A view whose data pointer is null never matches; checking that here keeps a null haystack away from memmem
+ // instead of relying on every caller to test the data pointer first.
+ static bool string_view_contains(const QuickMediaStringView str, const char *sub) {
+ return str.data && memmem(str.data, str.size, sub, strlen(sub)) != nullptr;
+ }
+
+ using HtmlPathCallback = std::function<void(QuickMediaMatchNode*)>;
static int quickmedia_html_find_nodes_xpath(QuickMediaHtmlSearch *self, const char *xpath, HtmlPathCallback callback) {
- return quickmedia_html_find_nodes_xpath(self, xpath, [](QuickMediaHtmlNode *node, void *userdata) {
+ return quickmedia_html_find_nodes_xpath(self, xpath, [](QuickMediaMatchNode *node, void *userdata) {
HtmlPathCallback *callback = (HtmlPathCallback*)userdata;
(*callback)(node);
+ return 0;
}, &callback);
}
- static const char* html_attr_or_inner_text(QuickMediaHtmlNode *node, const char *field_name) {
+ static QuickMediaStringView html_attr_or_inner_text(QuickMediaMatchNode *node, const char *field_name) {
if(strcmp(field_name, "text") == 0)
return quickmedia_html_node_get_text(node);
else
@@ -32,13 +37,13 @@ namespace QuickMedia {
}
}
- static PluginResult fetch_page_results(const std::string &url, const std::string &website_url, const std::vector<MediaTextQuery> &text_queries, const std::vector<MediaThumbnailQuery> &thumbnail_queries, MediaRelatedCustomHandler *custom_handler, BodyItems &result_items) {
+ static PluginResult fetch_page_results(const std::string &url, const std::string &website_url, const std::vector<MediaTextQuery> &text_queries, const std::vector<MediaThumbnailQuery> &thumbnail_queries, MediaRelatedCustomHandler *custom_handler, BodyItems &result_items, bool cloudflare_bypass) {
std::vector<CommandArg> args;
if(!website_url.empty())
args.push_back({ "-H", "referer: " + website_url });
std::string website_data;
- if(download_to_string(url, website_data, args, true, true, true) != DownloadResult::OK)
+ if(download_to_string(url, website_data, args, true, true, cloudflare_bypass) != DownloadResult::OK)
return PluginResult::NET_ERR;
if(website_data.empty())
@@ -47,7 +52,7 @@ namespace QuickMedia {
if(custom_handler && *custom_handler) {
std::vector<MediaRelatedItem> media_related_items = (*custom_handler)(website_data);
for(MediaRelatedItem &media_related_item : media_related_items) {
- auto body_item = BodyItem::create(strip(media_related_item.title));
+ auto body_item = BodyItem::create(media_related_item.title);
body_item->url = std::move(media_related_item.url);
body_item->thumbnail_url = std::move(media_related_item.thumbnail_url);
result_items.push_back(std::move(body_item));
@@ -57,7 +62,7 @@ namespace QuickMedia {
}
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
@@ -68,14 +73,14 @@ namespace QuickMedia {
goto cleanup;
}
- result = quickmedia_html_find_nodes_xpath(&html_search, text_query.html_query, [&text_query, &result_items](QuickMediaHtmlNode *node) {
- const char *title_value = html_attr_or_inner_text(node, text_query.title_field);
- const char *url_value = html_attr_or_inner_text(node, text_query.url_field);
- if(title_value && url_value && (!text_query.url_contains || strstr(url_value, text_query.url_contains))) {
- std::string field1_fixed = strip(title_value);
+ result = quickmedia_html_find_nodes_xpath(&html_search, text_query.html_query, [&text_query, &result_items](QuickMediaMatchNode *node) {
+ QuickMediaStringView title_value = html_attr_or_inner_text(node, text_query.title_field);
+ QuickMediaStringView url_value = html_attr_or_inner_text(node, text_query.url_field);
+ if(title_value.data && url_value.data && (!text_query.url_contains || string_view_contains(url_value, text_query.url_contains))) {
+ std::string field1_fixed(title_value.data, title_value.size);
html_unescape_sequences(field1_fixed);
auto item = BodyItem::create(std::move(field1_fixed));
- item->url = strip(url_value);
+ item->url.assign(url_value.data, url_value.size);
result_items.push_back(std::move(item));
}
});
@@ -87,10 +92,10 @@ namespace QuickMedia {
assert(thumbnail_query.html_query && thumbnail_query.field_name);
if(thumbnail_query.html_query && thumbnail_query.field_name) {
size_t index = 0;
- result = quickmedia_html_find_nodes_xpath(&html_search, thumbnail_query.html_query, [&thumbnail_query, &result_items, &index](QuickMediaHtmlNode *node) {
- const char *field_value = html_attr_or_inner_text(node, thumbnail_query.field_name);
- if(index < result_items.size() && field_value && (!thumbnail_query.field_contains || strstr(field_value, thumbnail_query.field_contains))) {
- result_items[index]->thumbnail_url = strip(field_value);
+ result = quickmedia_html_find_nodes_xpath(&html_search, thumbnail_query.html_query, [&thumbnail_query, &result_items, &index](QuickMediaMatchNode *node) {
+ QuickMediaStringView field_value = html_attr_or_inner_text(node, thumbnail_query.field_name);
+ if(index < result_items.size() && field_value.data && (!thumbnail_query.field_contains || string_view_contains(field_value, thumbnail_query.field_contains))) {
+ result_items[index]->thumbnail_url.assign(field_value.data, field_value.size);
++index;
}
});
@@ -111,8 +116,8 @@ namespace QuickMedia {
}
}
- MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size) :
- Page(program), website_url(website_url ? website_url : ""), thumbnail_max_size(thumbnail_max_size)
+ MediaGenericSearchPage::MediaGenericSearchPage(Program *program, const char *website_url, sf::Vector2i thumbnail_max_size, bool cloudflare_bypass) :
+ Page(program), website_url(website_url ? website_url : ""), thumbnail_max_size(thumbnail_max_size), cloudflare_bypass(cloudflare_bypass)
{
if(!this->website_url.empty()) {
if(this->website_url.back() != '/')
@@ -128,7 +133,7 @@ namespace QuickMedia {
std::string url = search_query.search_template;
string_replace_all(url, "%s", url_param_encode(str));
string_replace_all(url, "%p", std::to_string(search_query.page_start + page));
- return fetch_page_results(url, website_url, text_queries, thumbnail_queries, nullptr, result_items);
+ return fetch_page_results(url, website_url, text_queries, thumbnail_queries, nullptr, result_items, cloudflare_bypass);
}
PluginResult MediaGenericSearchPage::submit(const std::string&, const std::string &url, std::vector<Tab> &result_tabs) {
@@ -137,7 +142,7 @@ namespace QuickMedia {
}
PluginResult MediaGenericSearchPage::get_related_media(const std::string &url, BodyItems &result_items) {
- return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, &related_custom_handler, result_items);
+ return fetch_page_results(url, website_url, related_media_text_queries, related_media_thumbnail_queries, &related_custom_handler, result_items, cloudflare_bypass);
}
MediaGenericSearchPage& MediaGenericSearchPage::search_handler(const char *search_template, int page_start) {
diff --git a/src/plugins/NyaaSi.cpp b/src/plugins/NyaaSi.cpp
index 5d9e41b..d4667af 100644
--- a/src/plugins/NyaaSi.cpp
+++ b/src/plugins/NyaaSi.cpp
@@ -291,17 +291,18 @@ namespace QuickMedia {
return PluginResult::NET_ERR;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
result = quickmedia_html_find_nodes_xpath(&html_search, "//h3[class='panel-title']",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
std::string *title = (std::string*)userdata;
- const char *text = quickmedia_html_node_get_text(node);
- if(title->empty() && text) {
- *title = text;
+ QuickMediaStringView text = quickmedia_html_node_get_text(node);
+ if(title->empty() && text.data) {
+ title->assign(text.data, text.size);
}
+ return 0;
}, &title);
if(result != 0)
@@ -314,16 +315,17 @@ namespace QuickMedia {
}
result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='panel-body']//div[class='row']//a",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
ResultItemExtra *item_data = (ResultItemExtra*)userdata;
- const char *href = quickmedia_html_node_get_attribute_value(node, "href");
- const char *text = quickmedia_html_node_get_text(node);
- if(item_data->result_items->empty() && href && text && strncmp(href, "/user/", 6) == 0) {
+ QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href");
+ QuickMediaStringView text = quickmedia_html_node_get_text(node);
+ if(item_data->result_items->empty() && href.data && text.data && href.size >= 6 && memcmp(href.data, "/user/", 6) == 0) {
auto body_item = BodyItem::create("");
- body_item->set_description("Submitter: " + strip(text));
- body_item->url = "https://" + *item_data->domain + "/" + std::string(href);
+ body_item->set_description("Submitter: " + std::string(text.data, text.size));
+ body_item->url = "https://" + *item_data->domain + "/" + std::string(href.data, href.size);
item_data->result_items->push_back(std::move(body_item));
}
+ return 0;
}, &result_item_extra);
if(result != 0)
@@ -335,17 +337,18 @@ namespace QuickMedia {
result_items.push_back(std::move(body_item));
}
- result_items.front()->set_title(strip(title));
+ result_items.front()->set_title(title);
result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='torrent-description']",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
std::string *description = (std::string*)userdata;
- const char *text = quickmedia_html_node_get_text(node);
- if(description->empty() && text) {
- std::string desc = strip(text);
+ QuickMediaStringView text = quickmedia_html_node_get_text(node);
+ if(description->empty() && text.data) {
+ std::string desc(text.data, text.size);
html_unescape_sequences(desc);
*description = std::move(desc);
}
+ return 0;
}, &description);
if(result != 0)
@@ -355,12 +358,13 @@ namespace QuickMedia {
result_items.front()->set_description(result_items.front()->get_description() + "\nDescription:\n" + description);
result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='container']//a",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
std::string *magnet_url = (std::string*)userdata;
- const char *href = quickmedia_html_node_get_attribute_value(node, "href");
- if(magnet_url->empty() && href && strncmp(href, "magnet:?", 8) == 0) {
- *magnet_url = href;
+ QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href");
+ if(magnet_url->empty() && href.data && href.size >= 8 && memcmp(href.data, "magnet:?", 8) == 0) {
+ magnet_url->assign(href.data, href.size);
}
+ return 0;
}, &magnet_url);
if(result != 0)
@@ -377,15 +381,16 @@ namespace QuickMedia {
comments_start_index = result_items.size();
result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='comments']//a",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
auto *item_data = (BodyItems*)userdata;
- const char *href = quickmedia_html_node_get_attribute_value(node, "href");
- const char *text = quickmedia_html_node_get_text(node);
- if(href && text && strncmp(href, "/user/", 6) == 0) {
- auto body_item = BodyItem::create(strip(text));
+ QuickMediaStringView href = quickmedia_html_node_get_attribute_value(node, "href");
+ QuickMediaStringView text = quickmedia_html_node_get_text(node);
+ if(href.data && text.data && href.size >= 6 && memcmp(href.data, "/user/", 6) == 0) {
+ auto body_item = BodyItem::create(std::string(text.data, text.size));
//body_item->url = "https://nyaa.si/" + std::string(href);
item_data->push_back(std::move(body_item));
}
+ return 0;
}, &result_items);
if(result != 0 || result_items.size() == comments_start_index)
@@ -396,14 +401,15 @@ namespace QuickMedia {
body_item_image_context.index = comments_start_index;
result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='comments']//img[class='avatar']",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
auto *item_data = (BodyItemContext*)userdata;
- const char *src = quickmedia_html_node_get_attribute_value(node, "src");
- if(src && item_data->index < item_data->body_items->size()) {
- (*item_data->body_items)[item_data->index]->thumbnail_url = src;
+ QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src");
+ if(src.data && item_data->index < item_data->body_items->size()) {
+ (*item_data->body_items)[item_data->index]->thumbnail_url.assign(src.data, src.size);
(*item_data->body_items)[item_data->index]->thumbnail_size = sf::Vector2i(120, 120);
item_data->index++;
}
+ return 0;
}, &body_item_image_context);
if(result != 0)
@@ -412,15 +418,16 @@ namespace QuickMedia {
body_item_image_context.index = comments_start_index;
result = quickmedia_html_find_nodes_xpath(&html_search, "//div[id='comments']//div[class='comment-content']",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
auto *item_data = (BodyItemContext*)userdata;
- const char *text = quickmedia_html_node_get_text(node);
- if(text && item_data->index < item_data->body_items->size()) {
- std::string desc = strip(text);
+ QuickMediaStringView text = quickmedia_html_node_get_text(node);
+ if(text.data && item_data->index < item_data->body_items->size()) {
+ std::string desc(text.data, text.size);
html_unescape_sequences(desc);
(*item_data->body_items)[item_data->index]->set_description(std::move(desc));
item_data->index++;
}
+ return 0;
}, &body_item_image_context);
cleanup:
diff --git a/src/plugins/Saucenao.cpp b/src/plugins/Saucenao.cpp
index 1278bed..e8d8357 100644
--- a/src/plugins/Saucenao.cpp
+++ b/src/plugins/Saucenao.cpp
@@ -1,5 +1,4 @@
#include "../../plugins/Saucenao.hpp"
-#include "../../include/StringUtils.hpp"
#include <quickmedia/HtmlSearch.h>
namespace QuickMedia {
@@ -19,22 +18,23 @@ namespace QuickMedia {
if(download_result != DownloadResult::OK) return download_result_to_plugin_result(download_result);
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
result = quickmedia_html_find_nodes_xpath(&html_search, "//td[class='resulttablecontent']",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
BodyItems *item_data = (BodyItems*)userdata;
- const char *text = quickmedia_html_node_get_text(node);
- if(text) {
- std::string title = text;
+ QuickMediaStringView text = quickmedia_html_node_get_text(node);
+ if(text.data) {
+ std::string title(text.data, text.size);
size_t p_index = title.find("%");
if(p_index != std::string::npos)
title = title.erase(0, p_index + 1);
- auto item = BodyItem::create(strip(title));
+ auto item = BodyItem::create(title);
item_data->push_back(std::move(item));
}
+ return 0;
}, &result_items);
BodyItemContext body_item_context;
@@ -42,16 +42,17 @@ namespace QuickMedia {
body_item_context.index = 0;
quickmedia_html_find_nodes_xpath(&html_search, "//td[class='resulttableimage']//img",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
BodyItemContext *item_data = (BodyItemContext*)userdata;
- const char *src = quickmedia_html_node_get_attribute_value(node, "src");
- const char *data_src = quickmedia_html_node_get_attribute_value(node, "data-src");
- const char *image_url = data_src ? data_src : src;
- if(image_url && item_data->index < item_data->body_items->size()) {
- (*item_data->body_items)[item_data->index]->thumbnail_url = strip(image_url);
+ QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src");
+ QuickMediaStringView data_src = quickmedia_html_node_get_attribute_value(node, "data-src");
+ QuickMediaStringView image_url = data_src.data ? data_src : src;
+ if(image_url.data && item_data->index < item_data->body_items->size()) {
+ (*item_data->body_items)[item_data->index]->thumbnail_url.assign(image_url.data, image_url.size);
(*item_data->body_items)[item_data->index]->thumbnail_size = sf::Vector2i(150, 147);
item_data->index++;
}
+ return 0;
}, &body_item_context);
cleanup:
diff --git a/src/plugins/Soundcloud.cpp b/src/plugins/Soundcloud.cpp
index abc8c18..7079e46 100644
--- a/src/plugins/Soundcloud.cpp
+++ b/src/plugins/Soundcloud.cpp
@@ -345,16 +345,17 @@ namespace QuickMedia {
if(client_id.empty()) {
std::vector<std::string> script_sources;
QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
+ int result = quickmedia_html_search_init(&html_search, website_data.c_str(), website_data.size());
if(result != 0)
goto cleanup;
result = quickmedia_html_find_nodes_xpath(&html_search, "//script",
- [](QuickMediaHtmlNode *node, void *userdata) {
+ [](QuickMediaMatchNode *node, void *userdata) {
std::vector<std::string> *script_sources = (std::vector<std::string>*)userdata;
- const char *src = quickmedia_html_node_get_attribute_value(node, "src");
- if(src)
- script_sources->push_back(strip(src));
+ QuickMediaStringView src = quickmedia_html_node_get_attribute_value(node, "src");
+ if(src.data)
+ script_sources->push_back(std::string(src.data, src.size));
+ return 0;
}, &script_sources);
cleanup:
diff --git a/src/plugins/Youtube.cpp b/src/plugins/Youtube.cpp
index 24fd448..13655c7 100644
--- a/src/plugins/Youtube.cpp
+++ b/src/plugins/Youtube.cpp
@@ -1610,25 +1610,25 @@ R"END(
if(!subscription_data.inside_entry && subscription_data.author.empty()) {
if(parse_type == HTML_PARSE_TAG_START && string_view_equals(&html_parser->tag_name, "title")) {
subscription_data.inside_title = true;
- return;
+ return 0;
} else if(parse_type == HTML_PARSE_TAG_END && string_view_equals(&html_parser->tag_name, "title")) {
subscription_data.inside_title = false;
subscription_data.author.assign(html_parser->text_stripped.data, html_parser->text_stripped.size);
- return;
+ return 0;
}
}
if(parse_type == HTML_PARSE_TAG_START && string_view_equals(&html_parser->tag_name, "entry")) {
subscription_data.subscription_entry.push_back({});
subscription_data.inside_entry = true;
- return;
+ return 0;
} else if(parse_type == HTML_PARSE_TAG_END && string_view_equals(&html_parser->tag_name, "entry")) {
subscription_data.inside_entry = false;
- return;
+ return 0;
}
if(!subscription_data.inside_entry)
- return;
+ return 0;
if(string_view_equals(&html_parser->tag_name, "title") && parse_type == HTML_PARSE_TAG_END) {
subscription_data.subscription_entry.back().title.assign(html_parser->text_stripped.data, html_parser->text_stripped.size);
@@ -1638,6 +1638,8 @@ R"END(
std::string published_str(html_parser->text_stripped.data, html_parser->text_stripped.size);
subscription_data.subscription_entry.back().published = iso_utc_to_unix_time(published_str.c_str());
}
+
+ return 0;
}, &subscription_data);
std::vector<YoutubeSubscriptionTaskResult> results;