aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/Pornhub.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/Pornhub.cpp')
-rw-r--r--src/plugins/Pornhub.cpp211
1 files changed, 112 insertions, 99 deletions
diff --git a/src/plugins/Pornhub.cpp b/src/plugins/Pornhub.cpp
index e8df9d7..b063a32 100644
--- a/src/plugins/Pornhub.cpp
+++ b/src/plugins/Pornhub.cpp
@@ -1,7 +1,9 @@
#include "../../plugins/Pornhub.hpp"
#include "../../include/StringUtils.hpp"
#include "../../include/NetUtils.hpp"
-#include <quickmedia/HtmlSearch.h>
+extern "C" {
+#include <HtmlParser.h>
+}
#include <string.h>
namespace QuickMedia {
@@ -9,64 +11,124 @@ namespace QuickMedia {
return strncmp(str, begin_with, strlen(begin_with)) == 0;
}
- static bool contains(const char *str, const char *substr) {
- return strstr(str, substr);
+ // TODO: Optimize by using HtmlStringView instead of std::string
+ // One node of the element tree that html_page_callback builds while the HTML is parsed.
+ // Nodes are allocated with new; html_cleanup deletes a node together with all of its descendants.
+ struct HtmlElement {
+ std::string tag_name; // copy of the parser's tag name; left empty for the root created by html_parse
+ std::map<std::string, std::string> attributes; // attribute key -> value; filled with insert(), so the first occurrence of a key is kept
+ std::vector<HtmlElement*> children; // child nodes in the order their start tags were reported
+ HtmlElement *parent = nullptr; // ref
+ };
+
+    // Frees |html_element_root| and every element below it.
+    // Passing nullptr is allowed and does nothing (same contract as delete).
+    // The tree is released iteratively with an explicit work list instead of recursing once per nesting level:
+    // the tree depth is dictated by the downloaded document, so recursion depth would be unbounded.
+    static void html_cleanup(HtmlElement *html_element_root) {
+        if(!html_element_root)
+            return;
+
+        std::vector<HtmlElement*> pending;
+        pending.push_back(html_element_root);
+        while(!pending.empty()) {
+            HtmlElement *html_element = pending.back();
+            pending.pop_back();
+            // Queue the children before the node that owns the pointers to them is destroyed
+            pending.insert(pending.end(), html_element->children.begin(), html_element->children.end());
+            delete html_element;
+        }
}
- SearchResult PornhubSearchPage::search(const std::string &str, BodyItems &result_items) {
- std::string url = "https://www.pornhub.com/video/search?search=";
- url += url_param_encode(str);
+    // Returns the value of attribute |attr_key| of |html_element|, or |default_value| when the element has no such attribute.
+    // The result is returned by value on purpose. Callers pass string literals for |default_value|, which creates a
+    // temporary std::string that only lives until the end of the calling expression; returning a reference to it
+    // left callers that store the result (const std::string &x = html_get_attribute_or(...)) with a dangling reference.
+    // With a by-value return such callers bind to a temporary whose lifetime is extended to that of the reference.
+    static std::string html_get_attribute_or(HtmlElement *html_element, const std::string &attr_key, const std::string &default_value) {
+        auto it = html_element->attributes.find(attr_key);
+        return it != html_element->attributes.end() ? it->second : default_value;
+    }
+
+ // State passed to html_page_callback through the parser's userdata pointer.
+ struct HtmlParseUserdata {
+ HtmlElement *current_html_element; // element that new children and attributes are attached to; html_parse sets it to the root before parsing
+ };
+
+    // Parser event handler that grows the HtmlElement tree one event at a time.
+    // |userdata| has to point to a HtmlParseUserdata whose current_html_element is not null.
+    // Events other than tag start, tag end and attribute are ignored.
+    static void html_page_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) {
+        HtmlParseUserdata *state = static_cast<HtmlParseUserdata*>(userdata);
+        HtmlElement *&current = state->current_html_element;
+
+        if(parse_type == HTML_PARSE_TAG_START) {
+            // Open a new element below the current one and descend into it
+            HtmlElement *opened = new HtmlElement();
+            opened->tag_name.assign(html_parser->tag_name.data, html_parser->tag_name.size);
+            opened->parent = current;
+            current->children.push_back(opened);
+            current = opened;
+            return;
+        }
+
+        if(parse_type == HTML_PARSE_TAG_END) {
+            // Step back up to the parent; the root has none and therefore stays current
+            if(current->parent)
+                current = current->parent;
+            return;
+        }
+
+        if(parse_type != HTML_PARSE_ATTRIBUTE)
+            return;
+
+        // insert() does not overwrite, so the first occurrence of an attribute key is the one that is kept
+        std::string key(html_parser->attribute_key.data, html_parser->attribute_key.size);
+        std::string value(html_parser->attribute_value.data, html_parser->attribute_value.size);
+        current->attributes.insert(std::make_pair(std::move(key), std::move(value)));
+    }
+
+    // Parses |size| bytes of HTML starting at |source| into a newly allocated element tree and returns its root.
+    // The root is a placeholder element with an empty tag name; the parsed elements hang below it.
+    // The caller owns the returned tree and releases it with html_cleanup.
+    static HtmlElement* html_parse(char *source, size_t size) {
+        HtmlElement *tree_root = new HtmlElement();
+
+        // The callback starts attaching elements directly to the root
+        HtmlParseUserdata callback_state;
+        callback_state.current_html_element = tree_root;
+
+        HtmlParser parser;
+        html_parser_init(&parser, source, size, html_page_callback, &callback_state);
+        html_parser_parse(&parser);
+        html_parser_deinit(&parser);
+
+        return tree_root;
+    }
+
+    using HtmlFindTagsCallback = std::function<void(HtmlElement *html_element)>;
+    // Walks the subtree rooted at |html_element| (the element itself included, parents before children) and calls
+    // |callback| for every element named |tag_name| whose "class" attribute is exactly |class_value|.
+    // An element without a "class" attribute is treated as having an empty class value.
+    static void html_find_tags_with_class(HtmlElement *html_element, const std::string &tag_name, const std::string &class_value, const HtmlFindTagsCallback &callback) {
+        const bool tag_matches = (html_element->tag_name == tag_name);
+        if(tag_matches && html_get_attribute_or(html_element, "class", "") == class_value)
+            callback(html_element);
+
+        for(HtmlElement *child : html_element->children)
+            html_find_tags_with_class(child, tag_name, class_value, callback);
+    }
+
+    // Walks the subtree rooted at |html_element| (the element itself included, parents before children) and calls
+    // |callback| for every element named |tag_name|.
+    static void html_find_tags(HtmlElement *html_element, const std::string &tag_name, const HtmlFindTagsCallback &callback) {
+        const bool is_match = (html_element->tag_name == tag_name);
+        if(is_match) {
+            callback(html_element);
+        }
+
+        for(HtmlElement *subtree_root : html_element->children)
+            html_find_tags(subtree_root, tag_name, callback);
+    }
+ // Downloads |url| (through tor when |use_tor| is set) and appends one BodyItem to |result_items| for every video link
+ // found inside a <div class="phimage"> element of the page.
+ // Returns NET_ERR when the download fails, in which case |result_items| is not touched; otherwise returns OK.
+ static SearchResult get_videos_in_page(const std::string &url, bool use_tor, BodyItems &result_items) {
std::string website_data;
- if(download_to_string(url, website_data, {}, is_tor_enabled()) != DownloadResult::OK)
+ if(download_to_string(url, website_data, {}, use_tor) != DownloadResult::OK)
return SearchResult::NET_ERR;
- struct ItemData {
- BodyItems *result_items;
- size_t index;
- };
- ItemData item_data = { &result_items, 0 };
-
- QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
- if(result != 0)
- goto cleanup;
-
- result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='phimage']//a",
- [](QuickMediaHtmlNode *node, void *userdata) {
- auto *result_items = (BodyItems*)userdata;
- const char *href = quickmedia_html_node_get_attribute_value(node, "href");
- const char *title = quickmedia_html_node_get_attribute_value(node, "title");
- if(href && title && begins_with(href, "/view_video.php?viewkey")) {
- auto item = BodyItem::create(strip(title));
- item->url = std::string("https://www.pornhub.com") + href;
- result_items->push_back(std::move(item));
- }
- }, &result_items);
- if(result != 0)
- goto cleanup;
-
- result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='phimage']//img",
- [](QuickMediaHtmlNode *node, void *userdata) {
- ItemData *item_data = (ItemData*)userdata;
- if(item_data->index >= item_data->result_items->size())
- return;
-
- const char *data_src = quickmedia_html_node_get_attribute_value(node, "data-src");
- if(data_src && contains(data_src, "phncdn.com/videos")) {
- (*item_data->result_items)[item_data->index]->thumbnail_url = data_src;
- ++item_data->index;
- }
- }, &item_data);
+ // The element tree built here is only needed while collecting the items and is freed by html_cleanup below
+ HtmlElement *html_root = html_parse(website_data.data(), website_data.size());
+ html_find_tags_with_class(html_root, "div", "phimage", [&result_items](HtmlElement *html_element) {
+ // NOTE(review): "VidPg-premVid-videoPage" presumably marks premium-video entries, which are skipped here - confirm
+ auto it = html_element->attributes.find("data-entrycode");
+ if(it == html_element->attributes.end() || it->second != "VidPg-premVid-videoPage") {
+ html_find_tags(html_element, "a", [&result_items](HtmlElement *html_element) {
+ const std::string &href = html_get_attribute_or(html_element, "href", "");
+ const std::string &title = html_get_attribute_or(html_element, "title", "");
+ // Only links that have a title and point to a video page become items
+ if(!href.empty() && !title.empty() && begins_with(href.c_str(), "/view_video.php?viewkey")) {
+ std::string title_fixed = strip(title);
+ html_unescape_sequences(title_fixed);
+ auto item = BodyItem::create(std::move(title_fixed));
+ item->url = std::string("https://www.pornhub.com") + href;
+ item->thumbnail_size = sf::Vector2i(192, 108);
+ result_items.push_back(std::move(item));
+
+ // The thumbnail is taken from an <img> nested inside this link; only "data-src" values that contain
+ // "phncdn.com/videos" are accepted, and it is stored on the item that was just appended
+ html_find_tags(html_element, "img", [&result_items](HtmlElement *html_element) {
+ const std::string &src = html_get_attribute_or(html_element, "data-src", "");
+ if(src.find("phncdn.com/videos") != std::string::npos)
+ result_items.back()->thumbnail_url = src;
+ });
+ }
+ });
+ }
+ });
+ html_cleanup(html_root);
// Attempt to skip promoted videos (that are not related to the search term)
+ // NOTE(review): get_related_media fetches its items through this function as well, so the first 4 items are dropped there too
- if(result_items.size() >= 4) {
+ if(result_items.size() >= 4)
result_items.erase(result_items.begin(), result_items.begin() + 4);
- }
- cleanup:
- quickmedia_html_search_deinit(&html_search);
- return result == 0 ? SearchResult::OK : SearchResult::ERR;
+ return SearchResult::OK;
+ }
+
+    // Runs a video search for |str| and appends the videos found on the result page to |result_items|.
+    // The search term is url-encoded before it is appended to the query string.
+    SearchResult PornhubSearchPage::search(const std::string &str, BodyItems &result_items) {
+        std::string search_url("https://www.pornhub.com/video/search?search=");
+        search_url += url_param_encode(str);
+        const bool use_tor = is_tor_enabled();
+        return get_videos_in_page(search_url, use_tor, result_items);
}
PluginResult PornhubSearchPage::submit(const std::string &title, const std::string &url, std::vector<Tab> &result_tabs) {
@@ -78,56 +140,7 @@ namespace QuickMedia {
+ // Returns the videos listed on the page at |url|, collected by get_videos_in_page.
BodyItems PornhubVideoPage::get_related_media(const std::string &url) {
BodyItems result_items;
-
- std::string website_data;
- if(download_to_string(url, website_data, {}, is_tor_enabled()) != DownloadResult::OK)
- return result_items;
-
- struct ItemData {
- BodyItems *result_items;
- size_t index;
- };
- ItemData item_data = { &result_items, 0 };
-
- QuickMediaHtmlSearch html_search;
- int result = quickmedia_html_search_init(&html_search, website_data.c_str());
- if(result != 0)
- goto cleanup;
-
- result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='phimage']//a",
- [](QuickMediaHtmlNode *node, void *userdata) {
- auto *result_items = (BodyItems*)userdata;
- const char *href = quickmedia_html_node_get_attribute_value(node, "href");
- const char *title = quickmedia_html_node_get_attribute_value(node, "title");
- if(href && title && begins_with(href, "/view_video.php?viewkey")) {
- auto item = BodyItem::create(strip(title));
- item->url = std::string("https://www.pornhub.com") + href;
- result_items->push_back(std::move(item));
- }
- }, &result_items);
- if(result != 0)
- goto cleanup;
-
- result = quickmedia_html_find_nodes_xpath(&html_search, "//div[class='phimage']//img",
- [](QuickMediaHtmlNode *node, void *userdata) {
- ItemData *item_data = (ItemData*)userdata;
- if(item_data->index >= item_data->result_items->size())
- return;
-
- const char *src = quickmedia_html_node_get_attribute_value(node, "src");
- if(src && contains(src, "phncdn.com/videos")) {
- (*item_data->result_items)[item_data->index]->thumbnail_url = src;
- ++item_data->index;
- }
- }, &item_data);
-
- // Attempt to skip promoted videos (that are not related to the search term)
- if(result_items.size() >= 4) {
- result_items.erase(result_items.begin(), result_items.begin() + 4);
- }
-
- cleanup:
- quickmedia_html_search_deinit(&html_search);
+ // The returned SearchResult is ignored: when the download fails get_videos_in_page returns before touching
+ // result_items, so the caller simply gets an empty list
+ get_videos_in_page(url, is_tor_enabled(), result_items);
return result_items;
}
} \ No newline at end of file