#include "../../plugins/Pornhub.hpp"
#include "../../include/StringUtils.hpp"
#include "../../include/NetUtils.hpp"
// NOTE(review): the angle-bracket text in this file (system header names and template arguments)
// was missing from the copy that was reviewed and has been reconstructed from how the names are
// used below. Confirm <HtmlParser.h>, <string.h> and the template arguments against the repository.
extern "C" {
#include <HtmlParser.h>
}
#include <string.h>

namespace QuickMedia {
    // Returns true when |str| starts with |begin_with|. Both must be NUL-terminated.
    static bool begins_with(const char *str, const char *begin_with) {
        return strncmp(str, begin_with, strlen(begin_with)) == 0;
    }

    // One node of the element tree built by html_parse.
    // TODO: Optimize by using HtmlStringView instead of std::string
    struct HtmlElement {
        std::string tag_name;
        std::map<std::string, std::string> attributes;
        std::vector<HtmlElement*> children; // allocated with new, released by html_cleanup
        HtmlElement *parent = nullptr; // ref
    };

    // Deletes |html_element_root| and, recursively, every element reachable through `children`.
    static void html_cleanup(HtmlElement *html_element_root) {
        for(HtmlElement *child_html_element : html_element_root->children) {
            html_cleanup(child_html_element);
        }
        delete html_element_root;
    }

    // Returns the value of attribute |attr_key| on |html_element|, or |default_value| when the
    // element has no such attribute.
    //
    // The result is returned by value on purpose: callers pass string literals as the default,
    // which materialize a temporary std::string. Returning a reference to that parameter left the
    // caller with a dangling reference as soon as the call expression ended.
    static std::string html_get_attribute_or(HtmlElement *html_element, const std::string &attr_key, const std::string &default_value) {
        auto it = html_element->attributes.find(attr_key);
        if(it != html_element->attributes.end())
            return it->second;
        else
            return default_value;
    }

    // State shared with html_page_callback while parsing.
    struct HtmlParseUserdata {
        HtmlElement *current_html_element; // element that new tags and attributes are attached to
    };

    // Callback registered with html_parser_init in html_parse. Builds the HtmlElement tree:
    //  - tag start: a new child is appended to the current element and becomes the current element
    //  - tag end:   the current element's parent becomes the current element (the root is never left)
    //  - attribute: the key/value pair is stored on the current element
    static void html_page_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) {
        HtmlParseUserdata *parse_userdata = static_cast<HtmlParseUserdata*>(userdata);
        if(parse_type == HTML_PARSE_TAG_START) {
            auto new_html_element = new HtmlElement();
            new_html_element->tag_name.assign(html_parser->tag_name.data, html_parser->tag_name.size);
            new_html_element->parent = parse_userdata->current_html_element;

            parse_userdata->current_html_element->children.push_back(new_html_element);
            parse_userdata->current_html_element = new_html_element;
        } else if(parse_type == HTML_PARSE_TAG_END) {
            // The root has no parent; stay on it if there are more end tags than start tags.
            if(parse_userdata->current_html_element->parent)
                parse_userdata->current_html_element = parse_userdata->current_html_element->parent;
        } else if(parse_type == HTML_PARSE_ATTRIBUTE) {
            std::string attr_key(html_parser->attribute_key.data, html_parser->attribute_key.size);
            std::string attr_value(html_parser->attribute_value.data, html_parser->attribute_value.size);
            // emplace keeps the first value when a key is repeated, same as insert did.
            parse_userdata->current_html_element->attributes.emplace(std::move(attr_key), std::move(attr_value));
        }
    }

    // Parses |size| bytes of HTML at |source| into a tree of HtmlElement.
    // Returns a synthetic root element (empty tag name) whose children are the parsed elements.
    // The caller owns the tree and must release it with html_cleanup.
    static HtmlElement* html_parse(char *source, size_t size) {
        HtmlElement *html_element_root = new HtmlElement();
        HtmlParseUserdata parse_userdata;
        parse_userdata.current_html_element = html_element_root;
        HtmlParser html_parser;
        html_parser_init(&html_parser, source, size, html_page_callback, &parse_userdata);
        html_parser_parse(&html_parser);
        html_parser_deinit(&html_parser);
        return html_element_root;
    }

    using HtmlFindTagsCallback = std::function<void(HtmlElement*)>;

    // Calls |callback| for |html_element| and every descendant whose tag name is |tag_name| and
    // whose "class" attribute is exactly |class_value| (a missing attribute is treated as "").
    // Elements are visited parent first, then children in order.
    static void html_find_tags_with_class(HtmlElement *html_element, const std::string &tag_name, const std::string &class_value, const HtmlFindTagsCallback &callback) {
        if(html_element->tag_name == tag_name) {
            if(html_get_attribute_or(html_element, "class", "") == class_value)
                callback(html_element);
        }
        for(HtmlElement *child_html_element : html_element->children) {
            html_find_tags_with_class(child_html_element, tag_name, class_value, callback);
        }
    }

    // Calls |callback| for |html_element| and every descendant whose tag name is |tag_name|.
    // Elements are visited parent first, then children in order.
    static void html_find_tags(HtmlElement *html_element, const std::string &tag_name, const HtmlFindTagsCallback &callback) {
        if(html_element->tag_name == tag_name)
            callback(html_element);
        for(HtmlElement *child_html_element : html_element->children) {
            html_find_tags(child_html_element, tag_name, callback);
        }
    }

    // Downloads |url| and appends one item to |result_items| for every video link found in it.
    //
    // A video link is an <a> inside a <div class="phimage"> that has a non-empty "title" and an
    // "href" starting with "/view_video.php?viewkey". Divs whose "data-entrycode" attribute equals
    // "VidPg-premVid-videoPage" are skipped. The thumbnail is taken from the "data-src" attribute
    // of an <img> below the link when it contains "phncdn.com/videos".
    //
    // Returns SearchResult::NET_ERR when the download fails, otherwise SearchResult::OK.
    static SearchResult get_videos_in_page(const std::string &url, bool use_tor, BodyItems &result_items) {
        std::string website_data;
        if(download_to_string(url, website_data, {}, use_tor) != DownloadResult::OK)
            return SearchResult::NET_ERR;

        HtmlElement *html_root = html_parse(website_data.data(), website_data.size());
        html_find_tags_with_class(html_root, "div", "phimage", [&result_items](HtmlElement *html_element) {
            auto it = html_element->attributes.find("data-entrycode");
            if(it == html_element->attributes.end() || it->second != "VidPg-premVid-videoPage") {
                html_find_tags(html_element, "a", [&result_items](HtmlElement *html_element) {
                    // Stored by value: the attribute may be absent, in which case the result is the
                    // default and must not refer to the temporary created for the "" argument.
                    const std::string href = html_get_attribute_or(html_element, "href", "");
                    const std::string title = html_get_attribute_or(html_element, "title", "");
                    if(!href.empty() && !title.empty() && begins_with(href.c_str(), "/view_video.php?viewkey")) {
                        std::string title_fixed = strip(title);
                        html_unescape_sequences(title_fixed);
                        auto item = BodyItem::create(std::move(title_fixed));
                        item->url = std::string("https://www.pornhub.com") + href;
                        item->thumbnail_size = sf::Vector2i(192, 108);
                        result_items.push_back(std::move(item));

                        // result_items.back() is the item pushed just above.
                        html_find_tags(html_element, "img", [&result_items](HtmlElement *html_element) {
                            const std::string src = html_get_attribute_or(html_element, "data-src", "");
                            if(src.find("phncdn.com/videos") != std::string::npos)
                                result_items.back()->thumbnail_url = src;
                        });
                    }
                });
            }
        });
        html_cleanup(html_root);

        // Attempt to skip promoted videos (that are not related to the search term)
        // NOTE(review): this drops the first 4 entries of |result_items| itself, so it presumably
        // expects the caller to pass an empty list — confirm against callers.
        if(result_items.size() >= 4)
            result_items.erase(result_items.begin(), result_items.begin() + 4);

        return SearchResult::OK;
    }

    // Searches for |str| and appends the videos of the first result page to |result_items|.
    SearchResult PornhubSearchPage::search(const std::string &str, BodyItems &result_items) {
        std::string url = "https://www.pornhub.com/video/search?search=";
        url += url_param_encode(str);
        return get_videos_in_page(url, is_tor_enabled(), result_items);
    }

    // Opens a video page tab for the submitted item. |title| and |url| are not used here.
    PluginResult PornhubSearchPage::submit(const std::string &title, const std::string &url, std::vector<Tab> &result_tabs) {
        (void)title;
        (void)url;
        result_tabs.push_back(Tab{create_body(), std::make_unique<PornhubVideoPage>(program), nullptr});
        return PluginResult::OK;
    }

    // Returns the videos listed on the page at |url|. A failed download yields an empty list;
    // the error code of get_videos_in_page is intentionally not propagated.
    BodyItems PornhubVideoPage::get_related_media(const std::string &url) {
        BodyItems result_items;
        get_videos_in_page(url, is_tor_enabled(), result_items);
        return result_items;
    }
}