// path: root/src/plugins/Pornhub.cpp
// blob: b4e908a2dcb83865090cb19c9d4b63def0be6555 (plain)
#include "../../plugins/Pornhub.hpp"
#include "../../include/StringUtils.hpp"
#include "../../include/NetUtils.hpp"
extern "C" {
#include <HtmlParser.h>
}
#include <string.h>

namespace {
    // A single node of the element tree that is built from the events reported by HtmlParser.
    // TODO: Optimize by using HtmlStringView instead of std::string
    struct HtmlElement {
        // Name of the tag this node was created for. Left empty for the root node created by html_parse
        std::string tag_name;
        // Attribute key -> value. When a key is reported more than once the first value is kept
        std::map<std::string, std::string> attributes;
        // Child nodes. They are heap allocated and released together with this node by html_cleanup
        std::vector<HtmlElement*> children;
        HtmlElement *parent = nullptr; // ref
    };

    // State handed to the parser callback through its |userdata| pointer
    struct HtmlParseUserdata {
        // Node that new children and attributes are attached to.
        // Initialized so that a default constructed instance never holds an indeterminate pointer
        HtmlElement *current_html_element = nullptr;
    };
}

namespace QuickMedia {
    // Returns true if the null-terminated string |str| starts with the null-terminated string |begin_with|.
    // An empty |begin_with| matches every string.
    static bool begins_with(const char *str, const char *begin_with) {
        // Walk both strings in lockstep until the prefix is exhausted.
        // If |str| ends first then its terminator differs from the prefix character and the match fails.
        for(; *begin_with != '\0'; ++str, ++begin_with) {
            if(*str != *begin_with)
                return false;
        }
        return true;
    }

    // Deletes |html_element_root| and every element below it.
    // Passing nullptr is allowed and does nothing.
    static void html_cleanup(HtmlElement *html_element_root) {
        if(!html_element_root)
            return;

        // The tree is built from downloaded html, so its depth is not under our control.
        // Use an explicit stack instead of recursion so that deeply nested markup can't exhaust the call stack.
        std::vector<HtmlElement*> pending_elements;
        pending_elements.push_back(html_element_root);
        while(!pending_elements.empty()) {
            HtmlElement *html_element = pending_elements.back();
            pending_elements.pop_back();
            // Queue the children before the element (and with it the child list) is destroyed
            pending_elements.insert(pending_elements.end(), html_element->children.begin(), html_element->children.end());
            delete html_element;
        }
    }

    static const std::string& html_get_attribute_or(HtmlElement *html_element, const std::string &attr_key, const std::string &default_value) {
        auto it = html_element->attributes.find(attr_key);
        if(it != html_element->attributes.end())
            return it->second;
        else
            return default_value;
    }

    // Parser callback that builds the HtmlElement tree.
    // |userdata| is the HtmlParseUserdata that was registered in html_parse; its current element is
    // the node that is being filled in.
    static void html_page_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) {
        HtmlParseUserdata *state = static_cast<HtmlParseUserdata*>(userdata);
        switch(parse_type) {
            case HTML_PARSE_TAG_START: {
                // Attach a new child to the current element and descend into it
                HtmlElement *child = new HtmlElement();
                child->tag_name.assign(html_parser->tag_name.data, html_parser->tag_name.size);
                child->parent = state->current_html_element;

                state->current_html_element->children.push_back(child);
                state->current_html_element = child;
                break;
            }
            case HTML_PARSE_TAG_END: {
                // Step back up one level. The root has no parent and is never left
                HtmlElement *parent = state->current_html_element->parent;
                if(parent)
                    state->current_html_element = parent;
                break;
            }
            case HTML_PARSE_ATTRIBUTE: {
                std::string key(html_parser->attribute_key.data, html_parser->attribute_key.size);
                std::string value(html_parser->attribute_value.data, html_parser->attribute_value.size);
                // An already existing key is left untouched, so the first value wins
                state->current_html_element->attributes.emplace(std::move(key), std::move(value));
                break;
            }
            default:
                // Other parse events are not needed to build the tree
                break;
        }
    }

    // Parses |size| bytes of html starting at |source| into a tree of HtmlElement.
    // Returns a root node without a tag name that holds the parsed elements as descendants.
    // The caller owns the returned tree and has to release it with html_cleanup.
    static HtmlElement* html_parse(char *source, size_t size) {
        HtmlElement *root = new HtmlElement();
        // The callback starts out attaching elements directly to the root
        HtmlParseUserdata callback_state{root};

        HtmlParser parser;
        html_parser_init(&parser, source, size, html_page_callback, &callback_state);
        html_parser_parse(&parser);
        html_parser_deinit(&parser);

        return root;
    }

    using HtmlFindTagsCallback = std::function<void(HtmlElement *html_element)>;
    // Calls |callback| for |html_element| and for each of its descendants whose tag name is |tag_name|
    // and whose "class" attribute is exactly |class_value|. A missing "class" attribute is treated as
    // an empty string. An element is visited before its children.
    static void html_find_tags_with_class(HtmlElement *html_element, const std::string &tag_name, const std::string &class_value, const HtmlFindTagsCallback &callback) {
        const bool tag_matches = html_element->tag_name == tag_name;
        if(tag_matches && html_get_attribute_or(html_element, "class", "") == class_value)
            callback(html_element);

        for(auto child_it = html_element->children.begin(); child_it != html_element->children.end(); ++child_it)
            html_find_tags_with_class(*child_it, tag_name, class_value, callback);
    }

    // Calls |callback| for |html_element| and for each of its descendants whose tag name is |tag_name|.
    // An element is visited before its children.
    static void html_find_tags(HtmlElement *html_element, const std::string &tag_name, const HtmlFindTagsCallback &callback) {
        const bool tag_matches = html_element->tag_name == tag_name;
        if(tag_matches)
            callback(html_element);

        for(auto child_it = html_element->children.begin(); child_it != html_element->children.end(); ++child_it)
            html_find_tags(*child_it, tag_name, callback);
    }

    // Downloads the page at |url| and appends one item to |result_items| for every video link found
    // inside a <div class="phimage"> element, except for divs that are marked with
    // data-entrycode="VidPg-premVid-videoPage".
    // Each item gets the unescaped link title, the absolute video url and, if an <img> with a matching
    // data-src is found below the link, a thumbnail url.
    // Returns SearchResult::NET_ERR if the download fails (|result_items| is left untouched in that case),
    // otherwise SearchResult::OK.
    static SearchResult get_videos_in_page(const std::string &url, bool use_tor, BodyItems &result_items) {
        // Number of leading results that are assumed to be promoted videos
        const size_t num_promoted_videos = 4;
        // Default value for missing attributes. This has to be a named object that outlives the
        // references below; passing a literal would create a temporary std::string that is destroyed
        // at the end of the call expression and leave those references dangling.
        const std::string no_value;

        std::string website_data;
        if(download_to_string(url, website_data, {}, use_tor) != DownloadResult::OK)
            return SearchResult::NET_ERR;

        // Remember where the items of this page start, in case the caller passed a list that already has items
        const size_t num_items_before = result_items.size();

        // Owning handle so the tree is also released if one of the callbacks throws
        std::unique_ptr<HtmlElement, void(*)(HtmlElement*)> html_root(html_parse(website_data.data(), website_data.size()), html_cleanup);
        html_find_tags_with_class(html_root.get(), "div", "phimage", [&result_items, &no_value](HtmlElement *phimage_element) {
            auto entrycode_it = phimage_element->attributes.find("data-entrycode");
            if(entrycode_it != phimage_element->attributes.end() && entrycode_it->second == "VidPg-premVid-videoPage")
                return;

            html_find_tags(phimage_element, "a", [&result_items, &no_value](HtmlElement *link_element) {
                const std::string &href = html_get_attribute_or(link_element, "href", no_value);
                const std::string &title = html_get_attribute_or(link_element, "title", no_value);
                if(href.empty() || title.empty() || !begins_with(href.c_str(), "/view_video.php?viewkey"))
                    return;

                std::string title_fixed = strip(title);
                html_unescape_sequences(title_fixed);
                auto item = BodyItem::create(std::move(title_fixed));
                item->url = std::string("https://www.pornhub.com") + href;
                item->thumbnail_size = sf::Vector2i(192, 108);
                result_items.push_back(std::move(item));

                // The thumbnail belongs to the item that was just added
                html_find_tags(link_element, "img", [&result_items, &no_value](HtmlElement *img_element) {
                    const std::string &src = html_get_attribute_or(img_element, "data-src", no_value);
                    if(src.find("phncdn.com/videos") != std::string::npos)
                        result_items.back()->thumbnail_url = src;
                });
            });
        });
        html_root.reset();

        // Attempt to skip promoted videos (that are not related to the search term).
        // Only items added by this call are considered, so items that were already in the list are never removed.
        const size_t num_new_items = result_items.size() - num_items_before;
        if(num_new_items >= num_promoted_videos) {
            const auto first_new_item = result_items.begin() + num_items_before;
            result_items.erase(first_new_item, first_new_item + num_promoted_videos);
        }

        return SearchResult::OK;
    }

    // Searches for |str| and appends the videos of the first result page to |result_items|
    SearchResult PornhubSearchPage::search(const std::string &str, BodyItems &result_items) {
        const std::string search_url = "https://www.pornhub.com/video/search?search=" + url_param_encode(str);
        return get_videos_in_page(search_url, is_tor_enabled(), result_items);
    }

    // Appends the videos of result page |page| for the search term |str| to |result_items|.
    // The site is asked for page number 1 + |page|.
    PluginResult PornhubSearchPage::get_page(const std::string &str, int page, BodyItems &result_items) {
        std::string page_url = "https://www.pornhub.com/video/search?search=";
        page_url += url_param_encode(str);
        page_url += "&page=";
        page_url += std::to_string(1 + page);

        const SearchResult search_result = get_videos_in_page(page_url, is_tor_enabled(), result_items);
        return search_result_to_plugin_result(search_result);
    }

    // Adds a tab with a new PornhubVideoPage to |result_tabs|. The title and url arguments are not used.
    PluginResult PornhubSearchPage::submit(const std::string&, const std::string&, std::vector<Tab> &result_tabs) {
        Tab video_tab{nullptr, std::make_unique<PornhubVideoPage>(program), nullptr};
        result_tabs.push_back(std::move(video_tab));
        return PluginResult::OK;
    }

    // Adds a tab with a new PornhubVideoPage to |result_tabs|. The title and url arguments are not used.
    PluginResult PornhubRelatedVideosPage::submit(const std::string&, const std::string&, std::vector<Tab> &result_tabs) {
        Tab video_tab{nullptr, std::make_unique<PornhubVideoPage>(program), nullptr};
        result_tabs.push_back(std::move(video_tab));
        return PluginResult::OK;
    }

    // Returns the videos that are listed on the page at |url|. The second argument is not used.
    // The result of the page fetch is not checked; if it fails the returned list is empty.
    BodyItems PornhubVideoPage::get_related_media(const std::string &url, std::string&) {
        BodyItems related_items;
        const bool use_tor = is_tor_enabled();
        get_videos_in_page(url, use_tor, related_items);
        return related_items;
    }

    // Creates the search page for this plugin and sets |search_delay| to 500
    std::unique_ptr<Page> PornhubVideoPage::create_search_page(Program *program, int &search_delay) {
        search_delay = 500;
        std::unique_ptr<Page> search_page = std::make_unique<PornhubSearchPage>(program);
        return search_page;
    }

    // Creates the related videos page for this plugin. The two string arguments are not used.
    std::unique_ptr<RelatedVideosPage> PornhubVideoPage::create_related_videos_page(Program *program, const std::string&, const std::string&) {
        std::unique_ptr<RelatedVideosPage> related_videos_page = std::make_unique<PornhubRelatedVideosPage>(program);
        return related_videos_page;
    }
}