aboutsummaryrefslogtreecommitdiff
path: root/src/plugins/NyaaSi.cpp
diff options
context:
space:
mode:
authordec05eba <dec05eba@protonmail.com>2020-10-03 08:58:11 +0200
committerdec05eba <dec05eba@protonmail.com>2020-10-03 08:58:11 +0200
commitc470174b397c7a81b7510f191e404cb895d462af (patch)
treeb0c41393b887cb598e9f5e89c35002aafde79b96 /src/plugins/NyaaSi.cpp
parentacaa77f837af3ea8503888910d7888efd3c6cb3e (diff)
Nyaa.si: parse html instead of rss, allows for pagination
Diffstat (limited to 'src/plugins/NyaaSi.cpp')
-rw-r--r--src/plugins/NyaaSi.cpp208
1 files changed, 140 insertions, 68 deletions
diff --git a/src/plugins/NyaaSi.cpp b/src/plugins/NyaaSi.cpp
index 8e13789..98c6eee 100644
--- a/src/plugins/NyaaSi.cpp
+++ b/src/plugins/NyaaSi.cpp
@@ -3,19 +3,30 @@
#include <quickmedia/HtmlSearch.h>
namespace QuickMedia {
- // Returns empty string on error
- static std::string get_rss_item_text(const std::string &data, size_t start, size_t end, const std::string &tag_start, const std::string &tag_end) {
- size_t item_begin = data.find(tag_start, start);
- if(item_begin == std::string::npos || item_begin >= end)
- return "";
-
- size_t item_end = data.find(tag_end, item_begin + tag_start.size());
- if(item_end == std::string::npos || item_end >= end)
- return "";
-
- std::string result = data.substr(item_begin + tag_start.size(), item_end - (item_begin + tag_start.size()));
- html_unescape_sequences(result);
- return strip(result);
+ // Return end of td tag (the index just past "</td>"), or std::string::npos if no complete td element is found before end_index. On success |result| receives the text between the '>' ending the opening tag and the first "</td>" after it; on failure |result| is not modified.
+ static size_t find_td_get_value(const std::string &str, size_t start_index, size_t end_index, std::string &result) {
+ size_t td_begin = str.find("<td", start_index); // first "<td" at or after start_index
+ if(td_begin == std::string::npos || td_begin >= end_index) // missing, or located at/after end_index
+ return std::string::npos;
+
+ size_t td_end = str.find("</td>", td_begin + 3); // 3 == length of "<td"
+ if(td_end == std::string::npos || td_end >= end_index)
+ return std::string::npos;
+
+ size_t value_begin = str.find('>', td_begin + 3); // first '>' after "<td", i.e. the end of the opening tag
+ if(value_begin == std::string::npos || value_begin >= td_end)
+ return std::string::npos;
+
+ result = str.substr(value_begin + 1, td_end - (value_begin + 1)); // copied verbatim; no unescaping or trimming is done here
+ return td_end + 5; // 5 == length of "</td>"
+ }
+
+ static bool is_only_numbers(const char *str, size_t size) { // true when each of the first |size| chars of |str| is in '0'..'9'; also true when size == 0
+ for(size_t i = 0; i < size; ++i) {
+ if(str[i] < '0' || str[i] > '9') // a single non-digit char rejects the whole range
+ return false;
+ }
+ return true; // reached for all-digit input and for an empty range
}
NyaaSi::NyaaSi() : Plugin("nyaa.si") {
@@ -60,77 +71,138 @@ namespace QuickMedia {
return PluginResult::OK;
}
+ // TODO: Also show the number of comments for each torrent. TODO: Optimize?
+ // TODO: Show each field as separate columns instead of separating by |
SearchResult NyaaSi::content_list_search(const std::string &list_url, const std::string &text, BodyItems &result_items) {
- std::string full_url = "https://nyaa.si/?page=rss&c=" + list_url + "&f=0&p=1&q=";
+ std::string full_url = "https://nyaa.si/?c=" + list_url + "&f=0&p=1&q=";
full_url += url_param_encode(text);
std::string website_data;
- if(download_to_string(full_url, website_data, {}, use_tor) != DownloadResult::OK)
+ if(download_to_string(full_url, website_data, {}, use_tor, true) != DownloadResult::OK)
return SearchResult::NET_ERR;
- const std::string title_tag_begin = "<title>";
- const std::string title_tag_end = "</title>";
- const std::string link_tag_begin = "<guid isPermaLink=\"true\">";
- const std::string link_tag_end = "</guid>";
- const std::string pub_date_tag_begin = "<pubDate>";
- const std::string pub_date_tag_end = "</pubDate>";
- const std::string seeders_tag_begin = "<nyaa:seeders>";
- const std::string seeders_tag_end = "</nyaa:seeders>";
- const std::string leechers_tag_begin = "<nyaa:leechers>";
- const std::string leechers_tag_end = "</nyaa:leechers>";
- const std::string downloads_tag_begin = "<nyaa:downloads>";
- const std::string downloads_tag_end = "</nyaa:downloads>";
- const std::string category_id_tag_begin = "<nyaa:categoryId>";
- const std::string category_id_tag_end = "</nyaa:categoryId>";
- const std::string size_tag_begin = "<nyaa:size>";
- const std::string size_tag_end = "</nyaa:size>";
- const std::string comments_tag_begin = "<nyaa:comments>";
- const std::string comments_tag_end = "</nyaa:comments>";
- const std::string trusted_tag_begin = "<nyaa:trusted>";
- const std::string trusted_tag_end = "</nyaa:trusted>";
- const std::string remake_tag_begin = "<nyaa:remake>";
- const std::string remake_tag_end = "</nyaa:remake>";
-
- size_t index = 0;
- while(index < website_data.size()) {
- size_t item_start = website_data.find("<item>", index);
- if(item_start == std::string::npos)
+ size_t tbody_begin = website_data.find("<tbody>");
+ if(tbody_begin == std::string::npos)
+ return SearchResult::ERR;
+
+ size_t tbody_end = website_data.find("</tbody>", tbody_begin + 7);
+ if(tbody_end == std::string::npos)
+ return SearchResult::ERR;
+
+ size_t index = tbody_begin + 7;
+ while(index < tbody_end) {
+ size_t tr_begin = website_data.find("<tr", index);
+ if(tr_begin == std::string::npos || tr_begin >= tbody_end)
break;
- index = item_start + 6;
+ size_t tr_end = website_data.find("</tr>", tr_begin + 3);
+ if(tr_end == std::string::npos || tr_end >= tbody_end)
+ return SearchResult::ERR;
+
+ index = tr_begin + 3;
+
+ bool is_trusted = false;
+ bool is_remake = false;
+ size_t tr_class_begin = website_data.find("class=\"", index);
+ if(tr_class_begin != std::string::npos && tr_class_begin < tr_end) {
+ size_t tr_class_end = website_data.find('"', tr_class_begin + 7);
+ size_t class_length = tr_class_end - (tr_class_begin + 7);
+ if(strncmp(website_data.c_str() + tr_class_begin + 7, "success", class_length) == 0)
+ is_trusted = true;
+ else if(strncmp(website_data.c_str() + tr_class_begin + 7, "danger", class_length) == 0)
+ is_remake = true;
+ index = tr_class_end + 1;
+ }
+
+ size_t category_begin = website_data.find("/?c=", index);
+ if(category_begin == std::string::npos || category_begin >= tr_end)
+ return SearchResult::ERR;
+
+ size_t category_end = website_data.find('"', category_begin + 4);
+ if(category_end == std::string::npos || category_end >= tr_end)
+ return SearchResult::ERR;
- size_t item_end = website_data.find("</item>", index);
- if(item_end == std::string::npos)
+ index = category_end + 1;
+ size_t view_begin = website_data.find("/view/", index);
+ if(view_begin == std::string::npos || view_begin >= tr_end)
return SearchResult::ERR;
- std::string title = get_rss_item_text(website_data, index, item_end, title_tag_begin, title_tag_end);
- std::string link = get_rss_item_text(website_data, index, item_end, link_tag_begin, link_tag_end);
- std::string pub_date = get_rss_item_text(website_data, index, item_end, pub_date_tag_begin, pub_date_tag_end);
- std::string seeders = get_rss_item_text(website_data, index, item_end, seeders_tag_begin, seeders_tag_end);
- std::string leechers = get_rss_item_text(website_data, index, item_end, leechers_tag_begin, leechers_tag_end);
- std::string downloads = get_rss_item_text(website_data, index, item_end, downloads_tag_begin, downloads_tag_end);
- std::string category_id = get_rss_item_text(website_data, index, item_end, category_id_tag_begin, category_id_tag_end);
- std::string size = get_rss_item_text(website_data, index, item_end, size_tag_begin, size_tag_end);
- std::string comments = get_rss_item_text(website_data, index, item_end, comments_tag_begin, comments_tag_end);
- std::string trusted = get_rss_item_text(website_data, index, item_end, trusted_tag_begin, trusted_tag_end);
- std::string remake = get_rss_item_text(website_data, index, item_end, remake_tag_begin, remake_tag_end);
-
- if(title.empty() || link.empty() || pub_date.empty() || seeders.empty() || leechers.empty() || downloads.empty() || category_id.empty() || size.empty()) {
- fprintf(stderr, "Error: failed to parse nyaa.si rss items\n");
+ size_t view_end = website_data.find('"', view_begin + 6);
+ if(view_end == std::string::npos || view_end >= tr_end)
return SearchResult::ERR;
+
+ std::string view_url = website_data.substr(view_begin, view_end - view_begin);
+
+ // Torrents with comments have two /view/, one for comments and one for the title
+ if(!is_only_numbers(website_data.c_str() + view_begin + 6, view_end - (view_begin + 6))) {
+ size_t view_begin2 = website_data.find("/view/", view_end + 1);
+ if(view_begin2 == std::string::npos || view_begin2 >= tr_end)
+ return SearchResult::ERR;
+
+ size_t view_end2 = website_data.find('"', view_begin2 + 6);
+ if(view_end2 == std::string::npos || view_end2 >= tr_end)
+ return SearchResult::ERR;
+
+ view_end = view_end2;
}
+ size_t title_begin = website_data.find('>', view_end + 1);
+ if(title_begin == std::string::npos || title_begin >= tr_end)
+ return SearchResult::ERR;
+ size_t title_end = website_data.find("</a>", title_begin + 1);
+ if(title_end == std::string::npos || title_end >= tr_end)
+ return SearchResult::ERR;
+ std::string title = website_data.substr(title_begin + 1, title_end - (title_begin + 1));
+ html_unescape_sequences(title);
+ title = strip(title);
+
+ index = title_end + 4;
+ size_t magnet_begin = website_data.find("magnet:?xt", index);
+ if(magnet_begin == std::string::npos || magnet_begin >= tr_end)
+ return SearchResult::ERR;
+
+ size_t magnet_end = website_data.find('"', magnet_begin + 10);
+ if(magnet_end == std::string::npos || magnet_end >= tr_end)
+ return SearchResult::ERR;
+
+ index = magnet_end + 1;
+ std::string size;
+ index = find_td_get_value(website_data, index, tr_end, size);
+ if(index == std::string::npos)
+ return SearchResult::ERR;
+
+ std::string timestamp;
+ index = find_td_get_value(website_data, index, tr_end, timestamp);
+ if(index == std::string::npos)
+ return SearchResult::ERR;
+
+ std::string seeders;
+ index = find_td_get_value(website_data, index, tr_end, seeders);
+ if(index == std::string::npos)
+ return SearchResult::ERR;
+
+ std::string leechers;
+ index = find_td_get_value(website_data, index, tr_end, leechers);
+ if(index == std::string::npos)
+ return SearchResult::ERR;
+
+ std::string completed;
+ index = find_td_get_value(website_data, index, tr_end, completed);
+ if(index == std::string::npos)
+ return SearchResult::ERR;
+
+ index = tr_end + 5;
+
+ std::string description = "Size: " + size + " | Published: " + timestamp + " | Seeders: " + seeders + " | Leechers: " + leechers + " | Completed: " + completed;
auto body_item = BodyItem::create(std::move(title));
- body_item->url = std::move(link);
- body_item->thumbnail_url = "https://nyaa.si/static/img/icons/nyaa/" + category_id + ".png";
- body_item->set_description("Published: " + pub_date + "\nSeeders: " + seeders + "\nLeechers: " + leechers + "\nDownloads: " + downloads + "\nSize: " + size + "\nComments: " + comments);
- if(trusted == "Yes")
- body_item->title_color = sf::Color(43, 200, 47);
- else if(remake == "Yes")
- body_item->title_color = sf::Color(200, 45, 47);
+ body_item->thumbnail_url = "https://nyaa.si/static/img/icons/nyaa/" + website_data.substr(category_begin + 4, category_end - (category_begin + 4)) + ".png";
+ body_item->set_description(std::move(description));
+ body_item->url = "https://nyaa.si" + std::move(view_url);
+ if(is_trusted)
+ body_item->title_color = sf::Color(43, 255, 47);
+ else if(is_remake)
+ body_item->title_color = sf::Color(255, 45, 47);
result_items.push_back(std::move(body_item));
-
- index = item_end + 7;
}
return SearchResult::OK;