#include "../../plugins/Fourchan.hpp"
#include "../../include/DataView.hpp"
#include "../../include/Storage.hpp"
#include "../../include/StringUtils.hpp"
#include "../../include/NetUtils.hpp"
#include "../../include/Notification.hpp"
#include "../../external/cppcodec/base64_rfc4648.hpp"
#include "../../include/QuickMedia.hpp"
// NOTE(review): the targets of the three angle-bracket includes were missing from the
// collapsed source. The first three below are reconstructed from what this file uses
// (HtmlParser, Json::CharReaderBuilder, memcmp/strcmp) -- confirm against the build.
#include <HtmlParser.h>
#include <json/reader.h>
#include <string.h>
// Standard headers for names used directly in this file.
#include <stdio.h>
#include <stdlib.h>
#include <functional>
#include <stack>
#include <unordered_map>

// API documentation: https://github.com/4chan/4chan-API
static const std::string fourchan_url = "https://a.4cdn.org/";
static const std::string fourchan_image_url = "https://i.4cdn.org/";
static const char *SERVICE_NAME = "4chan";

namespace QuickMedia {
    // Reads the cookies file and returns the stripped text that follows the first
    // occurrence of "pass_id" on its line.
    // Returns empty string on failure to read cookie
    static std::string get_pass_id_from_cookies_file(const Path &cookies_filepath) {
        std::string file_content;
        if(file_get_content(cookies_filepath, file_content) != 0)
            return "";

        size_t pass_id_index = file_content.find("pass_id");
        if(pass_id_index == std::string::npos)
            return "";

        pass_id_index += 7; // strlen("pass_id")
        size_t line_end = file_content.find('\n', pass_id_index);
        if(line_end == std::string::npos)
            line_end = file_content.size();

        return strip(file_content.substr(pass_id_index, line_end - pass_id_index));
    }

    // Downloads the auth page using the stored cookies and reports whether the user
    // appears to be logged in. Shows a notification and returns false if the download fails.
    static bool is_logged_in() {
        Path cookies_filepath;
        if(get_cookies_filepath(cookies_filepath, SERVICE_NAME) != 0) {
            fprintf(stderr, "Failed to get 4chan cookies filepath\n");
            return false;
        }

        std::vector<CommandArg> additional_args = {
            CommandArg{"-c", cookies_filepath.data},
            CommandArg{"-b", cookies_filepath.data}
        };

        std::string website_data;
        if(download_to_string("https://sys.4chan.org/auth", website_data, additional_args) != DownloadResult::OK) {
            show_notification("QuickMedia", "Failed to check if you are logged in", Urgency::CRITICAL);
            return false;
        }

        // TODO: Check if this is correct
        return website_data.find("field-id") == std::string::npos;
    }

    // Posts |token| and |pin| to the auth endpoint.
    // |response_msg| is set to the "message" field of the response when it is a string.
    // On success (status == 1) |pass_id| is read back from the cookies file; the login
    // only counts as successful if that pass id is non-empty.
    static bool login(Page *page, const std::string &token, const std::string &pin, std::string &pass_id, std::string &response_msg) {
        response_msg.clear();

        Path cookies_filepath;
        if(get_cookies_filepath(cookies_filepath, SERVICE_NAME) != 0) {
            fprintf(stderr, "Failed to get 4chan cookies filepath\n");
            return false;
        }

        std::vector<CommandArg> additional_args = {
            CommandArg{"--form-string", "id=" + token},
            CommandArg{"--form-string", "pin=" + pin},
            CommandArg{"--form-string", "xhr=1"},
            CommandArg{"-c", cookies_filepath.data}
        };

        Json::Value json_root;
        DownloadResult result = page->download_json(json_root, "https://sys.4chan.org/auth", std::move(additional_args), true);
        if(result != DownloadResult::OK)
            return false;

        if(!json_root.isObject())
            return false;

        const Json::Value &status_json = json_root["status"];
        if(!status_json.isNumeric())
            return false;

        const Json::Value &message_json = json_root["message"];
        if(message_json.isString())
            response_msg = message_json.asString();

        if(status_json.asInt64() != 1)
            return false;

        pass_id = get_pass_id_from_cookies_file(cookies_filepath);
        return !pass_id.empty();
    }

    // One run of text extracted from a comment's html, tagged with the markup it came from
    struct CommentPiece {
        enum class Type {
            TEXT,
            QUOTE, // >, Set for span
            QUOTE_CONTINUE, // Set for span
            QUOTELINK, // >>POSTNO, Set for a
            DEADLINK, // Set for span
            CROSSBOARD_LINK, // Set for a
            CODEBLOCK // Set for pre
        };

        std::string text;
        int64_t quote_postnumber = 0; // Set when type is QUOTELINK
        Type type = Type::TEXT;
    };

    enum class NodeType {
        A,
        SPAN,
        PRE
    };

    // Sentinel returned by |tag_name_to_node_type| for tags that are not tracked
    static const NodeType NODE_TYPE_NONE = static_cast<NodeType>(-1);

    // Returns -1 (NODE_TYPE_NONE) if no match
    static NodeType tag_name_to_node_type(HtmlStringView str) {
        if(str.size == 1 && str.data[0] == 'a')
            return NodeType::A;
        if(str.size == 4 && memcmp(str.data, "span", 4) == 0)
            return NodeType::SPAN;
        if(str.size == 3 && memcmp(str.data, "pre", 3) == 0)
            return NodeType::PRE;
        return NODE_TYPE_NONE;
    }

    // A currently open a/span/pre element and the attributes seen on it so far
    struct HtmlNode {
        NodeType node_type;
        std::string klass;
        std::string href;
        int output_count = 0; // Number of text pieces emitted while this node was the innermost one
    };

    using CommentPieceCallback = std::function<void(const CommentPiece&)>;

    struct HtmlParseUserdata {
        CommentPieceCallback callback;
        std::stack<HtmlNode> html_node;
    };

    // Parser callback: keeps a stack of the open a/span/pre elements and reports every
    // text run (and a "\n" for every <br>) to the user callback as a CommentPiece.
    // Always returns 0.
    static int html_parse_callback(HtmlParser *html_parser, HtmlParseType parse_type, void *userdata) {
        HtmlParseUserdata *parse_userdata = static_cast<HtmlParseUserdata*>(userdata);

        switch(parse_type) {
            case HTML_PARSE_TAG_START: {
                if(html_parser->tag_name.size == 2 && memcmp(html_parser->tag_name.data, "br", 2) == 0) {
                    CommentPiece comment_piece;
                    comment_piece.type = CommentPiece::Type::TEXT;
                    comment_piece.text = "\n";
                    parse_userdata->callback(comment_piece);
                }

                const NodeType node_type = tag_name_to_node_type(html_parser->tag_name);
                if(node_type != NODE_TYPE_NONE)
                    parse_userdata->html_node.push({ node_type, "", "", 0 });
                break;
            }
            case HTML_PARSE_TAG_END: {
                if(!parse_userdata->html_node.empty()) {
                    const NodeType node_type = tag_name_to_node_type(html_parser->tag_name);
                    if(node_type != NODE_TYPE_NONE)
                        parse_userdata->html_node.pop();
                }
                break;
            }
            case HTML_PARSE_ATTRIBUTE: {
                if(!parse_userdata->html_node.empty()) {
                    HtmlNode &html_node = parse_userdata->html_node.top();
                    if(html_parser->attribute_key.size == 5 && memcmp(html_parser->attribute_key.data, "class", 5) == 0) {
                        html_node.klass.assign(html_parser->attribute_value.data, html_parser->attribute_value.size);
                    } else if(html_parser->attribute_key.size == 4 && memcmp(html_parser->attribute_key.data, "href", 4) == 0) {
                        html_node.href.assign(html_parser->attribute_value.data, html_parser->attribute_value.size);
                    }
                }
                break;
            }
            case HTML_PARSE_TEXT: {
                std::string text(html_parser->text.data, html_parser->text.size);
                html_unescape_sequences(text);

                // Text outside of any tracked element, or inside one with an unexpected
                // class/href, stays plain TEXT.
                CommentPiece comment_piece;
                comment_piece.type = CommentPiece::Type::TEXT;
                comment_piece.text = std::move(text);

                if(!parse_userdata->html_node.empty()) {
                    HtmlNode &html_node = parse_userdata->html_node.top();
                    switch(html_node.node_type) {
                        case NodeType::A: {
                            if(html_node.klass == "quotelink") {
                                if(string_starts_with(html_node.href, "#p")) {
                                    comment_piece.type = CommentPiece::Type::QUOTELINK;
                                    // Skip the "#p" prefix, the rest is the post number
                                    comment_piece.quote_postnumber = strtoll(html_node.href.c_str() + 2, nullptr, 10);
                                } else if(string_starts_with(html_node.href, "/")) {
                                    comment_piece.type = CommentPiece::Type::CROSSBOARD_LINK;
                                } else {
                                    fprintf(stderr, "Unexpected href for quotelink: %s\n", html_node.href.c_str());
                                }
                            } else {
                                fprintf(stderr, "Unexpected class for a: %s\n", html_node.klass.c_str());
                            }
                            break;
                        }
                        case NodeType::SPAN: {
                            if(html_node.klass == "quote") {
                                // NOTE(review): the first text piece of the span is tagged QUOTE and later
                                // pieces QUOTE_CONTINUE. The previous code had the two swapped, which looks
                                // unintended given the enum names. Both types are rendered identically by
                                // html_to_text, so the produced text is unchanged.
                                comment_piece.type = html_node.output_count == 0 ? CommentPiece::Type::QUOTE : CommentPiece::Type::QUOTE_CONTINUE;
                            } else if(html_node.klass == "deadlink") {
                                comment_piece.type = CommentPiece::Type::DEADLINK;
                            } else {
                                fprintf(stderr, "Unexpected class for span: %s\n", html_node.klass.c_str());
                            }
                            break;
                        }
                        case NodeType::PRE: {
                            if(html_node.klass == "prettyprint") {
                                comment_piece.type = CommentPiece::Type::CODEBLOCK;
                            } else {
                                fprintf(stderr, "Unexpected class for pre: %s\n", html_node.klass.c_str());
                            }
                            break;
                        }
                    }
                    html_node.output_count++;
                }

                parse_userdata->callback(comment_piece);
                break;
            }
            case HTML_PARSE_JAVASCRIPT_CODE: {
                break;
            }
        }
        return 0;
    }

    // Parses |html_source| and calls |callback| once for every extracted comment piece
    static void extract_comment_pieces(const char *html_source, size_t size, CommentPieceCallback callback) {
        HtmlParseUserdata parse_userdata;
        parse_userdata.callback = std::move(callback);
        html_parser_parse(html_source, size, html_parse_callback, &parse_userdata);
    }

    // Converts comment html to plain text.
    // For every quotelink whose post number is found in |comment_by_postno|, the reply
    // relation is recorded in both directions between result_items[body_item_index] and the
    // quoted item. Quotelinks that are not found and deadlinks get " (Dead)" appended,
    // crossboard links get " (Cross-thread)" appended.
    static std::string html_to_text(const char *html_source, size_t size, std::unordered_map<int64_t, size_t> &comment_by_postno, BodyItems &result_items, size_t body_item_index) {
        std::string comment_text;
        extract_comment_pieces(html_source, size,
            [&comment_text, &comment_by_postno, &result_items, body_item_index](const CommentPiece &cp) {
                // |cp| is const, so its text can only be copied (std::move would silently copy too)
                switch(cp.type) {
                    case CommentPiece::Type::TEXT:
                    case CommentPiece::Type::QUOTE:
                    case CommentPiece::Type::QUOTE_CONTINUE:
                        comment_text += cp.text;
                        break;
                    case CommentPiece::Type::QUOTELINK: {
                        comment_text += cp.text;
                        auto it = comment_by_postno.find(cp.quote_postnumber);
                        if(it == comment_by_postno.end()) {
                            // TODO: Link this quote to a 4chan archive that still has the quoted comment (if available)
                            comment_text += " (Dead)";
                        } else {
                            result_items[body_item_index]->replies_to.push_back(it->second);
                            result_items[it->second]->replies.push_back(body_item_index);
                        }
                        break;
                    }
                    case CommentPiece::Type::DEADLINK:
                        // TODO: Link this quote to a 4chan archive that still has the quoted comment (if available)
                        comment_text += cp.text;
                        comment_text += " (Dead)";
                        break;
                    case CommentPiece::Type::CROSSBOARD_LINK:
                        // TODO: Link this to another thread and allow navigating to it
                        comment_text += cp.text;
                        comment_text += " (Cross-thread)";
                        break;
                    case CommentPiece::Type::CODEBLOCK:
                        // TODO: Use a different colored background and use a monospace font
                        comment_text += cp.text;
                        break;
                }
            });
        return comment_text;
    }

    // Logs a reminder for attachment extensions that are not in the known list
    static void warn_if_unsupported_extension(const std::string &ext_str) {
        static const char *supported_extensions[] = { ".png", ".jpg", ".jpeg", ".webm", ".mp4", ".gif" };
        for(const char *supported_extension : supported_extensions) {
            if(ext_str == supported_extension)
                return;
        }
        fprintf(stderr, "TODO: Support file extension: %s\n", ext_str.c_str());
    }

    // Opens a thread list tab for the selected board (args.url is the board id)
    PluginResult FourchanBoardsPage::submit(const SubmitArgs &args, std::vector<Tab> &result_tabs) {
        result_tabs.push_back(Tab{create_body(false, true), std::make_unique<FourchanThreadListPage>(program, args.title, args.url, pass_id), create_search_bar("Search...", SEARCH_DELAY_FILTER)});
        return PluginResult::OK;
    }

    // Fills |result_items| with one item per board listed in the bundled boards.json.
    // Entries without a string "board", "title" and "meta_description" are skipped.
    void FourchanBoardsPage::get_boards(BodyItems &result_items) {
        std::string server_response;
        if(file_get_content(resources_root + "boards.json", server_response) != 0) {
            fprintf(stderr, "failed to read boards.json\n");
            return;
        }

        Json::Value json_root;
        Json::CharReaderBuilder json_builder;
        std::unique_ptr<Json::CharReader> json_reader(json_builder.newCharReader());
        std::string json_errors;
        if(!json_reader->parse(server_response.data(), server_response.data() + server_response.size(), &json_root, &json_errors)) {
            fprintf(stderr, "4chan front page json error: %s\n", json_errors.c_str());
            return;
        }

        if(!json_root.isObject())
            return;

        const Json::Value &boards = json_root["boards"];
        if(!boards.isArray())
            return;

        for(const Json::Value &board : boards) {
            const Json::Value &board_id = board["board"]; // /g/, /a/, /b/ etc
            const Json::Value &board_title = board["title"];
            const Json::Value &board_description = board["meta_description"];
            // The description is only used as a validity filter; it is not displayed
            if(!board_id.isString() || !board_title.isString() || !board_description.isString())
                continue;

            auto body_item = BodyItem::create("/" + board_id.asString() + "/ " + board_title.asString());
            body_item->url = board_id.asString();
            result_items.push_back(std::move(body_item));
        }
    }

    // Handles both the "Logout" item (args.url == "logout") and submission of the login form.
    // Always returns PluginResult::OK; failures are reported with a notification.
    PluginResult FourchanLoginPage::submit(const SubmitArgs &args, std::vector<Tab>&) {
        if(args.url == "logout") {
            Path cookies_filepath;
            if(get_cookies_filepath(cookies_filepath, SERVICE_NAME) == 0)
                remove(cookies_filepath.data.c_str());
            boards_page->pass_id.clear();
            logged_in = LoggedIn::No;
            needs_refresh = true;
            return PluginResult::OK;
        }

        for(const auto &login_input : login_inputs->inputs) {
            if(login_input->get_text().empty()) {
                show_notification("QuickMedia", "All fields need to be filled in", Urgency::CRITICAL);
                return PluginResult::OK;
            }
        }

        // inputs[0] and inputs[1] are presumably the "Token" and "PIN" fields registered in lazy_fetch
        std::string err_msg;
        if(login(this, login_inputs->inputs[0]->get_text(), login_inputs->inputs[1]->get_text(), boards_page->pass_id, err_msg))
            login_finish();
        else
            show_notification("QuickMedia", "Failed to login, error: " + err_msg, Urgency::CRITICAL);

        return PluginResult::OK;
    }

    // Shows a "Logout" item when logged in, otherwise registers the login input fields.
    // The network check is only done while the login state is still unknown.
    PluginResult FourchanLoginPage::lazy_fetch(BodyItems &result_items) {
        if(logged_in == LoggedIn::Yes || (logged_in == LoggedIn::Unknown && is_logged_in())) {
            logged_in = LoggedIn::Yes;
            auto logout_body_item = BodyItem::create("Logout");
            logout_body_item->url = "logout";
            result_items.push_back(std::move(logout_body_item));
        } else {
            logged_in = LoggedIn::No;
            program->add_login_inputs(&tabs->at(tab_index), {
                { "Token", SearchBarType::Text },
                { "PIN", SearchBarType::Password }
            });
        }
        return PluginResult::OK;
    }

    void FourchanLoginPage::login_finish() {
        logged_in = LoggedIn::Yes;
        needs_refresh = true;
    }

    // Downloads the selected thread (args.url is the thread number) and opens it in a new tab.
    // TODO: Merge with lazy fetch
    PluginResult FourchanThreadListPage::submit(const SubmitArgs &args, std::vector<Tab> &result_tabs) {
        Json::Value json_root;
        DownloadResult result = download_json(json_root, fourchan_url + board_id + "/thread/" + args.url + ".json", {}, true);
        if(result != DownloadResult::OK)
            return download_result_to_plugin_result(result);

        if(!json_root.isObject())
            return PluginResult::ERR;

        BodyItems result_items;
        std::unordered_map<int64_t, size_t> comment_by_postno;

        const Json::Value &posts = json_root["posts"];
        if(!posts.isArray())
            return PluginResult::OK;

        // First pass: create one item per valid post so that quotelinks can be resolved to
        // item indices in the second pass, no matter in which order posts reference each other.
        for(const Json::Value &post : posts) {
            if(!post.isObject())
                continue;

            const Json::Value &post_num = post["no"];
            if(!post_num.isNumeric())
                continue;

            int64_t post_num_int = post_num.asInt64();
            comment_by_postno[post_num_int] = result_items.size();
            result_items.push_back(BodyItem::create(""));
            result_items.back()->post_number = std::to_string(post_num_int);
        }

        // Second pass: fill in the items. Posts are skipped with the same conditions as in the
        // first pass, so |body_item_index| stays in sync with |result_items|.
        size_t body_item_index = 0;
        for(const Json::Value &post : posts) {
            if(!post.isObject())
                continue;

            // getString leaves the pointers untouched on failure, which yields an empty range
            const Json::Value &sub = post["sub"];
            const char *sub_begin = "";
            const char *sub_end = sub_begin;
            sub.getString(&sub_begin, &sub_end);

            const Json::Value &com = post["com"];
            const char *comment_begin = "";
            const char *comment_end = comment_begin;
            com.getString(&comment_begin, &comment_end);

            const Json::Value &post_num = post["no"];
            if(!post_num.isNumeric())
                continue;

            const Json::Value &author = post["name"];
            std::string author_str = "Anonymous";
            if(author.isString())
                author_str = author.asString();
            author_str += " #" + std::to_string(post_num.asInt64());

            std::string comment_text = html_to_text(sub_begin, sub_end - sub_begin, comment_by_postno, result_items, body_item_index);
            if(!comment_text.empty())
                comment_text += '\n';
            comment_text += html_to_text(comment_begin, comment_end - comment_begin, comment_by_postno, result_items, body_item_index);
            if(!comment_text.empty() && comment_text.back() == '\n')
                comment_text.pop_back();

            BodyItem *body_item = result_items[body_item_index].get();
            body_item->set_title(std::move(comment_text));
            body_item->set_author(std::move(author_str));

            const Json::Value &ext = post["ext"];
            const Json::Value &tim = post["tim"];
            if(tim.isNumeric() && ext.isString()) {
                std::string ext_str = ext.asString();
                warn_if_unsupported_extension(ext_str);

                // "s" means small, that's the url 4chan uses for thumbnails.
                // thumbnails always has .jpg extension even if they are gifs or webm.
                std::string tim_str = std::to_string(tim.asInt64());
                body_item->thumbnail_url = fourchan_image_url + board_id + "/" + tim_str + "s.jpg";
                body_item->url = fourchan_image_url + board_id + "/" + tim_str + ext_str;

                mgl::vec2i thumbnail_size(64, 64);
                const Json::Value &tn_w = post["tn_w"];
                const Json::Value &tn_h = post["tn_h"];
                if(tn_w.isNumeric() && tn_h.isNumeric())
                    thumbnail_size = mgl::vec2i(tn_w.asInt(), tn_h.asInt());
                body_item->thumbnail_size = thumbnail_size;
            }

            ++body_item_index;
        }

        auto body = create_body(false);
        body->set_items(std::move(result_items));
        result_tabs.push_back(Tab{std::move(body), std::make_unique<FourchanThreadPage>(program, board_id, args.url, pass_id), nullptr});
        return PluginResult::OK;
    }

    // Downloads the board catalog and adds one item per thread to |result_items|
    PluginResult FourchanThreadListPage::lazy_fetch(BodyItems &result_items) {
        Json::Value json_root;
        DownloadResult result = download_json(json_root, fourchan_url + board_id + "/catalog.json?s=Index", {}, true);
        if(result != DownloadResult::OK)
            return download_result_to_plugin_result(result);

        if(!json_root.isArray())
            return PluginResult::ERR;

        // Intentionally left empty: no quotelink can be resolved in the catalog, so
        // html_to_text never indexes |result_items| here.
        std::unordered_map<int64_t, size_t> comment_by_postno;

        for(const Json::Value &page_data : json_root) {
            if(!page_data.isObject())
                continue;

            const Json::Value &threads = page_data["threads"];
            if(!threads.isArray())
                continue;

            for(const Json::Value &thread : threads) {
                if(!thread.isObject())
                    continue;

                // getString leaves the pointers untouched on failure, which yields an empty range
                const Json::Value &sub = thread["sub"];
                const char *sub_begin = "";
                const char *sub_end = sub_begin;
                sub.getString(&sub_begin, &sub_end);

                const Json::Value &com = thread["com"];
                const char *comment_begin = "";
                const char *comment_end = comment_begin;
                com.getString(&comment_begin, &comment_end);

                const Json::Value &thread_num = thread["no"];
                if(!thread_num.isNumeric())
                    continue;

                std::string title_text = html_to_text(sub_begin, sub_end - sub_begin, comment_by_postno, result_items, 0);
                if(!title_text.empty() && title_text.back() == '\n')
                    title_text.back() = ' ';

                std::string comment_text = html_to_text(comment_begin, comment_end - comment_begin, comment_by_postno, result_items, 0);
                // TODO: Do the same when wrapping is implemented
                // TODO: Remove this
                // Cut the preview at the 6th newline
                int num_lines = 0;
                for(size_t i = 0; i < comment_text.size(); ++i) {
                    if(comment_text[i] != '\n')
                        continue;

                    ++num_lines;
                    if(num_lines == 6) {
                        comment_text.resize(i);
                        comment_text += " (...)";
                        break;
                    }
                }

                auto body_item = BodyItem::create(std::move(comment_text));
                body_item->set_author(std::move(title_text));
                body_item->url = std::to_string(thread_num.asInt64());

                const Json::Value &ext = thread["ext"];
                const Json::Value &tim = thread["tim"];
                if(tim.isNumeric() && ext.isString()) {
                    warn_if_unsupported_extension(ext.asString());

                    // "s" means small, that's the url 4chan uses for thumbnails.
                    // thumbnails always has .jpg extension even if they are gifs or webm.
                    body_item->thumbnail_url = fourchan_image_url + board_id + "/" + std::to_string(tim.asInt64()) + "s.jpg";

                    mgl::vec2i thumbnail_size(64, 64);
                    const Json::Value &tn_w = thread["tn_w"];
                    const Json::Value &tn_h = thread["tn_h"];
                    if(tn_w.isNumeric() && tn_h.isNumeric())
                        thumbnail_size = mgl::vec2i(tn_w.asInt() / 2, tn_h.asInt() / 2);
                    body_item->thumbnail_size = thumbnail_size;
                }

                result_items.push_back(std::move(body_item));
            }
        }

        return PluginResult::OK;
    }

    // Returns the part of |filepath| after the last '/', or the whole path if there is no '/'
    static std::string file_get_filename(const std::string &filepath) {
        const size_t index = filepath.rfind('/');
        if(index == std::string::npos)
            return filepath;
        return filepath.substr(index + 1);
    }

    // Posts |comment| (and the file at |filepath|, if not empty) to the thread.
    // The captcha fields are only sent when there is no pass id and |captcha_id| is not empty.
    // The result is derived from known phrases in the server response.
    PostResult FourchanThreadPage::post_comment(const std::string &captcha_id, const std::string &captcha_solution, const std::string &comment, const std::string &filepath) {
        Path cookies_filepath;
        if(get_cookies_filepath(cookies_filepath, SERVICE_NAME) != 0) {
            fprintf(stderr, "Failed to get 4chan cookies filepath\n");
            return PostResult::ERR;
        }

        std::string url = "https://sys.4chan.org/" + board_id + "/post";

        std::vector<CommandArg> additional_args = {
            CommandArg{"-H", "Referer: https://boards.4chan.org/"},
            CommandArg{"-H", "Origin: https://boards.4chan.org"},
            CommandArg{"--form-string", "resto=" + thread_id},
            CommandArg{"--form-string", "com=" + comment},
            CommandArg{"--form-string", "mode=regist"},
            CommandArg{"-c", cookies_filepath.data},
            CommandArg{"-b", cookies_filepath.data}
        };

        if(!filepath.empty()) {
            additional_args.push_back({ "-F", "upfile=@" + filepath });
            additional_args.push_back({ "--form-string", "filename=" + file_get_filename(filepath) });
        }

        if(pass_id.empty() && !captcha_id.empty()) {
            additional_args.push_back(CommandArg{"--form-string", "t-challenge=" + captcha_id});
            additional_args.push_back(CommandArg{"--form-string", "t-response=" + captcha_solution});
        }

        std::string response;
        if(download_to_string(url, response, additional_args, true) != DownloadResult::OK)
            return PostResult::ERR;

        if(response.find("successful") != std::string::npos)
            return PostResult::OK;
        if(response.find("banned") != std::string::npos)
            return PostResult::BANNED;
        if(response.find("mistyped the CAPTCHA") != std::string::npos || response.find("No valid captcha") != std::string::npos)
            return PostResult::INVALID_CAPTCHA;
        if(response.find("Audio streams are not allowed") != std::string::npos)
            return PostResult::FILE_TYPE_NOT_ALLOWED;
        if(response.find("Error: Upload failed") != std::string::npos)
            return PostResult::UPLOAD_FAILED;
        if(response.find("try again") != std::string::npos)
            return PostResult::TRY_AGAIN;

        return PostResult::ERR;
    }

    // Returns the pass id, loading it from the cookies file first if it's currently empty
    const std::string& FourchanThreadPage::get_pass_id() {
        if(pass_id.empty()) {
            Path cookies_filepath;
            if(get_cookies_filepath(cookies_filepath, SERVICE_NAME) != 0) {
                fprintf(stderr, "Failed to get 4chan cookies filepath\n");
            } else {
                pass_id = get_pass_id_from_cookies_file(cookies_filepath);
            }
        }
        return pass_id;
    }

    // Decodes the base64 string stored in |json_to_decode| into |decoded|.
    // Returns false if the value is not a string or is not valid base64.
    static bool base64_decode(const Json::Value &json_to_decode, std::string &decoded) {
        if(!json_to_decode.isString())
            return false;

        const char *start;
        const char *end;
        if(!json_to_decode.getString(&start, &end))
            return false;

        try {
            decoded = cppcodec::base64_rfc4648::decode<std::string>(start, end - start);
            return true;
        } catch(const std::exception&) {
            return false;
        }
    }

    // Requests a captcha challenge for this thread and stores it in |challenge_response|.
    // For the "noop" challenge id no image is decoded. ttl falls back to 120 when the
    // response has no integer "ttl".
    PluginResult FourchanThreadPage::request_captcha_challenge(ImageBoardCaptchaChallenge &challenge_response) {
        Path cookies_filepath;
        if(get_cookies_filepath(cookies_filepath, SERVICE_NAME) != 0) {
            fprintf(stderr, "Failed to get 4chan cookies filepath\n");
            return PluginResult::ERR;
        }

        std::vector<CommandArg> additional_args = {
            CommandArg{"-c", cookies_filepath.data},
            CommandArg{"-b", cookies_filepath.data}
        };

        Json::Value json_root;
        DownloadResult result = download_json(json_root, "https://sys.4chan.org/captcha?board=" + url_param_encode(board_id) + "&thread_id=" + thread_id, std::move(additional_args), true);
        if(result != DownloadResult::OK)
            return download_result_to_plugin_result(result);

        if(!json_root.isObject())
            return PluginResult::ERR;

        const Json::Value &error_json = json_root["error"];
        if(error_json.isString()) {
            show_notification("QuickMedia", "Failed to get captcha, error: " + error_json.asString(), Urgency::CRITICAL);
            return PluginResult::ERR;
        }

        const Json::Value &challenge_json = json_root["challenge"];
        const Json::Value &img_json = json_root["img"];
        const Json::Value &bg_json = json_root["bg"];
        const Json::Value &ttl_json = json_root["ttl"];
        if(!challenge_json.isString())
            return PluginResult::ERR;

        challenge_response.challenge_id = challenge_json.asString();
        if(strcmp(challenge_json.asCString(), "noop") != 0) {
            if(!img_json.isString())
                return PluginResult::ERR;

            if(!base64_decode(img_json, challenge_response.img_data))
                return PluginResult::ERR;

            // The background image is optional, but if present it has to decode
            if(bg_json.isString() && !base64_decode(bg_json, challenge_response.bg_data))
                return PluginResult::ERR;
        }

        if(ttl_json.isInt())
            challenge_response.ttl = ttl_json.asInt();
        else
            challenge_response.ttl = 120;

        return PluginResult::OK;
    }
}