#include "rss.h"
#include "download.h"
#include "transmission.h"
#include "stringutils.h"
#include "fileutils.h"
#include "buffer.h"
#include "episode.h"
#include "rss_html_common.h"
#include "../depends/cJSON.h"
#include "alloc.h"
/*
    NOTE(review): the six system header names were missing from the reviewed copy of this file.
    The list below is reconstructed from the standard functions and macros this file uses
    (PATH_MAX, printf/fgets/remove, free, strstr/strcmp/strcat/strerror/memmove, time).
    Confirm against the original include list.
*/
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Size of the buffer that add_rss hands to get_rss_url_from_episode_info to receive the rss url */
enum { RSS_URL_BUFFER_SIZE = 4096 };

/* Length of a string literal / char array without its null terminator */
#define RSS_TAG_LEN(tag) (sizeof(tag) - 1)

/*
    The xml tags that parse_rss searches for.
    Their lengths match the fixed offsets the parser relies on
    (9 for <channel>, 7 for <title>, 8 for </title>, 6 for <item> and <link>, 7 for </item>).
*/
static const char channel_open_tag[] = "<channel>";
static const char title_open_tag[] = "<title>";
static const char title_close_tag[] = "</title>";
static const char item_open_tag[] = "<item>";
static const char item_close_tag[] = "</item>";
static const char link_open_tag[] = "<link>";
static const char link_close_tag[] = "</link>";

/* Returns non-zero if |c| is an ascii lowercase letter */
static int is_alpha_lowercase(char c) {
    return c >= 'a' && c <= 'z';
}

/* Returns non-zero if |c| is an ascii decimal digit */
static int is_digit(char c) {
    return c >= '0' && c <= '9';
}

/*
    Returns a pointer to the first character after |str| that can't be part of an
    entity name (anything other than a-z, 0-9 and '#').
    The scan always terminates, since the null terminator is not a name character.
*/
static char* get_amp_end(char *str) {
    while(is_alpha_lowercase(*str) || is_digit(*str) || *str == '#')
        ++str;
    return str;
}

/* Returns non-zero if the |len| bytes at |name| are exactly the string |expected| */
static int entity_name_equals(const char *name, size_t len, const char *expected) {
    return strlen(expected) == len && memcmp(name, expected, len) == 0;
}

/*
    Decodes the entity name |name| of length |len| (the text between '&' and ';').
    Supports the predefined xml entities and decimal/hexadecimal character references
    in the ascii range 1-127.
    Returns the decoded character, or '\0' if the entity is unknown or not representable.
*/
/* TODO: Also support non-ascii numbers */
static char decode_entity(const char *name, size_t len) {
    if(len >= 2 && name[0] == '#') {
        int base = 10;
        size_t i = 1;
        if(name[1] == 'x') {
            base = 16;
            i = 2;
        }

        if(i == len)
            return '\0';

        long value = 0;
        for(; i < len; ++i) {
            const char c = name[i];
            int digit;
            if(is_digit(c))
                digit = c - '0';
            else if(base == 16 && c >= 'a' && c <= 'f')
                digit = c - 'a' + 10;
            else
                return '\0';

            value = value * base + digit;
            /* Bail out early, this also prevents overflow for very long numbers */
            if(value > 127)
                return '\0';
        }
        return (char)value;
    }

    if(entity_name_equals(name, len, "amp"))
        return '&';
    if(entity_name_equals(name, len, "lt"))
        return '<';
    if(entity_name_equals(name, len, "gt"))
        return '>';
    if(entity_name_equals(name, len, "apos"))
        return '\'';
    if(entity_name_equals(name, len, "quot"))
        return '"';
    return '\0';
}

/*
    Replaces xml entities in |str| with the characters they represent, in place.
    The result is never longer than the input.
    Entities that can't be decoded and '&' characters that don't start an entity
    are kept as they are instead of being removed.
*/
static void xml_unescape(char *str) {
    char *out = str;
    while(*str != '\0') {
        if(*str != '&') {
            *out++ = *str++;
            continue;
        }

        char *amp_end = get_amp_end(str + 1);
        if(*amp_end != ';' && *amp_end != '\0') {
            /* Not an entity, just a plain '&' */
            *out++ = *str++;
            continue;
        }

        /* An entity at the very end of the string is accepted without the ';' */
        char *next = (*amp_end == ';') ? amp_end + 1 : amp_end;
        const char decoded = decode_entity(str + 1, (size_t)(amp_end - (str + 1)));
        if(decoded != '\0') {
            *out++ = decoded;
        } else {
            /* |out| never passes |str|, but the ranges can overlap */
            const size_t raw_len = (size_t)(next - str);
            memmove(out, str, raw_len);
            out += raw_len;
        }
        str = next;
    }
    *out = '\0';
}

/*
    Terminates |str| at the first occurrence of |tag| by overwriting the first
    character of the tag with a null terminator.
    Returns a pointer to where the tag started, or NULL if |tag| was not found
    (in which case |str| is not modified).
*/
static char* string_substr_before_tag_end(char *str, const char *tag) {
    char *tag_p = strstr(str, tag);
    if(tag_p)
        *tag_p = '\0';
    return tag_p;
}

/*
    Called by parse_rss for every rss item.
    Returning non-zero stops the parsing (parse_rss then returns success).
*/
typedef int (*RssParseCallback)(const char *title, const char *link, void *userdata);

/*
    Parses the rss document |str| in place (null terminators are written into |str|).
    |rss_title_str| is set to the title of the channel if it has one before the first item, otherwise NULL.
    The title and link passed to |parse_callback| point into |str|.
    '/' in item titles is replaced with '_'.
    Returns 0 on success (including a channel without items) and 1 if the rss is malformed.
*/
static int parse_rss(char *str, char **rss_title_str, RssParseCallback parse_callback, void *userdata) {
    *rss_title_str = NULL;

    char *channel_start = strstr(str, channel_open_tag);
    if(!channel_start)
        return 1;

    char *after_channel = channel_start + RSS_TAG_LEN(channel_open_tag);
    char *rss_title = strstr(after_channel, title_open_tag);
    char *first_item = strstr(after_channel, item_open_tag);

    /* A title after the first item belongs to an item, not to the channel */
    if(rss_title && (!first_item || rss_title < first_item)) {
        rss_title += RSS_TAG_LEN(title_open_tag);
        string_substr_before_tag_end(rss_title, title_close_tag);
        xml_unescape(rss_title);
        *rss_title_str = rss_title;
    }

    if(!first_item)
        return 0;

    char *item = first_item;
    for(;;) {
        char *item_body = item + RSS_TAG_LEN(item_open_tag);
        char *item_end = strstr(item_body, item_close_tag);
        if(!item_end)
            return 1;

        char *item_title = strstr(item_body, title_open_tag);
        if(!item_title || item_title >= item_end)
            return 1;
        item_title += RSS_TAG_LEN(title_open_tag);

        char *after_title = string_substr_before_tag_end(item_title, title_close_tag);
        if(!after_title)
            return 1;
        after_title += RSS_TAG_LEN(title_close_tag);

        char *item_link = strstr(after_title, link_open_tag);
        if(!item_link || item_link >= item_end)
            return 1;
        item_link += RSS_TAG_LEN(link_open_tag);
        string_substr_before_tag_end(item_link, link_close_tag);

        xml_unescape(item_title);
        xml_unescape(item_link);
        string_replace(item_title, '/', '_');
        char *stripped_title_str = strip(item_title);

        if(parse_callback(stripped_title_str, item_link, userdata) != 0)
            return 0;

        item = strstr(item_end + RSS_TAG_LEN(item_close_tag), item_open_tag);
        if(!item)
            return 0;
    }
}

/* RssParseCallback that appends every item to the Buffer of DownloadItemsData passed as |userdata| */
static int rss_parse_add_callback(const char *title, const char *link, void *userdata) {
    Buffer *download_items_buffer = userdata;
    DownloadItemsData download_items_data;
    download_items_data.title = title;
    download_items_data.link = link;
    buffer_append(download_items_buffer, &download_items_data, sizeof(download_items_data));
    return 0;
}

static const char hex_characters[] = "0123456789ABCDEF";

/*
    Percent-encodes |str| into |output|.
    Ascii letters and digits are copied as they are, every other byte is written as %XX.
    Bytes are treated as unsigned, so the result doesn't depend on whether char is signed.
    |output| needs space for at least 3 * strlen(str) + 1 bytes.
*/
/* TODO: Also support unicode escape characters */
static void url_escape(const char *str, char *output) {
    size_t index = 0;
    for(; *str != '\0'; ++str) {
        const unsigned char c = (unsigned char)*str;
        int needs_escape = (c < 32 || c > 126);
        if(!needs_escape) {
            switch(c) {
                case '!': case '#': case '$': case '%': case '&': case '\'':
                case '(': case ')': case '*': case '+': case ',': case '/':
                case ':': case ';': case '=': case '?': case '@': case '[':
                case ']': case '"': case '-': case '.': case '<': case '>':
                case '\\': case '^': case '_': case '`': case '{': case '|':
                case '}': case '~': case ' ':
                    needs_escape = 1;
                    break;
                default:
                    break;
            }
        }

        if(needs_escape) {
            output[index++] = '%';
            output[index++] = hex_characters[(c >> 4) & 0x0F];
            output[index++] = hex_characters[c & 0x0F];
        } else {
            output[index++] = (char)c;
        }
    }
    output[index] = '\0';
}

/*
    Asks the user (on stdin/stdout) which nyaa.si submitter to track for |episode_info| and
    to confirm the tracking data, then writes the resulting rss url to |rss_url|.
    |rss_url| needs space for RSS_URL_BUFFER_SIZE bytes.
    If the user rejects the tracking data then |rss_url| is set to an empty string and 0 is returned.
    Returns 0 on success and -1 on failure.
*/
static int get_rss_url_from_episode_info(const char *episode_name, EpisodeInfo *episode_info, char *rss_url, fallback *fall) {
    char *selected_submitter = NULL;
    char response[512];
    char group_name_escaped[1536];

    for(;;) {
        printf("Enter the name of the submitter (leave empty to choose \"%s\" or type \"anon\" to choose all submitters): ", episode_info->group_name);
        fflush(stdout);
        if(!fgets(response, sizeof(response), stdin)) {
            fprintf(stderr, "Failed to read response from stdin\n");
            return -1;
        }

        char *response_str = strip(response);
        /* Compare the stripped response so surrounding whitespace never matters */
        if(strcmp(response_str, "anon") == 0)
            break;

        if(strlen(response_str) == 0)
            response_str = episode_info->group_name;

        /* Every byte can expand to 3 bytes when escaped */
        if(strlen(response_str) > (sizeof(group_name_escaped) - 1) / 3) {
            fprintf(stderr, "Error: submitter name is too long!\n");
            return -1;
        }
        url_escape(response_str, group_name_escaped);

        char url[4096];
        if(snprintf(url, sizeof(url), "https://nyaa.si/user/%s", group_name_escaped) >= (int)sizeof(url)) {
            fprintf(stderr, "Error: url is too long!\n");
            return -1;
        }

        if(is_header_response_ok(url, fall) == 0) {
            selected_submitter = response_str;
            break;
        } else {
            printf("The submitter \"%s\" doesn't exist on nyaa.si, please choose another submitter.\n", response_str);
        }
    }

    char generic_name[2048];
    if(episode_info_get_generic_name(episode_info, generic_name, sizeof(generic_name)) != 0) {
        fprintf(stderr, "Failed to get name for episode!\n");
        return -1;
    }

    for(;;) {
        printf("Filter: %s\n", generic_name);
        printf("After: %s\n", episode_name);
        printf("Submitter: %s\n", selected_submitter ? selected_submitter : "all submitters");
        printf("Is the tracking data above correct? (Y)es/No: ");
        fflush(stdout);

        char sresp[128];
        if(!fgets(sresp, sizeof(sresp), stdin)) {
            fprintf(stderr, "Failed to read response from stdin\n");
            return -1;
        }

        char *response_str = strip(sresp);
        int response_len = strlen(response_str);
        if(response_len > 0 && (response_str[0] == 'n' || response_str[0] == 'N')) {
            rss_url[0] = '\0';
            return 0;
        } else if(response_len == 0 || response_str[0] == 'y' || response_str[0] == 'Y') {
            break;
        }
    }

    /* Every byte can expand to 3 bytes when escaped */
    char generic_name_escaped[sizeof(generic_name) * 3];
    url_escape(generic_name, generic_name_escaped);

    int rss_url_len;
    if(selected_submitter)
        rss_url_len = snprintf(rss_url, RSS_URL_BUFFER_SIZE, "https://nyaa.si/?page=rss&q=%s&u=%s", generic_name_escaped, group_name_escaped);
    else
        rss_url_len = snprintf(rss_url, RSS_URL_BUFFER_SIZE, "https://nyaa.si/?page=rss&q=%s", generic_name_escaped);

    if(rss_url_len < 0 || rss_url_len >= RSS_URL_BUFFER_SIZE) {
        fprintf(stderr, "Error: url is too long!\n");
        return -1;
    }
    return 0;
}

/*
    Starts tracking the rss at |url|.
    If |url| can't be downloaded then it's treated as an episode name and the user is asked
    for the data needed to create a nyaa.si rss url for it. In that case |url| is modified in place
    ('/' replaced with '_') and used as |start_after|.
    |name| is the name of the directory to track the rss in. It can be NULL, in which case the rss title is used.
    |start_after| can be NULL. If it's set then it has to match the title of an item in the rss.
    |rss_config_dir| is modified: "/tracked/<name>" is appended to it and it's not restored.
    NOTE(review): |rss_config_dir| is assumed to have room for the appended path — confirm against the callers.
    Returns 0 on success (or if the user chose not to track the rss), otherwise non-zero.
*/
int add_rss(const char *name, char *url, char *rss_config_dir, const char *start_after, fallback *fall) {
    int result = 0;
    char rss_url[RSS_URL_BUFFER_SIZE];
    char *rss_title = NULL;
    DownloadItemsData *download_items_start = NULL;

    /* Stays empty until the path is known, so cleanup never touches an uninitialized path */
    char in_progress_filepath[PATH_MAX];
    in_progress_filepath[0] = '\0';

    Buffer buffer;
    buffer_init(&buffer);

    Buffer download_items_buffer;
    buffer_init(&download_items_buffer);

    result = download_to_buffer(url, &buffer, fall);
    if(result != 0) {
        EpisodeInfo episode_info;
        if(episode_info_create_from_episode_name(&episode_info, url) != 0) {
            fprintf(stderr, "Failed to download rss: %s\n", url);
            goto cleanup;
        }

        if(get_rss_url_from_episode_info(url, &episode_info, rss_url, fall) != 0)
            goto cleanup;

        /* User didn't want to track rss */
        if(rss_url[0] == '\0') {
            result = 0;
            goto cleanup;
        }

        string_replace(url, '/', '_');
        start_after = strip(url);
        url = rss_url;

        buffer_clear(&buffer);
        result = download_to_buffer(url, &buffer, fall);
        if(result != 0) {
            fprintf(stderr, "Failed to download rss: %s\n", url);
            goto cleanup;
        }
    }

    result = parse_rss(buffer.data, &rss_title, rss_parse_add_callback, &download_items_buffer);
    if(result != 0) {
        fprintf(stderr, "Failed to parse rss for url: %s\n", url);
        goto cleanup;
    }

    if(start_after) {
        DownloadItemsData *download_items_it = buffer_begin(&download_items_buffer);
        DownloadItemsData *download_items_end = buffer_end(&download_items_buffer);
        for(; download_items_it != download_items_end; ++download_items_it) {
            if(strcmp(start_after, download_items_it->title) == 0) {
                download_items_start = download_items_it;
                break;
            }
        }

        if(!download_items_start) {
            fprintf(stderr, "Failed to find %s in rss %s\n", start_after, url);
            result = -1;
            goto cleanup;
        }
    }

    if(!name) {
        if(!rss_title) {
            fprintf(stderr, "Failed to find rss title and --name was not provided\n");
            result = -1;
            goto cleanup;
        }

        string_replace(rss_title, '/', '_');
        char *stripped_rss_title = strip(rss_title);
        name = stripped_rss_title;
    }

    if(name[0] == '\0' || strcmp(name, ".") == 0 || strcmp(name, "..") == 0) {
        fprintf(stderr, "Rss name can't be empty, . or ..\n");
        result = -1;
        goto cleanup;
    }

    char *rss_tracked_dir = rss_config_dir;
    strcat(rss_tracked_dir, "/tracked/");
    strcat(rss_tracked_dir, name);

    int in_progress_len = snprintf(in_progress_filepath, sizeof(in_progress_filepath), "%s/.in_progress", rss_tracked_dir);
    if(in_progress_len < 0 || (size_t)in_progress_len >= sizeof(in_progress_filepath)) {
        /* Don't let cleanup remove a truncated path */
        in_progress_filepath[0] = '\0';
        fprintf(stderr, "Error: path %s/.in_progress is too long!\n", rss_tracked_dir);
        result = -1;
        goto cleanup;
    }

    if(file_exists(rss_tracked_dir) == 0 && file_exists(in_progress_filepath) != 0) {
        fprintf(stderr, "You are already tracking %s\n", url);
        result = -1;
        goto cleanup;
    }

    result = create_directory_recursive(rss_tracked_dir);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s, error: %s\n", rss_tracked_dir, strerror(result));
        goto cleanup;
    }

    /*
        Create an ".in_progress" file to prevent periodic sync from reading rss data
        before we have finished adding all the data.
    */
    remove(in_progress_filepath);
    result = create_lock_file(in_progress_filepath);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/.in_progress\n", rss_tracked_dir);
        remove_recursive(rss_tracked_dir);
        goto cleanup;
    }

    result = file_overwrite_in_dir(rss_tracked_dir, "link", url, strlen(url));
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/link\n", rss_tracked_dir);
        remove_recursive(rss_tracked_dir);
        goto cleanup;
    }

    char updated[32];
    /* time_t isn't guaranteed to be long, so convert it to match the format specifier */
    snprintf(updated, sizeof(updated), "%ld", (long)time(NULL));
    result = file_overwrite_in_dir(rss_tracked_dir, "updated", updated, strlen(updated));
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/updated\n", rss_tracked_dir);
        remove_recursive(rss_tracked_dir);
        goto cleanup;
    }

    /* Without |start_after| no items are written to the data file */
    size_t num_download_items = download_items_start ? (((DownloadItemsData*)buffer_end(&download_items_buffer)) - download_items_start) : 0;
    result = write_plugin_json_to_file(rss_tracked_dir, "data", url, updated, download_items_start, num_download_items, NULL, fall);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/data\n", rss_tracked_dir);
        remove_recursive(rss_tracked_dir);
        goto cleanup;
    }

    cleanup:
    if(in_progress_filepath[0] != '\0')
        remove(in_progress_filepath);
    buffer_deinit(&download_items_buffer);
    buffer_deinit(&buffer);
    return result;
}

/*
    Returns 1 if the "downloaded" array in the json data of |tracked_rss| contains an object
    with the title |title| or the url |link|, otherwise returns 0.
*/
static int is_item_already_downloaded(const char *title, const char *link, TrackedRss *tracked_rss) {
    /* TODO: Optimize this... */
    cJSON *downloaded_json = cJSON_GetObjectItemCaseSensitive(tracked_rss->json_data, "downloaded");
    if(!cJSON_IsArray(downloaded_json))
        return 0;

    cJSON *downloaded_item = NULL;
    cJSON_ArrayForEach(downloaded_item, downloaded_json) {
        if(!cJSON_IsObject(downloaded_item))
            continue;

        cJSON *download_title_value = cJSON_GetObjectItemCaseSensitive(downloaded_item, "title");
        cJSON *download_url_value = cJSON_GetObjectItemCaseSensitive(downloaded_item, "url");
        if((cJSON_IsString(download_title_value) && strcmp(download_title_value->valuestring, title) == 0)
            || (cJSON_IsString(download_url_value) && strcmp(download_url_value->valuestring, link) == 0))
            return 1;
    }

    return 0;
}

/* The userdata for rss_parse_sync_callback */
typedef struct {
    TrackedRss *tracked_rss;
    Buffer *download_items_buffer;
} RssParseSyncData;

/*
    RssParseCallback that collects items until the first item that has already been downloaded.
    Returning 1 for that item makes parse_rss stop, so only the items before it in the rss are collected.
*/
static int rss_parse_sync_callback(const char *title, const char *link, void *userdata) {
    RssParseSyncData *rss_parse_sync_data = userdata;
    if(is_item_already_downloaded(title, link, rss_parse_sync_data->tracked_rss))
        return 1;

    DownloadItemsData download_items_data;
    download_items_data.title = title;
    download_items_data.link = link;
    buffer_append(rss_parse_sync_data->download_items_buffer, &download_items_data, sizeof(download_items_data));
    return 0;
}

/* Returns the smallest value of |a| and |b| */
static int int_min(int a, int b) {
    return a < b ? a : b;
}

/*
    Adds the torrents in |download_items_buffer| to transmission, starting with the last item in the buffer
    and going backwards, adding at most MAX_UPDATE_ITEMS items.
    Stops at the first torrent that fails to be added. The torrents that were added before that
    are still passed to tracked_item_update_latest.
    Returns the error of tracked_item_update_latest if it fails, -1 if a torrent failed to be added, otherwise 0.
*/
static int add_torrents_in_reverse(TransmissionSession *transmission_session, Buffer *download_items_buffer, TrackedRss *tracked_rss, char *rss_tracked_dir, fallback *fall) {
    int add_result = 0;
    char *torrent_names[MAX_UPDATE_ITEMS];
    DownloadItemsData *added_download_items[MAX_UPDATE_ITEMS];
    long timestamps[MAX_UPDATE_ITEMS];

    DownloadItemsData *download_items_begin = buffer_begin(download_items_buffer);
    DownloadItemsData *download_items_end = buffer_end(download_items_buffer);
    const int num_items_to_download = int_min(download_items_end - download_items_begin, MAX_UPDATE_ITEMS);

    int torrent_name_index = 0;
    while(torrent_name_index < num_items_to_download) {
        /* Index from the end instead of decrementing a pointer past the start of the buffer */
        DownloadItemsData *download_item = download_items_end - 1 - torrent_name_index;
        fprintf(stderr, "Starting download of torrent: %s (title: %s)\n", download_item->link, download_item->title);
        if(transmission_add_torrent(transmission_session, download_item->link, NULL, &torrent_names[torrent_name_index]) != 0) {
            fprintf(stderr, "Failed to add torrent: %s\n", download_item->link);
            add_result = -1;
            break;
        }

        added_download_items[torrent_name_index] = download_item;
        /* Timestamps are one second apart, in the order the torrents are added. The last item to download gets the current time */
        timestamps[torrent_name_index] = (long)time(NULL) - num_items_to_download + (torrent_name_index + 1);
        ++torrent_name_index;
        /* Show notification that download has started? */
    }

    TrackedItem tracked_item;
    tracked_item.title = tracked_rss->title;
    tracked_item.json_data = tracked_rss->json_data;
    const int update_result = tracked_item_update_latest(&tracked_item, rss_tracked_dir, added_download_items, torrent_names, timestamps, torrent_name_index, fall);

    for(int i = 0; i < torrent_name_index; ++i) {
        free(torrent_names[i]);
    }

    /* Don't let a successful update hide that a torrent failed to be added */
    if(update_result != 0)
        return update_result;
    return add_result;
}

/*
    Downloads the rss of |tracked_rss|, adds the torrents of the items that haven't been downloaded yet
    to transmission and then writes the current time to the "synced" file of the tracked rss.
    |rss_config_dir| is used as a scratch buffer for "<rss_config_dir>/tracked/<title>" and is restored before returning.
    NOTE(review): |rss_config_dir| is assumed to have room for the appended path — confirm against the callers.
    Returns 0 on success, otherwise non-zero.
*/
int sync_rss(TrackedRss *tracked_rss, TransmissionSession *transmission_session, char *rss_config_dir, fallback *fall) {
    /* TODO: This can be cached */
    int rss_config_dir_len = strlen(rss_config_dir);

    fprintf(stderr, "Syncing %s\n", tracked_rss->title);

    int result = 0;

    Buffer download_items_buffer;
    buffer_init(&download_items_buffer);

    Buffer rss_data_buffer;
    buffer_init(&rss_data_buffer);

    result = download_to_buffer(tracked_rss->link, &rss_data_buffer, fall);
    if(result != 0) {
        fprintf(stderr, "Failed to download rss: %s\n", tracked_rss->link);
        goto cleanup;
    }

    RssParseSyncData rss_parse_sync_data;
    rss_parse_sync_data.tracked_rss = tracked_rss;
    rss_parse_sync_data.download_items_buffer = &download_items_buffer;

    char *rss_title = NULL;
    result = parse_rss(rss_data_buffer.data, &rss_title, rss_parse_sync_callback, &rss_parse_sync_data);
    if(result != 0) {
        fprintf(stderr, "Failed to parse rss for url: %s\n", tracked_rss->link);
        goto cleanup;
    }

    char *rss_tracked_dir = rss_config_dir;
    strcat(rss_tracked_dir, "/tracked/");

    /* At this point |rss_tracked_dir| is the directory that contains all tracked items, without the title */
    result = add_torrents_in_reverse(transmission_session, &download_items_buffer, tracked_rss, rss_tracked_dir, fall);
    if(result != 0) {
        fprintf(stderr, "Failed while adding torrents for url: %s\n", tracked_rss->link);
        goto cleanup;
    }

    char updated[32];
    /* time_t isn't guaranteed to be long, so convert it to match the format specifier */
    snprintf(updated, sizeof(updated), "%ld", (long)time(NULL));
    strcat(rss_tracked_dir, tracked_rss->title);
    result = file_overwrite_in_dir(rss_tracked_dir, "synced", updated, strlen(updated));
    if(result != 0) {
        fprintf(stderr, "Failed to update %s/synced\n", rss_tracked_dir);
        goto cleanup;
    }

    cleanup:
    /* Undo the path that was appended to the caller's buffer */
    rss_config_dir[rss_config_dir_len] = '\0';
    buffer_deinit(&rss_data_buffer);
    buffer_deinit(&download_items_buffer);
    return result;
}