#include "rss.h"
#include "download.h"
#include "transmission.h"
#include "stringutils.h"
#include "fileutils.h"
#include "buffer.h"
#include "rss_html_common.h"
#include "json.h"
#include "alloc.h"
/*
    NOTE(review): the names of the system headers were missing from this file.
    The list below covers everything this file uses (PATH_MAX, stdio, free,
    string functions, time). If another system header was included here
    originally, add it back.
*/
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

static int is_alpha_lowercase(char c) {
    return c >= 'a' && c <= 'z';
}

static int is_alpha_uppercase(char c) {
    return c >= 'A' && c <= 'Z';
}

static int is_digit(char c) {
    return c >= '0' && c <= '9';
}

/*
    Returns a pointer to the first character after the entity name that starts at |str|
    (|str| points to the character after '&').
    The scan stops at the first character that can't be part of an entity name, so the
    returned pointer points to ';' for a well-formed entity and to any other character
    (possibly the null terminator) otherwise. This function always terminates.
*/
static char* get_amp_end(char *str) {
    for(;;) {
        char c = *str;
        if(is_alpha_lowercase(c) || is_alpha_uppercase(c) || is_digit(c) || c == '#')
            ++str;
        else
            break;
    }
    return str;
}

/* Returns the value of the digit |c| in |base| (10 or 16), or -1 if |c| is not a digit in that base */
static int digit_value(char c, int base) {
    int value;
    if(c >= '0' && c <= '9')
        value = c - '0';
    else if(c >= 'a' && c <= 'f')
        value = 10 + (c - 'a');
    else if(c >= 'A' && c <= 'F')
        value = 10 + (c - 'A');
    else
        return -1;
    return value < base ? value : -1;
}

/*
    Parses the digits of a numeric character reference (the part after "&#", without ';').
    Both decimal ("8211") and hexadecimal ("x2013") forms are accepted.
    Returns 1 and sets |code_point| on success. Returns 0 if the digits are invalid or if the
    value is 0, a surrogate or larger than 0x10FFFF.
*/
static int parse_char_reference(const char *digits, size_t len, unsigned long *code_point) {
    int base = 10;
    if(len > 0 && (digits[0] == 'x' || digits[0] == 'X')) {
        base = 16;
        ++digits;
        --len;
    }

    if(len == 0)
        return 0;

    unsigned long value = 0;
    for(size_t i = 0; i < len; ++i) {
        int digit = digit_value(digits[i], base);
        if(digit < 0)
            return 0;
        value = value * (unsigned long)base + (unsigned long)digit;
        /* Checked every iteration so |value| can never overflow */
        if(value > 0x10FFFFUL)
            return 0;
    }

    if(value == 0 || (value >= 0xD800UL && value <= 0xDFFFUL))
        return 0;

    *code_point = value;
    return 1;
}

/* Writes |code_point| as utf-8 to |out| and returns the number of bytes written (1 to 4) */
static size_t utf8_encode(unsigned long code_point, char *out) {
    if(code_point < 0x80UL) {
        out[0] = (char)code_point;
        return 1;
    }

    if(code_point < 0x800UL) {
        out[0] = (char)(0xC0 | (code_point >> 6));
        out[1] = (char)(0x80 | (code_point & 0x3F));
        return 2;
    }

    if(code_point < 0x10000UL) {
        out[0] = (char)(0xE0 | (code_point >> 12));
        out[1] = (char)(0x80 | ((code_point >> 6) & 0x3F));
        out[2] = (char)(0x80 | (code_point & 0x3F));
        return 3;
    }

    out[0] = (char)(0xF0 | (code_point >> 18));
    out[1] = (char)(0x80 | ((code_point >> 12) & 0x3F));
    out[2] = (char)(0x80 | ((code_point >> 6) & 0x3F));
    out[3] = (char)(0x80 | (code_point & 0x3F));
    return 4;
}

/*
    Decodes the entity called |name| (the text between '&' and ';', |name_len| bytes long) and
    writes the result to |out|. Returns the number of bytes written, or 0 if the entity is not
    recognized, in which case nothing is written.
    |out| is allowed to point to the '&' of the entity (or before it): the name is fully read
    before anything is written and the decoded form is never longer than the entity itself.
*/
static size_t decode_entity(const char *name, size_t name_len, char *out) {
    static const struct {
        const char *name;
        char value;
    } named_entities[] = {
        { "amp", '&' },
        { "lt", '<' },
        { "gt", '>' },
        { "apos", '\'' },
        { "quot", '"' }
    };

    if(name_len > 0 && name[0] == '#') {
        unsigned long code_point;
        if(!parse_char_reference(name + 1, name_len - 1, &code_point))
            return 0;
        return utf8_encode(code_point, out);
    }

    for(size_t i = 0; i < sizeof(named_entities) / sizeof(named_entities[0]); ++i) {
        if(strlen(named_entities[i].name) == name_len && memcmp(named_entities[i].name, name, name_len) == 0) {
            out[0] = named_entities[i].value;
            return 1;
        }
    }

    return 0;
}

/*
    Replaces xml entities in |str| with the characters they represent. This is done in-place,
    the result is never longer than the input.
    The predefined entities (amp, lt, gt, apos, quot) and numeric character references
    (decimal and hexadecimal, written as utf-8) are decoded.
    Text that looks like an entity but isn't recognized (for example a bare '&' in a url)
    is kept as it is instead of being removed.
*/
static void xml_unescape(char *str) {
    char *out = str;
    while(*str != '\0') {
        if(*str != '&') {
            *out++ = *str++;
            continue;
        }

        char *name = str + 1;
        char *amp_end = get_amp_end(name);
        /* Only the ';' terminator belongs to the entity, any other character has to be kept */
        char *next = (*amp_end == ';') ? amp_end + 1 : amp_end;

        size_t written = decode_entity(name, (size_t)(amp_end - name), out);
        if(written == 0) {
            /* |out| and |str| can overlap (|out| <= |str|), so memmove has to be used */
            written = (size_t)(next - str);
            memmove(out, str, written);
        }

        out += written;
        str = next;
    }
    *out = '\0';
}

/*
    Null terminates |str| at the first occurrence of |tag|.
    Returns a pointer to where |tag| was found, or NULL if |tag| is not in |str|
    (in which case |str| is not modified).
*/
static char* string_substr_before_tag_end(char *str, const char *tag) {
    char *tag_p = strstr(str, tag);
    if(tag_p)
        *tag_p = '\0';
    return tag_p;
}

/*
    Called once for every item in the rss, in document order.
    |title| and |link| point into the buffer that was parsed.
    Return non-zero to stop the parsing.
*/
typedef int (*RssParseCallback)(char *title, char *link, void *userdata);

/*
    Parses the rss document |str|, which is modified in the process (the strings that are
    returned and passed to the callback point into |str|).
    |rss_title_str| is set to the title of the channel, or to NULL if the channel doesn't have
    a title before the first item.
    |parse_callback| is called for every item with the unescaped title (with '/' replaced by '_'
    and then passed through strip) and the unescaped link.
    Returns 0 on success (also when the callback stopped the parsing and when there are no items)
    and 1 if the document is not a valid rss.
    The tags are matched as exact strings, tags with attributes are not recognized.
*/
static int parse_rss(char *str, char **rss_title_str, RssParseCallback parse_callback, void *userdata) {
    *rss_title_str = NULL;
    if(!str)
        return 1;

    char *channel_start = strstr(str, "<channel>");
    if(!channel_start)
        return 1;

    char *after_channel = channel_start + 9; /* strlen("<channel>") */
    char *rss_title = strstr(after_channel, "<title>");
    char *first_item = strstr(after_channel, "<item>");

    /* A title after the first item is the title of an item, not the title of the channel */
    if(rss_title && (!first_item || rss_title < first_item)) {
        rss_title += 7; /* strlen("<title>") */
        char *rss_title_end = strstr(rss_title, "</title>");
        /* Don't null terminate inside an item, that would hide the items from the loop below */
        if(rss_title_end && (!first_item || rss_title_end < first_item)) {
            *rss_title_end = '\0';
            xml_unescape(rss_title);
            *rss_title_str = rss_title;
        }
    }

    if(!first_item)
        return 0;

    char *item = first_item;
    for(;;) {
        char *item_body = item + 6; /* strlen("<item>") */
        char *item_end = strstr(item_body, "</item>");
        if(!item_end)
            return 1;

        char *title_tag = strstr(item_body, "<title>");
        if(!title_tag || title_tag >= item_end)
            return 1;

        char *item_title = title_tag + 7; /* strlen("<title>") */
        char *title_end = string_substr_before_tag_end(item_title, "</title>");
        if(!title_end || title_end >= item_end)
            return 1;

        /* The link is usually after the title... */
        char *link_tag = strstr(title_end + 8, "<link>"); /* strlen("</title>") */
        int link_before_title = 0;
        if(!link_tag || link_tag >= item_end) {
            /*
                ...but rss doesn't require any specific order. The title has been null terminated,
                so this search can only find a link that is before the end of the title.
            */
            link_tag = strstr(item_body, "<link>");
            if(!link_tag || link_tag >= title_tag)
                return 1;
            link_before_title = 1;
        }

        char *item_link = link_tag + 6; /* strlen("<link>") */
        char *link_end = string_substr_before_tag_end(item_link, "</link>");
        if(!link_end || link_end >= item_end)
            return 1;

        /* The link has to be closed before the title starts, otherwise they overlap */
        if(link_before_title && link_end > title_tag)
            return 1;

        xml_unescape(item_title);
        xml_unescape(item_link);
        /* The title is used by callers as part of a file path */
        string_replace(item_title, '/', '_');
        char *stripped_title_str = strip(item_title);

        if(parse_callback(stripped_title_str, item_link, userdata) != 0)
            return 0;

        item = strstr(item_end + 7, "<item>"); /* strlen("</item>") */
        if(!item)
            return 0;
    }
}

typedef struct {
    /* Title of the item to search for. Can be NULL, in which case nothing is searched for */
    const char *start_after;
    /* Set to 1 when an item with the title |start_after| was found */
    int found_start_after;
    /* Link of the item that was found. Points into the parsed rss buffer */
    const char *start_after_url;
} RssParseUserdata;

/* Stops the parsing at the first item with the title |start_after| and stores its link */
static int rss_parse_add_callback(char *title, char *link, void *userdata) {
    RssParseUserdata *rss_parse_userdata = userdata;
    if(rss_parse_userdata->start_after && strcmp(rss_parse_userdata->start_after, title) == 0) {
        rss_parse_userdata->found_start_after = 1;
        rss_parse_userdata->start_after_url = link;
        return 1;
    }
    return 0;
}

/*
    Removes the files that add_rss creates in |rss_tracked_dir| and then the directory itself.
    remove fails on directories that are not empty, so the files have to be removed first.
    This is best-effort cleanup after a failure, errors are ignored.
*/
static void remove_tracked_dir(const char *rss_tracked_dir) {
    static const char *filenames[] = { "link", "updated", "data", ".in_progress" };
    char filepath[PATH_MAX];
    for(size_t i = 0; i < sizeof(filenames) / sizeof(filenames[0]); ++i) {
        int filepath_len = snprintf(filepath, sizeof(filepath), "%s/%s", rss_tracked_dir, filenames[i]);
        if(filepath_len > 0 && (size_t)filepath_len < sizeof(filepath))
            remove(filepath);
    }
    remove(rss_tracked_dir);
}

/*
    Starts tracking the rss at |url|. The tracking data is stored in |rss_config_dir|/tracked/|name|.
    |name| can be NULL, in which case the title of the rss is used.
    |start_after| can be NULL. If it's not NULL then the rss has to contain an item with that title,
    and the title and the link of that item are passed on to write_plugin_json_to_file.
    |rss_config_dir| is not modified.
    Returns 0 on success.
*/
int add_rss(const char *name, const char *url, char *rss_config_dir, const char *start_after) {
    int result = 0;
    char rss_tracked_dir[PATH_MAX];
    char in_progress_filepath[PATH_MAX];
    char updated[32];
    int path_len;
    char *rss_title = NULL;
    RssParseUserdata rss_parse_userdata;

    Buffer buffer;
    buffer_init(&buffer);
    result = download_to_buffer(url, &buffer);
    if(result != 0) {
        fprintf(stderr, "Failed to download rss: %s\n", url);
        goto cleanup;
    }

    rss_parse_userdata.start_after = start_after;
    rss_parse_userdata.found_start_after = 0;
    rss_parse_userdata.start_after_url = NULL;

    /* NOTE(review): this assumes download_to_buffer null terminates the data - confirm */
    result = parse_rss(buffer.data, &rss_title, rss_parse_add_callback, &rss_parse_userdata);
    if(result != 0) {
        fprintf(stderr, "Failed to parse rss for url: %s\n", url);
        goto cleanup;
    }

    if(start_after && !rss_parse_userdata.found_start_after) {
        fprintf(stderr, "Failed to find %s in rss %s\n", start_after, url);
        result = -1;
        goto cleanup;
    }

    /* TODO: Add (add rss ) here */
    if(!name) {
        if(!rss_title) {
            fprintf(stderr, "Failed to find rss title and --name was not provided\n");
            result = -1;
            goto cleanup;
        }

        /* The name is used as a directory name. It points into |buffer|, which is alive until cleanup */
        string_replace(rss_title, '/', '_');
        char *stripped_rss_title = strip(rss_title);
        name = stripped_rss_title;
    }

    if(name[0] == '\0') {
        fprintf(stderr, "The name of the rss is empty\n");
        result = -1;
        goto cleanup;
    }

    /* A local buffer is used since the size of the buffer that |rss_config_dir| points to is not known */
    path_len = snprintf(rss_tracked_dir, sizeof(rss_tracked_dir), "%s/tracked/%s", rss_config_dir, name);
    if(path_len < 0 || (size_t)path_len >= sizeof(rss_tracked_dir)) {
        fprintf(stderr, "The path %s/tracked/%s is too long\n", rss_config_dir, name);
        result = -1;
        goto cleanup;
    }

    if(file_exists(rss_tracked_dir) == 0) {
        fprintf(stderr, "You are already tracking %s\n", url);
        result = -1;
        goto cleanup;
    }

    result = create_directory_recursive(rss_tracked_dir);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s, error: %s\n", rss_tracked_dir, strerror(result));
        goto cleanup;
    }

    /*
        Create an ".in_progress" file to prevent periodic sync from reading rss data
        before we have finished adding all the data.
    */
    path_len = snprintf(in_progress_filepath, sizeof(in_progress_filepath), "%s/.in_progress", rss_tracked_dir);
    if(path_len < 0 || (size_t)path_len >= sizeof(in_progress_filepath)) {
        fprintf(stderr, "The path %s/.in_progress is too long\n", rss_tracked_dir);
        remove_tracked_dir(rss_tracked_dir);
        result = -1;
        goto cleanup;
    }

    result = create_lock_file(in_progress_filepath);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/.in_progress\n", rss_tracked_dir);
        remove_tracked_dir(rss_tracked_dir);
        goto cleanup;
    }

    result = file_overwrite_in_dir(rss_tracked_dir, "link", url, strlen(url));
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/link\n", rss_tracked_dir);
        remove_tracked_dir(rss_tracked_dir);
        goto cleanup;
    }

    /* time_t is cast since its size differs between platforms */
    snprintf(updated, sizeof(updated), "%lld", (long long)time(NULL));
    result = file_overwrite_in_dir(rss_tracked_dir, "updated", updated, strlen(updated));
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/updated\n", rss_tracked_dir);
        remove_tracked_dir(rss_tracked_dir);
        goto cleanup;
    }

    result = write_plugin_json_to_file(rss_tracked_dir, "data", url, updated, start_after, rss_parse_userdata.start_after_url, NULL);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/data\n", rss_tracked_dir);
        remove_tracked_dir(rss_tracked_dir);
        goto cleanup;
    }

    /* All the data has been added */
    remove(in_progress_filepath);

    cleanup:
    buffer_deinit(&buffer);
    return result;
}

/*
    Returns 1 if the "downloaded" array in the json data of |tracked_rss| has an object
    with the same "title" as |title| or the same "url" as |link|, otherwise returns 0.
*/
static int is_item_already_downloaded(const char *title, const char *link, TrackedRss *tracked_rss) {
    /* TODO: Optimize this... */
    struct json_value_s *downloaded_json = json_object_get_field_by_name(tracked_rss->json_data, "downloaded");
    if(downloaded_json && downloaded_json->type == json_type_array) {
        struct json_array_s *downloaded_json_array = json_value_as_array(downloaded_json);
        struct json_array_element_s *downloaded_item = downloaded_json_array->start;
        for(; downloaded_item; downloaded_item = downloaded_item->next) {
            /* Elements that are not objects are skipped */
            struct json_object_s *downloaded_obj = json_value_as_object(downloaded_item->value);
            if(!downloaded_obj)
                continue;

            struct json_value_s *download_title_value = json_object_get_field_by_name(downloaded_obj, "title");
            struct json_value_s *download_url_value = json_object_get_field_by_name(downloaded_obj, "url");

            struct json_string_s *download_title_str = NULL;
            struct json_string_s *download_url_str = NULL;

            if(download_title_value)
                download_title_str = json_value_as_string(download_title_value);
            if(download_url_value)
                download_url_str = json_value_as_string(download_url_value);

            if((download_title_str && strcmp(download_title_str->string, title) == 0) || (download_url_str && strcmp(download_url_str->string, link) == 0))
                return 1;
        }
    }
    return 0;
}

typedef struct {
    /* The rss that is being synced, used to check which items have been downloaded */
    TrackedRss *tracked_rss;
    /* DownloadItemsData is appended to this for every item that should be downloaded */
    Buffer *download_items_buffer;
} RssParseSyncData;

/*
    Appends the item to the download items buffer.
    The parsing is stopped at the first item that has already been downloaded.
    The title and link are not copied, they point into the parsed rss buffer.
*/
static int rss_parse_sync_callback(char *title, char *link, void *userdata) {
    RssParseSyncData *rss_parse_sync_data = userdata;
    if(is_item_already_downloaded(title, link, rss_parse_sync_data->tracked_rss))
        return 1;

    DownloadItemsData download_items_data;
    download_items_data.title = title;
    download_items_data.link = link;
    buffer_append(rss_parse_sync_data->download_items_buffer, &download_items_data, sizeof(download_items_data));
    return 0;
}

/*
    Adds the torrents in |download_items_buffer| (an array of DownloadItemsData) to transmission,
    starting with the last item in the buffer. At most MAX_UPDATE_ITEMS torrents are added, and
    no more torrents are added after the first one that fails.
    The torrents that were added are then passed to tracked_item_update_latest.
    Returns 0 if all the torrents were added and the tracked item was updated.
*/
static int add_torrents_in_reverse(TransmissionSession *transmission_session, Buffer *download_items_buffer, TrackedRss *tracked_rss, char *rss_tracked_dir) {
    int result = 0;
    char *torrent_names[MAX_UPDATE_ITEMS];
    DownloadItemsData *added_download_items[MAX_UPDATE_ITEMS];

    DownloadItemsData *download_items_begin = buffer_begin(download_items_buffer);
    DownloadItemsData *download_items_end = buffer_end(download_items_buffer);
    size_t num_download_items = 0;
    if(download_items_begin && download_items_end)
        num_download_items = (size_t)(download_items_end - download_items_begin);

    int torrent_name_index = 0;
    /* Counting down from the size avoids creating a pointer to before the start of the array */
    for(size_t i = num_download_items; i > 0 && torrent_name_index < MAX_UPDATE_ITEMS; --i) {
        DownloadItemsData *download_item = &download_items_begin[i - 1];
        torrent_names[torrent_name_index] = NULL;
        if(transmission_add_torrent(transmission_session, download_item->link, &torrent_names[torrent_name_index]) != 0) {
            fprintf(stderr, "Failed to add torrent: %s\n", download_item->link);
            result = -1;
            break;
        }

        added_download_items[torrent_name_index] = download_item;
        ++torrent_name_index;
        fprintf(stderr, "Starting download of torrent: %s (title: %s)\n", download_item->link, download_item->title);
        /* Show notification that download has started? */
    }

    TrackedItem tracked_item;
    tracked_item.title = tracked_rss->title;
    tracked_item.link = tracked_rss->link;
    tracked_item.json_data = tracked_rss->json_data;

    /* The torrents that were added are saved even if a later torrent failed, but that failure is still returned */
    int update_result = tracked_item_update_latest(&tracked_item, rss_tracked_dir, added_download_items, torrent_names, torrent_name_index);
    if(update_result != 0)
        result = update_result;

    for(int i = 0; i < torrent_name_index; ++i) {
        free(torrent_names[i]);
    }

    return result;
}

/*
    Downloads the rss of |tracked_rss|, adds the torrents of the new items to transmission and
    writes the current time to |rss_config_dir|/tracked/|title|/synced.
    The new items are the items before the first item that has already been downloaded.
    |rss_config_dir| is not modified.
    Returns 0 on success.
*/
int sync_rss(TrackedRss *tracked_rss, TransmissionSession *transmission_session, char *rss_config_dir) {
    int result = 0;
    char rss_tracked_dir[PATH_MAX];
    char updated[32];
    int tracked_dir_len;
    int title_len;
    char *rss_title = NULL;
    RssParseSyncData rss_parse_sync_data;

    fprintf(stderr, "Syncing %s\n", tracked_rss->title);

    /* The items in this buffer point into |rss_data_buffer| */
    Buffer download_items_buffer;
    buffer_init(&download_items_buffer);

    Buffer rss_data_buffer;
    buffer_init(&rss_data_buffer);
    result = download_to_buffer(tracked_rss->link, &rss_data_buffer);
    if(result != 0) {
        fprintf(stderr, "Failed to download rss: %s\n", tracked_rss->link);
        goto cleanup;
    }

    rss_parse_sync_data.tracked_rss = tracked_rss;
    rss_parse_sync_data.download_items_buffer = &download_items_buffer;

    /* NOTE(review): this assumes download_to_buffer null terminates the data - confirm */
    result = parse_rss(rss_data_buffer.data, &rss_title, rss_parse_sync_callback, &rss_parse_sync_data);
    if(result != 0) {
        fprintf(stderr, "Failed to parse rss for url: %s\n", tracked_rss->link);
        goto cleanup;
    }

    /* A local buffer is used since the size of the buffer that |rss_config_dir| points to is not known */
    tracked_dir_len = snprintf(rss_tracked_dir, sizeof(rss_tracked_dir), "%s/tracked/", rss_config_dir);
    if(tracked_dir_len < 0 || (size_t)tracked_dir_len >= sizeof(rss_tracked_dir)) {
        fprintf(stderr, "The path %s/tracked/ is too long\n", rss_config_dir);
        result = -1;
        goto cleanup;
    }

    result = add_torrents_in_reverse(transmission_session, &download_items_buffer, tracked_rss, rss_tracked_dir);
    if(result != 0) {
        fprintf(stderr, "Failed while adding torrents for url: %s\n", tracked_rss->link);
        goto cleanup;
    }

    /* The title is written at a fixed offset so the path is correct no matter what the call above did to the buffer */
    title_len = snprintf(rss_tracked_dir + tracked_dir_len, sizeof(rss_tracked_dir) - (size_t)tracked_dir_len, "%s", tracked_rss->title);
    if(title_len < 0 || (size_t)title_len >= sizeof(rss_tracked_dir) - (size_t)tracked_dir_len) {
        fprintf(stderr, "The path %s/tracked/%s is too long\n", rss_config_dir, tracked_rss->title);
        result = -1;
        goto cleanup;
    }

    /* time_t is cast since its size differs between platforms */
    snprintf(updated, sizeof(updated), "%lld", (long long)time(NULL));
    result = file_overwrite_in_dir(rss_tracked_dir, "synced", updated, strlen(updated));
    if(result != 0) {
        fprintf(stderr, "Failed to update %s/synced\n", rss_tracked_dir);
        goto cleanup;
    }

    cleanup:
    buffer_deinit(&rss_data_buffer);
    buffer_deinit(&download_items_buffer);
    return result;
}