#include "rss.h"
#include "download.h"
#include "transmission.h"
#include "stringutils.h"
#include "fileutils.h"
#include "buffer.h"
#include "rss_html_common.h"
#include "json.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <time.h>
#include <assert.h>

/* Number of fields stored for every entry of the "downloaded" json array */
enum { DOWNLOADED_FIELD_COUNT = 4 };

static int is_alpha_lowercase(char c) {
    return c >= 'a' && c <= 'z';
}

static int is_digit(char c) {
    return c >= '0' && c <= '9';
}

/*
    Returns a pointer to the first character after the entity name that starts at |str|
    (the character after '&'). Name characters are lowercase letters, digits and '#'.
    The scan stops at the first character that is not a name character, so it always
    terminates (the result points at ';' for a well formed entity, or at any other
    character, including the terminating NUL, otherwise).
*/
static char* get_amp_end(char *str) {
    while(is_alpha_lowercase(*str) || is_digit(*str) || *str == '#')
        ++str;
    return str;
}

/* Returns non-zero if the |name_len| bytes at |name| are exactly the string |expected| */
static int entity_name_equals(const char *name, size_t name_len, const char *expected) {
    return strlen(expected) == name_len && memcmp(name, expected, name_len) == 0;
}

/*
    Decodes the entity name |name| (without the leading '&' and without the terminator).
    Returns the ASCII character (1..127) the entity stands for, or 0 if the entity is
    unknown, malformed or outside the ASCII range.
    Numeric references can be decimal ("#39") or lowercase hexadecimal ("#x27").
*/
static int decode_entity(const char *name, size_t name_len) {
    if(name_len > 0 && name[0] == '#') {
        const char *digits = name + 1;
        int base = 10;
        if(name_len > 1 && digits[0] == 'x') {
            ++digits;
            base = 16;
        }

        char *parse_end = NULL;
        long code = strtol(digits, &parse_end, base);
        /* Require at least one digit and that the whole name was consumed */
        if(parse_end == digits || parse_end != name + name_len)
            return 0;
        /* 0 would terminate the string early and values above 127 do not fit in one ASCII char */
        if(code < 1 || code > 127)
            return 0;
        return (int)code;
    }

    if(entity_name_equals(name, name_len, "amp"))
        return '&';
    if(entity_name_equals(name, name_len, "lt"))
        return '<';
    if(entity_name_equals(name, name_len, "gt"))
        return '>';
    if(entity_name_equals(name, name_len, "apos"))
        return '\'';
    if(entity_name_equals(name, name_len, "quot"))
        return '"';
    return 0;
}

/*
    Replaces xml entities in |str| in-place. The output is never longer than the input.
    - Known entities (amp, lt, gt, apos, quot and ASCII numeric references) are replaced by their character.
    - Unknown entities and numeric references outside ASCII are removed.
    - A '&' whose name is not followed by ';' (or the end of the string) is not an entity and is kept as-is.
*/
static void xml_unescape(char *str) {
    char *out = str;
    while(*str != '\0') {
        if(*str != '&') {
            *out++ = *str++;
            continue;
        }

        char *name = str + 1;
        char *name_end = get_amp_end(name);
        if(*name_end != ';' && *name_end != '\0') {
            /* Bare ampersand, for example "a & b" */
            *out++ = *str++;
            continue;
        }

        int decoded = decode_entity(name, (size_t)(name_end - name));
        if(decoded != 0)
            *out++ = (char)decoded;

        str = name_end;
        if(*str == ';')
            ++str;
    }
    *out = '\0';
}

/*
    Terminates |str| at the first occurrence of |tag| by overwriting the first character of the tag with NUL.
    Returns a pointer to where the tag started, or NULL (leaving |str| untouched) if the tag was not found.
*/
static char* string_substr_before_tag_end(char *str, const char *tag) {
    char *tag_p = strstr(str, tag);
    if(tag_p)
        *tag_p = '\0';
    return tag_p;
}

/* Called for every rss item. Return non-zero to stop parsing */
typedef int (*RssParseCallback)(char *title, char *link, void *userdata);

/*
    Parses the rss document |str| in-place (the buffer is modified: closing tags are overwritten with NUL
    and entities are unescaped).
    |rss_title_str| is set to the channel title if the channel has a title before its first item, otherwise to NULL.
    The pointers passed to |parse_callback| and stored in |rss_title_str| point into |str|.
    Returns 0 on success (also when the callback stops the iteration) and 1 if the document
    has no channel or contains a malformed item.
*/
static int parse_rss(char *str, char **rss_title_str, RssParseCallback parse_callback, void *userdata) {
    *rss_title_str = NULL;

    char *channel_start = strstr(str, "<channel>");
    if(!channel_start)
        return 1;

    char *after_channel = channel_start + 9;
    char *rss_title = strstr(after_channel, "<title>");
    char *first_item = strstr(after_channel, "<item>");

    /* A title that comes after the first item belongs to an item, not to the channel */
    if(rss_title && (!first_item || rss_title < first_item)) {
        rss_title += 7;
        string_substr_before_tag_end(rss_title, "</title>");
        xml_unescape(rss_title);
        *rss_title_str = rss_title;
    }

    if(!first_item)
        return 0;

    char *item = first_item;
    for(;;) {
        char *after_item_start = item + 6;
        char *item_end = strstr(after_item_start, "</item>");
        if(!item_end)
            return 1;

        char *item_title = strstr(after_item_start, "<title>");
        if(!item_title || item_title >= item_end)
            return 1;
        item_title += 7;

        /* The closing tags have to be inside the current item, otherwise we would cut into the next item */
        char *title_end = strstr(item_title, "</title>");
        if(!title_end || title_end >= item_end)
            return 1;
        *title_end = '\0';
        char *after_title = title_end + 8;

        char *item_link = strstr(after_title, "<link>");
        if(!item_link || item_link >= item_end)
            return 1;
        item_link += 6;

        char *link_end = strstr(item_link, "</link>");
        if(!link_end || link_end >= item_end)
            return 1;
        *link_end = '\0';

        xml_unescape(item_title);
        xml_unescape(item_link);
        /* The title is used as a filename so it can't contain path separators */
        string_replace(item_title, '/', '_');
        char *stripped_title_str = strip(item_title);

        if(parse_callback(stripped_title_str, item_link, userdata) != 0)
            return 0;

        item = strstr(item_end + 7, "<item>");
        if(!item)
            return 0;
    }
}

typedef struct {
    const char *start_after;     /* Title to search for, can be NULL */
    int found_start_after;       /* Set to 1 when an item with the title |start_after| was found */
    const char *start_after_url; /* Link of the found item, points into the parsed rss buffer */
} RssParseUserdata;

/* Stops parsing at the item whose title equals |start_after| and remembers the link of that item */
static int rss_parse_add_callback(char *title, char *link, void *userdata) {
    RssParseUserdata *rss_parse_userdata = userdata;
    if(rss_parse_userdata->start_after && strcmp(rss_parse_userdata->start_after, title) == 0) {
        rss_parse_userdata->found_start_after = 1;
        rss_parse_userdata->start_after_url = link;
        return 1;
    }
    return 0;
}

/*
    Starts tracking the rss feed at |url|.
    |name| is the name of the tracked directory, if it's NULL then the title of the rss channel is used.
    |rss_config_dir| is used as scratch space: "/tracked/<name>" is appended to it while the function runs
    and it's restored to its original content before returning.
    NOTE(review): the capacity of |rss_config_dir| is not visible here, the caller has to provide enough room.
    |start_after| is optional, if it's set then an item with that title has to exist in the feed.
    Returns 0 on success.
*/
int add_rss(const char *name, const char *url, char *rss_config_dir, const char *start_after) {
    size_t rss_config_dir_len = strlen(rss_config_dir);
    int result = 0;
    Buffer buffer;
    buffer_init(&buffer);

    result = download_to_buffer(url, &buffer);
    if(result != 0) {
        fprintf(stderr, "Failed to download rss: %s\n", url);
        goto cleanup;
    }

    RssParseUserdata rss_parse_userdata;
    rss_parse_userdata.start_after = start_after;
    rss_parse_userdata.found_start_after = 0;
    rss_parse_userdata.start_after_url = NULL;

    char *rss_title = NULL;
    result = parse_rss(buffer.data, &rss_title, rss_parse_add_callback, &rss_parse_userdata);
    if(result != 0) {
        fprintf(stderr, "Failed to parse rss for url: %s\n", url);
        goto cleanup;
    }

    if(start_after && !rss_parse_userdata.found_start_after) {
        fprintf(stderr, "Failed to find %s in rss %s\n", start_after, url);
        result = -1;
        goto cleanup;
    }

    /* TODO: Add (add rss ) here */

    if(!name) {
        if(!rss_title) {
            fprintf(stderr, "Failed to find rss title and --name was not provided\n");
            result = -1;
            goto cleanup;
        }

        /* The name is used as a directory name so it can't contain path separators */
        string_replace(rss_title, '/', '_');
        char *stripped_rss_title = strip(rss_title);
        name = stripped_rss_title;
    }

    char *rss_tracked_dir = rss_config_dir;
    strcat(rss_tracked_dir, "/tracked/");
    strcat(rss_tracked_dir, name);

    if(file_exists(rss_tracked_dir) == 0) {
        fprintf(stderr, "You are already tracking %s\n", url);
        result = -1;
        goto cleanup;
    }

    result = create_directory_recursive(rss_tracked_dir);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s, error: %s\n", rss_tracked_dir, strerror(result));
        goto cleanup;
    }

    /*
        Create an ".in_progress" file to prevent periodic sync from reading rss data
        before we have finished adding all the data.
    */
    char in_progress_filepath[PATH_MAX];
    int in_progress_filepath_len = snprintf(in_progress_filepath, sizeof(in_progress_filepath), "%s/.in_progress", rss_tracked_dir);
    if(in_progress_filepath_len < 0 || (size_t)in_progress_filepath_len >= sizeof(in_progress_filepath)) {
        /* The path does not fit, don't create a file at a truncated path */
        fprintf(stderr, "Failed to create %s/.in_progress\n", rss_tracked_dir);
        result = -1;
        goto cleanup;
    }

    result = create_lock_file(in_progress_filepath);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/.in_progress\n", rss_tracked_dir);
        goto cleanup;
    }

    result = file_overwrite_in_dir(rss_tracked_dir, "link", url, strlen(url));
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/link\n", rss_tracked_dir);
        remove(rss_tracked_dir);
        goto cleanup;
    }

    char updated[32];
    /* time_t is not guaranteed to be a long, convert it to a type with a known format specifier */
    snprintf(updated, sizeof(updated), "%lld", (long long)time(NULL));
    result = file_overwrite_in_dir(rss_tracked_dir, "updated", updated, strlen(updated));
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/updated\n", rss_tracked_dir);
        remove(rss_tracked_dir);
        goto cleanup;
    }

    result = write_plugin_json_to_file(rss_tracked_dir, "data", url, updated, start_after, rss_parse_userdata.start_after_url, NULL);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/data\n", rss_tracked_dir);
        remove(rss_tracked_dir);
        goto cleanup;
    }

    remove(in_progress_filepath);

    cleanup:
    /* Undo the path we appended, the same way sync_rss does */
    rss_config_dir[rss_config_dir_len] = '\0';
    buffer_deinit(&buffer);
    return result;
}

/*
    Returns 1 if the "downloaded" array of |tracked_rss| has an entry with the title |title|
    or the url |link|, otherwise returns 0.
*/
static int is_item_already_downloaded(const char *title, const char *link, TrackedRss *tracked_rss) {
    /* TODO: Optimize this... */
    struct json_value_s *downloaded_json = json_object_get_field_by_name(tracked_rss->json_data, "downloaded");
    if(!downloaded_json || downloaded_json->type != json_type_array)
        return 0;

    struct json_array_s *downloaded_json_array = json_value_as_array(downloaded_json);
    struct json_array_element_s *downloaded_item = downloaded_json_array->start;
    for(; downloaded_item; downloaded_item = downloaded_item->next) {
        struct json_object_s *downloaded_obj = json_value_as_object(downloaded_item->value);
        if(!downloaded_obj)
            continue;

        struct json_value_s *download_title_value = json_object_get_field_by_name(downloaded_obj, "title");
        struct json_value_s *download_url_value = json_object_get_field_by_name(downloaded_obj, "url");

        struct json_string_s *download_title_str = NULL;
        struct json_string_s *download_url_str = NULL;

        if(download_title_value)
            download_title_str = json_value_as_string(download_title_value);
        if(download_url_value)
            download_url_str = json_value_as_string(download_url_value);

        if(download_title_str && strcmp(download_title_str->string, title) == 0)
            return 1;
        if(download_url_str && strcmp(download_url_str->string, link) == 0)
            return 1;
    }
    return 0;
}

/* An item to download. Both strings point into the parsed rss buffer */
typedef struct {
    const char *title;
    const char *link;
} DownloadItemsData;

typedef struct {
    TrackedRss *tracked_rss;
    Buffer *download_items_buffer; /* Receives one DownloadItemsData per new item */
} RssParseSyncData;

/*
    Collects the items that have not been downloaded yet.
    Parsing stops at the first item that has already been downloaded.
*/
static int rss_parse_sync_callback(char *title, char *link, void *userdata) {
    RssParseSyncData *rss_parse_sync_data = userdata;
    if(is_item_already_downloaded(title, link, rss_parse_sync_data->tracked_rss))
        return 1;

    DownloadItemsData download_items_data;
    download_items_data.title = title;
    download_items_data.link = link;
    buffer_append(rss_parse_sync_data->download_items_buffer, &download_items_data, sizeof(download_items_data));
    return 0;
}

/* Returns the last element of |json_array|, or NULL if the array is empty */
static struct json_array_element_s* get_last_element_in_json_array(struct json_array_s *json_array) {
    struct json_array_element_s *json_element = json_array->start;
    if(!json_element)
        return NULL;

    while(json_element->next)
        json_element = json_element->next;
    return json_element;
}

/*
    One entry of the "downloaded" json array together with all the json nodes and the text it refers to,
    allocated as a single heap block. It has to be on the heap since it's linked into the json data of the
    tracked rss, which is used after the function that creates the entry has returned.
*/
typedef struct {
    struct json_array_element_s array_element;
    struct json_value_s object_value;
    struct json_object_s object;
    struct json_object_element_s fields[DOWNLOADED_FIELD_COUNT];
    struct json_string_s keys[DOWNLOADED_FIELD_COUNT];
    struct json_string_s value_strings[DOWNLOADED_FIELD_COUNT];
    struct json_value_s values[DOWNLOADED_FIELD_COUNT];
    const char *time_text; /* The copy of the time string, stored in |text| */
    char text[];           /* NUL terminated copies of title, filename, time and url */
} DownloadedEntry;

/* The "downloaded" field itself, used when the json data doesn't have a "downloaded" array yet */
typedef struct {
    struct json_object_element_s object_element;
    struct json_string_s key;
    struct json_value_s array_value;
    struct json_array_s array;
} DownloadedArrayField;

/*
    Creates the json object { "title", "filename", "time", "url" } wrapped in an array element.
    All strings are copied. Returns NULL if memory allocation fails.
*/
static DownloadedEntry* downloaded_entry_create(const char *title, const char *filename, const char *time_str, const char *url) {
    static const char *const field_names[DOWNLOADED_FIELD_COUNT] = { "title", "filename", "time", "url" };
    const char *field_values[DOWNLOADED_FIELD_COUNT];
    size_t field_sizes[DOWNLOADED_FIELD_COUNT];
    size_t text_size = 0;
    int i;

    field_values[0] = title;
    field_values[1] = filename;
    field_values[2] = time_str;
    field_values[3] = url;

    for(i = 0; i < DOWNLOADED_FIELD_COUNT; ++i) {
        field_sizes[i] = strlen(field_values[i]);
        text_size += field_sizes[i] + 1;
    }

    DownloadedEntry *entry = malloc(sizeof(*entry) + text_size);
    if(!entry)
        return NULL;

    char *text = entry->text;
    for(i = 0; i < DOWNLOADED_FIELD_COUNT; ++i) {
        memcpy(text, field_values[i], field_sizes[i] + 1);
        if(i == 2)
            entry->time_text = text;

        create_json_string(&entry->keys[i], field_names[i], strlen(field_names[i]));
        create_json_string(&entry->value_strings[i], text, field_sizes[i]);
        init_json_value_str(&entry->values[i], &entry->value_strings[i]);

        entry->fields[i].name = &entry->keys[i];
        entry->fields[i].value = &entry->values[i];
        entry->fields[i].next = (i + 1 < DOWNLOADED_FIELD_COUNT) ? &entry->fields[i + 1] : NULL;

        text += field_sizes[i] + 1;
    }

    entry->object.start = &entry->fields[0];
    entry->object.length = DOWNLOADED_FIELD_COUNT;
    entry->object_value.payload = &entry->object;
    entry->object_value.type = json_type_object;
    entry->array_element.value = &entry->object_value;
    entry->array_element.next = NULL;
    return entry;
}

/*
    Records that |latest_title| (with link |url| and torrent name |filename|) has been downloaded:
    writes the "updated" file, appends an entry to the "downloaded" array in the json data of
    |tracked_rss| (the array is created if it doesn't exist) and writes the json to the "data" file.
    |rss_tracked_dir| is used as scratch space: the title of the tracked rss is appended to it while
    the function runs and it's restored before returning.
    The nodes added to the json data are heap allocated and owned by the json data from then on.
    NOTE(review): the code that releases tracked_rss->json_data is not visible here; if it frees the
    json as one block then the added nodes are never freed.
    Returns 0 on success.
*/
/* TODO: If this fails in the middle, recover and update this next time somehow */
static int rss_update_latest(char *rss_tracked_dir, TrackedRss *tracked_rss, const char *latest_title, const char *url, const char *filename) {
    size_t rss_tracked_dir_len = strlen(rss_tracked_dir);
    int result = 0;
    int entry_linked = 0;
    DownloadedEntry *entry = NULL;
    DownloadedArrayField *array_field = NULL;

    char *item_filepath = rss_tracked_dir;
    strcat(item_filepath, tracked_rss->title);

    char updated[32];
    /* time_t is not guaranteed to be a long, convert it to a type with a known format specifier */
    snprintf(updated, sizeof(updated), "%lld", (long long)time(NULL));
    size_t updated_len = strlen(updated);

    struct json_value_s *downloaded_json = json_object_get_field_by_name(tracked_rss->json_data, "downloaded");
    int has_downloaded_array = downloaded_json && downloaded_json->type == json_type_array;

    /* Allocate everything before changing any file or json data, so an allocation failure has no side effects */
    entry = downloaded_entry_create(latest_title, filename, updated, url);
    if(entry && !has_downloaded_array)
        array_field = malloc(sizeof(*array_field));
    if(!entry || (!has_downloaded_array && !array_field)) {
        fprintf(stderr, "Failed to allocate memory for the downloaded item %s\n", latest_title);
        result = -1;
        goto cleanup;
    }

    result = file_overwrite_in_dir(item_filepath, "updated", updated, updated_len);
    if(result != 0) {
        fprintf(stderr, "Failed to update %s/updated\n", item_filepath);
        goto cleanup;
    }

    /* The json data may not have a (string) "updated" field, in which case there is nothing to update */
    struct json_value_s *updated_value = json_object_get_field_by_name(tracked_rss->json_data, "updated");
    struct json_string_s *updated_json = updated_value ? json_value_as_string(updated_value) : NULL;
    if(updated_json) {
        /* Point to the heap copy of the time, the local buffer is gone when this function returns */
        updated_json->string = entry->time_text;
        updated_json->string_size = updated_len;
    }

    if(has_downloaded_array) {
        struct json_array_s *downloaded_json_array = json_value_as_array(downloaded_json);
        struct json_array_element_s *last_downloaded_element = get_last_element_in_json_array(downloaded_json_array);
        if(last_downloaded_element)
            last_downloaded_element->next = &entry->array_element;
        else
            downloaded_json_array->start = &entry->array_element;
        downloaded_json_array->length++;
    } else {
        /* There is no "downloaded" array yet, add one as the first field of the json root object */
        create_json_string(&array_field->key, "downloaded", 10);
        array_field->array.start = &entry->array_element;
        array_field->array.length = 1;
        array_field->array_value.payload = &array_field->array;
        array_field->array_value.type = json_type_array;
        array_field->object_element.name = &array_field->key;
        array_field->object_element.value = &array_field->array_value;
        array_field->object_element.next = tracked_rss->json_data->start;

        tracked_rss->json_data->start = &array_field->object_element;
        tracked_rss->json_data->length++;
    }
    entry_linked = 1;

    struct json_value_s json_root_value;
    json_root_value.payload = tracked_rss->json_data;
    json_root_value.type = json_type_object;

    size_t json_body_size = 0;
    char *json_body_str = json_write_pretty(&json_root_value, " ", "\n", &json_body_size);
    if(!json_body_str) {
        fprintf(stderr, "Failed to write json data to file %s/data\n", item_filepath);
        result = -1;
        goto cleanup;
    }

    /* Workaround json bug (?) */
    json_body_size = strlen(json_body_str);

    result = file_overwrite_in_dir(item_filepath, "data", json_body_str, json_body_size);
    free(json_body_str);

    cleanup:
    if(!entry_linked) {
        free(array_field);
        free(entry);
    }
    rss_tracked_dir[rss_tracked_dir_len] = '\0';
    return result;
}

/*
    Adds the collected items to transmission, from the last item in the buffer to the first
    (rss feeds list the newest item first, so the oldest item is added first),
    and records every added item with rss_update_latest.
    Returns 0 on success and 1 as soon as one item fails.
*/
static int add_torrents_in_reverse(Buffer *download_items_buffer, TrackedRss *tracked_rss, char *rss_tracked_dir) {
    DownloadItemsData *download_items_begin = buffer_begin(download_items_buffer);
    DownloadItemsData *download_items_it = buffer_end(download_items_buffer);

    /* Decrement inside the loop so no pointer before the start of the buffer is ever formed */
    while(download_items_it != download_items_begin) {
        --download_items_it;

        if(transmission_add_torrent(download_items_it->link) != 0) {
            fprintf(stderr, "Failed to add torrent: %s\n", download_items_it->link);
            return 1;
        }

        /* TODO: Verify if the last torrent is immediately accessible or if it gets an old torrent... */
        int id;
        float percentage_done;
        char torrent_name[256];
        if(transmission_get_last_added_torrent(&id, &percentage_done, torrent_name) != 0) {
            fprintf(stderr, "Failed to get added torrent name for torrent: %s\n", download_items_it->link);
            return 1;
        }

        if(rss_update_latest(rss_tracked_dir, tracked_rss, download_items_it->title, download_items_it->link, torrent_name) != 0) {
            fprintf(stderr, "Failed to update rss tracked data for %s\n", download_items_it->title);
            return 1;
        }

        /* Show notification that download has started? */
    }
    return 0;
}

/*
    Downloads the feed of |tracked_rss|, adds every item that has not been downloaded yet to transmission
    and writes the time of the sync to the "synced" file of the tracked rss.
    |rss_config_dir| is used as scratch space: "/tracked/<title>" is appended to it while the function runs
    and it's restored to its original content before returning.
    NOTE(review): the capacity of |rss_config_dir| is not visible here, the caller has to provide enough room.
    Returns 0 on success.
*/
int sync_rss(TrackedRss *tracked_rss, char *rss_config_dir) {
    /* TODO: This can be cached */
    size_t rss_config_dir_len = strlen(rss_config_dir);

    fprintf(stderr, "Syncing %s\n", tracked_rss->title);

    int result = 0;
    Buffer download_items_buffer;
    buffer_init(&download_items_buffer);

    Buffer rss_data_buffer;
    buffer_init(&rss_data_buffer);

    result = download_to_buffer(tracked_rss->link, &rss_data_buffer);
    if(result != 0) {
        fprintf(stderr, "Failed to download rss: %s\n", tracked_rss->link);
        goto cleanup;
    }

    RssParseSyncData rss_parse_sync_data;
    rss_parse_sync_data.tracked_rss = tracked_rss;
    rss_parse_sync_data.download_items_buffer = &download_items_buffer;

    char *rss_title = NULL;
    result = parse_rss(rss_data_buffer.data, &rss_title, rss_parse_sync_callback, &rss_parse_sync_data);
    if(result != 0) {
        fprintf(stderr, "Failed to parse rss for url: %s\n", tracked_rss->link);
        goto cleanup;
    }

    char *rss_tracked_dir = rss_config_dir;
    strcat(rss_tracked_dir, "/tracked/");

    result = add_torrents_in_reverse(&download_items_buffer, tracked_rss, rss_tracked_dir);
    if(result != 0) {
        fprintf(stderr, "Failed while adding torrents for url: %s\n", tracked_rss->link);
        goto cleanup;
    }

    char updated[32];
    /* time_t is not guaranteed to be a long, convert it to a type with a known format specifier */
    snprintf(updated, sizeof(updated), "%lld", (long long)time(NULL));
    strcat(rss_tracked_dir, tracked_rss->title);
    result = file_overwrite_in_dir(rss_tracked_dir, "synced", updated, strlen(updated));
    if(result != 0) {
        fprintf(stderr, "Failed to update %s/synced\n", rss_tracked_dir);
        goto cleanup;
    }

    cleanup:
    rss_config_dir[rss_config_dir_len] = '\0';
    buffer_deinit(&rss_data_buffer);
    buffer_deinit(&download_items_buffer);
    return result;
}