diff options
Diffstat (limited to 'src/rss.c')
-rw-r--r-- | src/rss.c | 376 |
1 files changed, 354 insertions, 22 deletions
@@ -1,14 +1,17 @@
 #include "rss.h"
 #include "download.h"
+#include "transmission.h"
 #include "stringutils.h"
 #include "fileutils.h"
 #include "buffer.h"
 #include "rss_html_common.h"
+#include "json.h"
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <limits.h>
 #include <time.h>
+#include <assert.h>
 
 static int is_alpha_lowercase(char c) {
     return c >= 'a' && c <= 'z';
@@ -29,7 +32,8 @@ static char* get_amp_end(char *str) {
     return str;
 }
 
-static void xml_unescape(char *str, char *result, int result_length) {
+static void xml_unescape(char *str) {
+    char *result = str;
     int index = 0;
     for(;;) {
         char c = *str;
@@ -39,6 +43,7 @@ static void xml_unescape(char *str, char *result, int result_length) {
             *amp_end = '\0';
 
             if(str[1] == '#') {
+                /* TODO: Also support non-ascii numbers */
                 result[index++] = atoi(str + 2);
             } else {
                 if(strcmp(str + 1, "amp") == 0)
@@ -62,11 +67,6 @@ static void xml_unescape(char *str, char *result, int result_length) {
             result[index++] = c;
             ++str;
         }
-
-        if(index == result_length - 1) {
-            result[index] = '\0';
-            break;
-        }
     }
 }
 
@@ -79,7 +79,9 @@ static char* string_substr_before_tag_end(char *str, const char *tag) {
 
 typedef int (*RssParseCallback)(char *title, char *link, void *userdata);
 
-static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, RssParseCallback parse_callback, void *userdata) {
+static int parse_rss(char *str, char **rss_title_str, RssParseCallback parse_callback, void *userdata) {
+    *rss_title_str = NULL;
+
     char *channel_start = strstr(str, "<channel>");
     if(!channel_start)
         return 1;
@@ -91,21 +93,18 @@ static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, Rss
     if(!first_item) {
         rss_title += 7;
         string_substr_before_tag_end(rss_title, "</title>");
-        xml_unescape(rss_title, rss_title_str, rss_title_str_size);
+        xml_unescape(rss_title);
+        *rss_title_str = rss_title;
         return 0;
     }
 
     if(rss_title < first_item) {
         rss_title += 7;
         string_substr_before_tag_end(rss_title, "</title>");
-        xml_unescape(rss_title, rss_title_str, rss_title_str_size);
-    } else {
-        rss_title_str[0] = '\0';
+        xml_unescape(rss_title);
+        *rss_title_str = rss_title;
     }
 
-    char title_str[256];
-    char link_str[2084];
-
     char *item = first_item;
     for(;;) {
         char *after_first_item = item + 6;
@@ -136,9 +135,11 @@ static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, Rss
         item_link += 6;
         string_substr_before_tag_end(item_link, "</link>");
 
-        xml_unescape(item_title, title_str, sizeof(title_str));
-        xml_unescape(item_link, link_str, sizeof(link_str));
-        if(parse_callback(title_str, link_str, userdata) != 0)
+        xml_unescape(item_title);
+        xml_unescape(item_link);
+        string_replace(item_title, '/', '_');
+        char *stripped_title_str = strip(item_title);
+        if(parse_callback(stripped_title_str, item_link, userdata) != 0)
             return 0;
 
         item = strstr(item_end + 7, "<item>");
@@ -153,7 +154,7 @@ typedef struct {
     const char *start_after_url;
 } RssParseUserdata;
 
-static int rss_parse_callback(char *title, char *link, void *userdata) {
+static int rss_parse_add_callback(char *title, char *link, void *userdata) {
     RssParseUserdata *rss_parse_userdata = userdata;
     if(rss_parse_userdata->start_after && strcmp(rss_parse_userdata->start_after, title) == 0) {
         rss_parse_userdata->found_start_after = 1;
@@ -179,9 +180,8 @@ int add_rss(const char *name, const char *url, char *rss_config_dir, const char
     rss_parse_userdata.found_start_after = 0;
     rss_parse_userdata.start_after_url = NULL;
 
-    /* TODO: What if rss title is longer than this? */
-    char rss_title[250];
-    result = parse_rss(buffer.data, rss_title, sizeof(rss_title), rss_parse_callback, &rss_parse_userdata);
+    char *rss_title = NULL;
+    result = parse_rss(buffer.data, &rss_title, rss_parse_add_callback, &rss_parse_userdata);
     if(result != 0) {
         fprintf(stderr, "Failed to parse rss for url: %s\n", url);
         goto cleanup;
@@ -196,7 +196,7 @@ int add_rss(const char *name, const char *url, char *rss_config_dir, const char
 
     /* TODO: Add (add rss <episode name>) here */
     if(!name) {
-        if(rss_title[0] == '\0') {
+        if(!rss_title) {
             fprintf(stderr, "Failed to find rss title and --name was not provided\n");
             result = -1;
             goto cleanup;
@@ -265,3 +265,335 @@ int add_rss(const char *name, const char *url, char *rss_config_dir, const char
     buffer_deinit(&buffer);
     return result;
 }
+
+/*
+ * Returns 1 if the "downloaded" json array of |tracked_rss| already has an entry
+ * with the same title or the same url as the given item, otherwise returns 0.
+ */
+static int is_item_already_downloaded(const char *title, const char *link, TrackedRss *tracked_rss) {
+    /* TODO: Optimize this... */
+    struct json_value_s *downloaded_json = json_object_get_field_by_name(tracked_rss->json_data, "downloaded");
+    if(downloaded_json && downloaded_json->type == json_type_array) {
+        struct json_array_s *downloaded_json_array = json_value_as_array(downloaded_json);
+        struct json_array_element_s *downloaded_item = downloaded_json_array->start;
+        for(; downloaded_item; downloaded_item = downloaded_item->next) {
+            struct json_object_s *downloaded_obj = json_value_as_object(downloaded_item->value);
+            if(!downloaded_obj)
+                continue;
+
+            struct json_value_s *download_title_value = json_object_get_field_by_name(downloaded_obj, "title");
+            struct json_value_s *download_url_value = json_object_get_field_by_name(downloaded_obj, "url");
+
+            struct json_string_s *download_title_str = NULL;
+            struct json_string_s *download_url_str = NULL;
+
+            if(download_title_value)
+                download_title_str = json_value_as_string(download_title_value);
+
+            if(download_url_value)
+                download_url_str = json_value_as_string(download_url_value);
+
+            if((download_title_str && strcmp(download_title_str->string, title) == 0) || (download_url_str && strcmp(download_url_str->string, link) == 0))
+                return 1;
+        }
+    }
+    return 0;
+}
+
+/* An rss item queued for download. The strings are borrowed pointers, not copies */
+typedef struct {
+    const char *title;
+    const char *link;
+} DownloadItemsData;
+
+/* Userdata passed to rss_parse_sync_callback */
+typedef struct {
+    TrackedRss *tracked_rss;
+    Buffer *download_items_buffer;
+} RssParseSyncData;
+
+/*
+ * parse_rss callback used when syncing. Queues every item that has not been
+ * downloaded yet and returns non-zero at the first item that has, which makes
+ * parse_rss stop. Only the |title| and |link| pointers are stored, not copies.
+ */
+static int rss_parse_sync_callback(char *title, char *link, void *userdata) {
+    RssParseSyncData *rss_parse_sync_data = userdata;
+    if(is_item_already_downloaded(title, link, rss_parse_sync_data->tracked_rss))
+        return 1;
+
+    DownloadItemsData download_items_data;
+    download_items_data.title = title;
+    download_items_data.link = link;
+    buffer_append(rss_parse_sync_data->download_items_buffer, &download_items_data, sizeof(download_items_data));
+    return 0;
+}
+
+/* Returns the last element of |json_array|, or NULL if the array is empty */
+static struct json_array_element_s* get_last_element_in_json_array(struct json_array_s *json_array) {
+    struct json_array_element_s *json_element = json_array->start;
+    while(json_element) {
+        struct json_array_element_s *next_json_element = json_element->next;
+        if(next_json_element)
+            json_element = next_json_element;
+        else
+            return json_element;
+    }
+    return NULL;
+}
+
+/*
+ * Records a newly added download for |tracked_rss|: writes the current time to
+ * the "updated" file, appends a { title, filename, time, url } object to the
+ * "downloaded" json array (creating the array if needed) and rewrites the
+ * "data" file. |rss_tracked_dir| is used as scratch space for building the
+ * path and is restored to its original length before returning.
+ * Returns 0 on success, non-zero on failure.
+ *
+ * NOTE(review): the json nodes appended here, and the "updated" string, live
+ * on this function's stack but stay linked into tracked_rss->json_data after
+ * returning. They need heap storage before the tree is used again.
+ *
+ * TODO: If this fails in the middle, recover and update this next time somehow
+ */
+static int rss_update_latest(char *rss_tracked_dir, TrackedRss *tracked_rss, const char *latest_title, const char *url, const char *filename) {
+    int rss_tracked_dir_len = strlen(rss_tracked_dir);
+    int result = 0;
+
+    char *item_filepath = rss_tracked_dir;
+    strcat(item_filepath, tracked_rss->title);
+
+    char updated[32];
+    assert(sizeof(time_t) == sizeof(long));
+    sprintf(updated, "%ld", (long)time(NULL));
+    int updated_len = strlen(updated);
+    result = file_overwrite_in_dir(item_filepath, "updated", updated, updated_len);
+    if(result != 0) {
+        fprintf(stderr, "Failed to update %s/updated\n", item_filepath);
+        goto cleanup;
+    }
+
+    struct json_value_s *updated_json_value = json_object_get_field_by_name(tracked_rss->json_data, "updated");
+    struct json_string_s *updated_json = updated_json_value ? json_value_as_string(updated_json_value) : NULL;
+    if(!updated_json) {
+        fprintf(stderr, "Missing \"updated\" field in tracked rss data for %s\n", item_filepath);
+        result = -1;
+        goto cleanup;
+    }
+    updated_json->string = updated;
+    updated_json->string_size = updated_len;
+
+    struct json_value_s *downloaded_json = json_object_get_field_by_name(tracked_rss->json_data, "downloaded");
+    /* TODO: "downloaded" may be missing or not an array; a new array is prepended below, which duplicates the key in the latter case */
+
+    struct json_string_s title_json_key;
+    create_json_string(&title_json_key, "title", 5);
+
+    struct json_string_s title_json_value_str;
+    create_json_string(&title_json_value_str, latest_title, strlen(latest_title));
+    struct json_value_s title_json_value;
+    init_json_value_str(&title_json_value, &title_json_value_str);
+
+    struct json_string_s filename_json_key;
+    create_json_string(&filename_json_key, "filename", 8);
+
+    struct json_string_s filename_json_value_str;
+    create_json_string(&filename_json_value_str, filename, strlen(filename));
+    struct json_value_s filename_json_value;
+    init_json_value_str(&filename_json_value, &filename_json_value_str);
+
+    struct json_string_s time_json_key;
+    create_json_string(&time_json_key, "time", 4);
+
+    struct json_string_s time_value_str;
+    create_json_string(&time_value_str, updated, updated_len);
+    struct json_value_s time_json_value;
+    init_json_value_str(&time_json_value, &time_value_str);
+
+    struct json_string_s url_json_key;
+    create_json_string(&url_json_key, "url", 3);
+
+    struct json_string_s url_value_str;
+    create_json_string(&url_value_str, url, strlen(url));
+    struct json_value_s url_json_value;
+    init_json_value_str(&url_json_value, &url_value_str);
+
+    struct json_object_element_s downloaded_title_element;
+    downloaded_title_element.name = &title_json_key;
+    downloaded_title_element.value = &title_json_value;
+
+    struct json_object_element_s downloaded_filename_element;
+    downloaded_filename_element.name = &filename_json_key;
+    downloaded_filename_element.value = &filename_json_value;
+
+    struct json_object_element_s downloaded_time_element;
+    downloaded_time_element.name = &time_json_key;
+    downloaded_time_element.value = &time_json_value;
+
+    struct json_object_element_s downloaded_url_element;
+    downloaded_url_element.name = &url_json_key;
+    downloaded_url_element.value = &url_json_value;
+
+    downloaded_title_element.next = &downloaded_filename_element;
+    downloaded_filename_element.next = &downloaded_time_element;
+    downloaded_time_element.next = &downloaded_url_element;
+    downloaded_url_element.next = NULL;
+
+    struct json_object_s new_downloaded_json_obj;
+    new_downloaded_json_obj.length = 4;
+    new_downloaded_json_obj.start = &downloaded_title_element;
+
+    struct json_value_s new_downloaded_json_val;
+    new_downloaded_json_val.payload = &new_downloaded_json_obj;
+    new_downloaded_json_val.type = json_type_object;
+
+    struct json_array_element_s new_downloaded_item_element;
+    new_downloaded_item_element.value = &new_downloaded_json_val;
+    new_downloaded_item_element.next = NULL;
+
+    struct json_array_s new_downloaded_array;
+    struct json_value_s new_downloaded_array_val;
+    new_downloaded_array_val.payload = &new_downloaded_array;
+    new_downloaded_array_val.type = json_type_array;
+
+    struct json_string_s downloaded_json_key;
+    create_json_string(&downloaded_json_key, "downloaded", 10);
+
+    struct json_object_element_s new_downloaded_array_obj_el;
+    new_downloaded_array_obj_el.name = &downloaded_json_key;
+    new_downloaded_array_obj_el.value = &new_downloaded_array_val;
+
+    if(downloaded_json && downloaded_json->type == json_type_array) {
+        struct json_array_s *downloaded_json_array = json_value_as_array(downloaded_json);
+        struct json_array_element_s *last_downloaded_element = get_last_element_in_json_array(downloaded_json_array);
+        if(last_downloaded_element)
+            last_downloaded_element->next = &new_downloaded_item_element;
+        else
+            downloaded_json_array->start = &new_downloaded_item_element;
+        downloaded_json_array->length++;
+    } else {
+        new_downloaded_array.start = &new_downloaded_item_element;
+        new_downloaded_array.length = 1;
+
+        struct json_object_element_s *prev_start = tracked_rss->json_data->start;
+        tracked_rss->json_data->start = &new_downloaded_array_obj_el;
+        new_downloaded_array_obj_el.next = prev_start;
+        tracked_rss->json_data->length++;
+    }
+
+    struct json_value_s json_root_value;
+    json_root_value.payload = tracked_rss->json_data;
+    json_root_value.type = json_type_object;
+
+    size_t json_body_size = 0;
+    char *json_body_str = json_write_pretty(&json_root_value, " ", "\n", &json_body_size);
+    if(!json_body_str) {
+        fprintf(stderr, "Failed to write json data to file %s/data\n", item_filepath);
+        result = -1;
+        goto cleanup;
+    }
+
+    /* Workaround json bug (?) */
+    json_body_size = strlen(json_body_str);
+
+    result = file_overwrite_in_dir(item_filepath, "data", json_body_str, json_body_size);
+    free(json_body_str);
+
+    cleanup:
+    rss_tracked_dir[rss_tracked_dir_len] = '\0';
+    return result;
+}
+
+/*
+ * Adds the queued items to transmission from last to first and records each
+ * one with rss_update_latest. Returns 0 on success and 1 on the first failure.
+ */
+static int add_torrents_in_reverse(Buffer *download_items_buffer, TrackedRss *tracked_rss, char *rss_tracked_dir) {
+    DownloadItemsData *download_items_begin = buffer_begin(download_items_buffer);
+    DownloadItemsData *download_items_it = buffer_end(download_items_buffer);
+    /* Step back before each use so no pointer before the start of the buffer is ever formed */
+    while(download_items_it != download_items_begin) {
+        --download_items_it;
+        if(transmission_add_torrent(download_items_it->link) != 0) {
+            fprintf(stderr, "Failed to add torrent: %s\n", download_items_it->link);
+            return 1;
+        }
+
+        /* TODO: Verify if the last torrent is immediately accessible or if it gets an old torrent... */
+        int id;
+        float percentage_done;
+        char torrent_name[256];
+        if(transmission_get_last_added_torrent(&id, &percentage_done, torrent_name) != 0) {
+            fprintf(stderr, "Failed to get added torrent name for torrent: %s\n", download_items_it->link);
+            return 1;
+        }
+
+        if(rss_update_latest(rss_tracked_dir, tracked_rss, download_items_it->title, download_items_it->link, torrent_name) != 0) {
+            fprintf(stderr, "Failed to update rss tracked data for %s\n", download_items_it->title);
+            return 1;
+        }
+
+        /* Show notification that download has started? */
+    }
+    return 0;
+}
+
+/*
+ * Syncs |tracked_rss|: downloads its feed, queues the items that come before
+ * the first already downloaded one, adds them to transmission and finally
+ * writes the current time to the "synced" file. |rss_config_dir| is used as
+ * scratch space for building paths and is restored before returning.
+ * Returns 0 on success, non-zero on failure.
+ */
+int sync_rss(TrackedRss *tracked_rss, char *rss_config_dir) {
+    /* TODO: This can be cached */
+    int rss_config_dir_len = strlen(rss_config_dir);
+
+    fprintf(stderr, "Syncing %s\n", tracked_rss->title);
+
+    int result = 0;
+    Buffer download_items_buffer;
+    buffer_init(&download_items_buffer);
+
+    Buffer rss_data_buffer;
+    buffer_init(&rss_data_buffer);
+    result = download_to_buffer(tracked_rss->link, &rss_data_buffer);
+    if(result != 0) {
+        fprintf(stderr, "Failed to download rss: %s\n", tracked_rss->link);
+        goto cleanup;
+    }
+
+    RssParseSyncData rss_parse_sync_data;
+    rss_parse_sync_data.tracked_rss = tracked_rss;
+    rss_parse_sync_data.download_items_buffer = &download_items_buffer;
+    char *rss_title = NULL;
+    result = parse_rss(rss_data_buffer.data, &rss_title, rss_parse_sync_callback, &rss_parse_sync_data);
+    if(result != 0) {
+        fprintf(stderr, "Failed to parse rss for url: %s\n", tracked_rss->link);
+        goto cleanup;
+    }
+
+    char *rss_tracked_dir = rss_config_dir;
+    strcat(rss_tracked_dir, "/tracked/");
+
+    result = add_torrents_in_reverse(&download_items_buffer, tracked_rss, rss_tracked_dir);
+    if(result != 0) {
+        fprintf(stderr, "Failed while adding torrents for url: %s\n", tracked_rss->link);
+        goto cleanup;
+    }
+
+    char updated[32];
+    sprintf(updated, "%ld", (long)time(NULL));
+    strcat(rss_tracked_dir, tracked_rss->title);
+    result = file_overwrite_in_dir(rss_tracked_dir, "synced", updated, strlen(updated));
+    if(result != 0) {
+        fprintf(stderr, "Failed to update %s/synced\n", rss_tracked_dir);
+        goto cleanup;
+    }
+
+    cleanup:
+    rss_config_dir[rss_config_dir_len] = '\0';
+    buffer_deinit(&rss_data_buffer);
+    buffer_deinit(&download_items_buffer);
+    return result;
+}