aboutsummaryrefslogtreecommitdiff
path: root/src/rss.c
diff options
context:
space:
mode:
author: dec05eba <dec05eba@protonmail.com> 2020-07-15 06:09:50 +0200
committer: dec05eba <dec05eba@protonmail.com> 2020-07-15 07:46:57 +0200
commit: 35aca1f0582c43b5f6818c8fc00b924247e45881 (patch)
tree: 66d5e8f7954481863ba6d79db22a6df32f78af69 /src/rss.c
parent: 5b20475c7faf89bbabc9eab43c7e5622317a18fc (diff)
Implement rss sync
Diffstat (limited to 'src/rss.c')
-rw-r--r--src/rss.c334
1 files changed, 312 insertions, 22 deletions
diff --git a/src/rss.c b/src/rss.c
index 84fa345..ebbb1cf 100644
--- a/src/rss.c
+++ b/src/rss.c
@@ -1,14 +1,17 @@
#include "rss.h"
#include "download.h"
+#include "transmission.h"
#include "stringutils.h"
#include "fileutils.h"
#include "buffer.h"
#include "rss_html_common.h"
+#include "json.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <time.h>
+#include <assert.h>
static int is_alpha_lowercase(char c) {
return c >= 'a' && c <= 'z';
@@ -29,7 +32,8 @@ static char* get_amp_end(char *str) {
return str;
}
-static void xml_unescape(char *str, char *result, int result_length) {
+static void xml_unescape(char *str) {
+ char *result = str;
int index = 0;
for(;;) {
char c = *str;
@@ -39,6 +43,7 @@ static void xml_unescape(char *str, char *result, int result_length) {
*amp_end = '\0';
if(str[1] == '#') {
+ /* TODO: Also support non-ascii numbers */
result[index++] = atoi(str + 2);
} else {
if(strcmp(str + 1, "amp") == 0)
@@ -62,11 +67,6 @@ static void xml_unescape(char *str, char *result, int result_length) {
result[index++] = c;
++str;
}
-
- if(index == result_length - 1) {
- result[index] = '\0';
- break;
- }
}
}
@@ -79,7 +79,9 @@ static char* string_substr_before_tag_end(char *str, const char *tag) {
typedef int (*RssParseCallback)(char *title, char *link, void *userdata);
-static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, RssParseCallback parse_callback, void *userdata) {
+static int parse_rss(char *str, char **rss_title_str, RssParseCallback parse_callback, void *userdata) {
+ *rss_title_str = NULL;
+
char *channel_start = strstr(str, "<channel>");
if(!channel_start)
return 1;
@@ -91,21 +93,18 @@ static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, Rss
if(!first_item) {
rss_title += 7;
string_substr_before_tag_end(rss_title, "</title>");
- xml_unescape(rss_title, rss_title_str, rss_title_str_size);
+ xml_unescape(rss_title);
+ *rss_title_str = rss_title;
return 0;
}
if(rss_title < first_item) {
rss_title += 7;
string_substr_before_tag_end(rss_title, "</title>");
- xml_unescape(rss_title, rss_title_str, rss_title_str_size);
- } else {
- rss_title_str[0] = '\0';
+ xml_unescape(rss_title);
+ *rss_title_str = rss_title;
}
- char title_str[256];
- char link_str[2084];
-
char *item = first_item;
for(;;) {
char *after_first_item = item + 6;
@@ -136,9 +135,11 @@ static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, Rss
item_link += 6;
string_substr_before_tag_end(item_link, "</link>");
- xml_unescape(item_title, title_str, sizeof(title_str));
- xml_unescape(item_link, link_str, sizeof(link_str));
- if(parse_callback(title_str, link_str, userdata) != 0)
+ xml_unescape(item_title);
+ xml_unescape(item_link);
+ string_replace(item_title, '/', '_');
+ char *stripped_title_str = strip(item_title);
+ if(parse_callback(stripped_title_str, item_link, userdata) != 0)
return 0;
item = strstr(item_end + 7, "<item>");
@@ -153,7 +154,7 @@ typedef struct {
const char *start_after_url;
} RssParseUserdata;
-static int rss_parse_callback(char *title, char *link, void *userdata) {
+static int rss_parse_add_callback(char *title, char *link, void *userdata) {
RssParseUserdata *rss_parse_userdata = userdata;
if(rss_parse_userdata->start_after && strcmp(rss_parse_userdata->start_after, title) == 0) {
rss_parse_userdata->found_start_after = 1;
@@ -179,9 +180,8 @@ int add_rss(const char *name, const char *url, char *rss_config_dir, const char
rss_parse_userdata.found_start_after = 0;
rss_parse_userdata.start_after_url = NULL;
- /* TODO: What if rss title is longer than this? */
- char rss_title[250];
- result = parse_rss(buffer.data, rss_title, sizeof(rss_title), rss_parse_callback, &rss_parse_userdata);
+ char *rss_title = NULL;
+ result = parse_rss(buffer.data, &rss_title, rss_parse_add_callback, &rss_parse_userdata);
if(result != 0) {
fprintf(stderr, "Failed to parse rss for url: %s\n", url);
goto cleanup;
@@ -196,7 +196,7 @@ int add_rss(const char *name, const char *url, char *rss_config_dir, const char
/* TODO: Add (add rss <episode name>) here */
if(!name) {
- if(rss_title[0] == '\0') {
+ if(!rss_title) {
fprintf(stderr, "Failed to find rss title and --name was not provided\n");
result = -1;
goto cleanup;
@@ -265,3 +265,293 @@ int add_rss(const char *name, const char *url, char *rss_config_dir, const char
buffer_deinit(&buffer);
return result;
}
+
+/*
+ * Checks the "downloaded" array of tracked_rss->json_data for an entry that
+ * matches the given item.
+ *
+ * An entry matches when its "title" string equals |title| or its "url" string
+ * equals |link|. Array elements that are not objects are ignored.
+ *
+ * Returns 1 when a matching entry exists, 0 otherwise (including when there is
+ * no "downloaded" field or it is not an array).
+ */
+static int is_item_already_downloaded(const char *title, const char *link, TrackedRss *tracked_rss) {
+    /* TODO: Optimize this... */
+    struct json_value_s *downloaded_val = json_object_get_field_by_name(tracked_rss->json_data, "downloaded");
+    if(!downloaded_val || downloaded_val->type != json_type_array)
+        return 0;
+
+    struct json_array_element_s *entry;
+    for(entry = json_value_as_array(downloaded_val)->start; entry; entry = entry->next) {
+        struct json_object_s *entry_obj = json_value_as_object(entry->value);
+        if(entry_obj) {
+            struct json_value_s *title_val = json_object_get_field_by_name(entry_obj, "title");
+            struct json_value_s *url_val = json_object_get_field_by_name(entry_obj, "url");
+
+            /* A missing field leaves the corresponding string NULL, which never matches */
+            struct json_string_s *entry_title = title_val ? json_value_as_string(title_val) : NULL;
+            struct json_string_s *entry_url = url_val ? json_value_as_string(url_val) : NULL;
+
+            if(entry_title && strcmp(entry_title->string, title) == 0)
+                return 1;
+            if(entry_url && strcmp(entry_url->string, link) == 0)
+                return 1;
+        }
+    }
+    return 0;
+}
+
+typedef struct { /* One item queued for download; filled in by rss_parse_sync_callback and read by add_torrents_in_reverse. */
+ const char *title; /* Title pointer exactly as handed to the parse callback (stored, not copied) */
+ const char *link; /* Link pointer exactly as handed to the parse callback (stored, not copied) */
+} DownloadItemsData;
+
+typedef struct { /* Userdata that sync_rss hands to parse_rss for rss_parse_sync_callback. */
+ TrackedRss *tracked_rss; /* Consulted to decide whether an item has already been downloaded */
+ Buffer *download_items_buffer; /* Receives one DownloadItemsData per item that still has to be downloaded */
+} RssParseSyncData;
+
+/*
+ * parse_rss callback used while syncing.
+ *
+ * |userdata| must point to a RssParseSyncData. Items that are not yet recorded
+ * as downloaded are appended (as DownloadItemsData holding the |title| and
+ * |link| pointers themselves, not copies) to the download items buffer.
+ *
+ * Returns 1 for the first item that is already downloaded, which makes
+ * parse_rss stop iterating; returns 0 after queueing an item.
+ */
+static int rss_parse_sync_callback(char *title, char *link, void *userdata) {
+    RssParseSyncData *sync_data = userdata;
+    if(is_item_already_downloaded(title, link, sync_data->tracked_rss) != 0)
+        return 1;
+
+    DownloadItemsData pending_item = { title, link };
+    buffer_append(sync_data->download_items_buffer, &pending_item, sizeof(pending_item));
+    return 0;
+}
+
+/*
+ * Walks the linked list of |json_array| and returns its final element,
+ * or NULL when the array has no elements.
+ */
+static struct json_array_element_s* get_last_element_in_json_array(struct json_array_s *json_array) {
+    struct json_array_element_s *last = json_array->start;
+    if(!last)
+        return NULL;
+
+    while(last->next)
+        last = last->next;
+    return last;
+}
+
+/* Number of key/value pairs stored for every entry of the "downloaded" array */
+enum { DOWNLOADED_ENTRY_NUM_FIELDS = 4 };
+
+/*
+ * Heap storage for one entry appended to the in-memory "downloaded" array.
+ *
+ * Everything that gets linked into tracked_rss->json_data lives in one
+ * allocation so that the json tree never points at stack memory or at buffers
+ * owned by the caller. The new_array* members are only used when the tracked
+ * data had no "downloaded" array yet. |strings| holds NUL-terminated copies of
+ * the title, filename, time and url values.
+ */
+typedef struct {
+    struct json_array_element_s array_element;
+    struct json_value_s object_value;
+    struct json_object_s object;
+    struct json_object_element_s fields[DOWNLOADED_ENTRY_NUM_FIELDS];
+    struct json_string_s keys[DOWNLOADED_ENTRY_NUM_FIELDS];
+    struct json_string_s value_strs[DOWNLOADED_ENTRY_NUM_FIELDS];
+    struct json_value_s values[DOWNLOADED_ENTRY_NUM_FIELDS];
+    struct json_array_s new_array;
+    struct json_value_s new_array_value;
+    struct json_string_s new_array_key;
+    struct json_object_element_s new_array_field;
+    char strings[];
+} DownloadedEntryStorage;
+
+/* Copies |len| bytes of |str| plus a NUL terminator to |*cursor|, advances the cursor and returns the copy */
+static char* downloaded_entry_store_string(char **cursor, const char *str, size_t len) {
+    char *copy = *cursor;
+    memcpy(copy, str, len);
+    copy[len] = '\0';
+    *cursor = copy + len + 1;
+    return copy;
+}
+
+/*
+ * Records a newly added download for |tracked_rss|, both on disk and in
+ * tracked_rss->json_data.
+ *
+ * The current time is written to "<rss_tracked_dir><title>/updated", an entry
+ * {"title", "filename", "time", "url"} is appended to the "downloaded" array
+ * (the array is created if it is missing) and the whole json object is written
+ * to "<rss_tracked_dir><title>/data".
+ *
+ * |rss_tracked_dir| is temporarily extended in place with tracked_rss->title
+ * and restored before returning. The buffer must have room for that; its
+ * capacity is not known here, so it can not be checked.
+ *
+ * Returns 0 on success and non-zero on failure.
+ *
+ * NOTE(review): the storage of the appended entry is deliberately not freed
+ * here since the json tree keeps pointing at it. Whoever releases
+ * tracked_rss->json_data does not know about it, so it lives until the
+ * process exits - confirm that this is acceptable for the owner of TrackedRss.
+ */
+/* TODO: If this fails in the middle, recover and update this next time somehow */
+static int rss_update_latest(char *rss_tracked_dir, TrackedRss *tracked_rss, const char *latest_title, const char *url, const char *filename) {
+    size_t rss_tracked_dir_len = strlen(rss_tracked_dir);
+    int result = 0;
+    DownloadedEntryStorage *entry = NULL;
+    int entry_linked = 0;
+
+    char *item_filepath = rss_tracked_dir;
+    strcat(item_filepath, tracked_rss->title);
+
+    /* The cast keeps the format specifier correct even where time_t is not a long */
+    char updated[32];
+    int updated_len = snprintf(updated, sizeof(updated), "%ld", (long)time(NULL));
+    if(updated_len < 0 || (size_t)updated_len >= sizeof(updated)) {
+        fprintf(stderr, "Failed to format the update time for %s\n", item_filepath);
+        result = -1;
+        goto cleanup;
+    }
+
+    size_t title_len = strlen(latest_title);
+    size_t filename_len = strlen(filename);
+    size_t url_len = strlen(url);
+
+    /* Allocate before touching any file so running out of memory has no side effects */
+    entry = malloc(sizeof(*entry) + title_len + 1 + filename_len + 1 + (size_t)updated_len + 1 + url_len + 1);
+    if(!entry) {
+        fprintf(stderr, "Failed to allocate memory for downloaded item %s\n", latest_title);
+        result = -1;
+        goto cleanup;
+    }
+
+    result = file_overwrite_in_dir(item_filepath, "updated", updated, updated_len);
+    if(result != 0) {
+        fprintf(stderr, "Failed to update %s/updated\n", item_filepath);
+        goto cleanup;
+    }
+
+    /* The json tree must not reference |updated|, |filename| or the rss buffer after we return, so copy everything */
+    char *cursor = entry->strings;
+    const char *field_names[DOWNLOADED_ENTRY_NUM_FIELDS] = { "title", "filename", "time", "url" };
+    const char *field_values[DOWNLOADED_ENTRY_NUM_FIELDS];
+    size_t field_value_lens[DOWNLOADED_ENTRY_NUM_FIELDS] = { title_len, filename_len, (size_t)updated_len, url_len };
+    field_values[0] = downloaded_entry_store_string(&cursor, latest_title, title_len);
+    field_values[1] = downloaded_entry_store_string(&cursor, filename, filename_len);
+    field_values[2] = downloaded_entry_store_string(&cursor, updated, (size_t)updated_len);
+    field_values[3] = downloaded_entry_store_string(&cursor, url, url_len);
+
+    for(int i = 0; i < DOWNLOADED_ENTRY_NUM_FIELDS; ++i) {
+        create_json_string(&entry->keys[i], field_names[i], strlen(field_names[i]));
+        create_json_string(&entry->value_strs[i], field_values[i], field_value_lens[i]);
+        init_json_value_str(&entry->values[i], &entry->value_strs[i]);
+
+        entry->fields[i].name = &entry->keys[i];
+        entry->fields[i].value = &entry->values[i];
+        entry->fields[i].next = (i + 1 < DOWNLOADED_ENTRY_NUM_FIELDS) ? &entry->fields[i + 1] : NULL;
+    }
+
+    entry->object.start = &entry->fields[0];
+    entry->object.length = DOWNLOADED_ENTRY_NUM_FIELDS;
+
+    entry->object_value.payload = &entry->object;
+    entry->object_value.type = json_type_object;
+
+    entry->array_element.value = &entry->object_value;
+    entry->array_element.next = NULL;
+
+    /* Only touch the "updated" field if it exists and is a string; it is left alone otherwise */
+    struct json_value_s *updated_value = json_object_get_field_by_name(tracked_rss->json_data, "updated");
+    struct json_string_s *updated_json = updated_value ? json_value_as_string(updated_value) : NULL;
+    if(updated_json) {
+        updated_json->string = field_values[2];
+        updated_json->string_size = updated_len;
+    }
+
+    struct json_value_s *downloaded_json = json_object_get_field_by_name(tracked_rss->json_data, "downloaded");
+    if(downloaded_json && downloaded_json->type == json_type_array) {
+        struct json_array_s *downloaded_json_array = json_value_as_array(downloaded_json);
+        struct json_array_element_s *last_downloaded_element = get_last_element_in_json_array(downloaded_json_array);
+        if(last_downloaded_element)
+            last_downloaded_element->next = &entry->array_element;
+        else
+            downloaded_json_array->start = &entry->array_element;
+        downloaded_json_array->length++;
+    } else {
+        /* There is no usable "downloaded" array yet: create one and put it first in the root object */
+        entry->new_array.start = &entry->array_element;
+        entry->new_array.length = 1;
+
+        entry->new_array_value.payload = &entry->new_array;
+        entry->new_array_value.type = json_type_array;
+
+        create_json_string(&entry->new_array_key, "downloaded", 10);
+        entry->new_array_field.name = &entry->new_array_key;
+        entry->new_array_field.value = &entry->new_array_value;
+        entry->new_array_field.next = tracked_rss->json_data->start;
+
+        tracked_rss->json_data->start = &entry->new_array_field;
+        tracked_rss->json_data->length++;
+    }
+    /* From here on the json tree references |entry|, so it must stay allocated */
+    entry_linked = 1;
+
+    struct json_value_s json_root_value;
+    json_root_value.payload = tracked_rss->json_data;
+    json_root_value.type = json_type_object;
+
+    size_t json_body_size = 0;
+    char *json_body_str = json_write_pretty(&json_root_value, " ", "\n", &json_body_size);
+    if(!json_body_str) {
+        fprintf(stderr, "Failed to write json data to file %s/data\n", item_filepath);
+        result = -1;
+        goto cleanup;
+    }
+
+    /* Use the string length rather than the reported size (the reported size presumably counts the NUL terminator - TODO confirm) */
+    json_body_size = strlen(json_body_str);
+
+    result = file_overwrite_in_dir(item_filepath, "data", json_body_str, json_body_size);
+    free(json_body_str);
+
+    cleanup:
+    if(!entry_linked)
+        free(entry);
+    rss_tracked_dir[rss_tracked_dir_len] = '\0';
+    return result;
+}
+
+/*
+ * Adds every queued item of |download_items_buffer| (an array of
+ * DownloadItemsData) to transmission, going from the last queued item to the
+ * first, and records each one with rss_update_latest.
+ *
+ * Does nothing when the buffer is empty. Stops at the first failure.
+ *
+ * Returns 0 on success and 1 as soon as adding a torrent, fetching its name or
+ * updating the tracked data fails.
+ */
+static int add_torrents_in_reverse(Buffer *download_items_buffer, TrackedRss *tracked_rss, char *rss_tracked_dir) {
+    DownloadItemsData *download_items_begin = buffer_begin(download_items_buffer);
+    DownloadItemsData *download_items_it = buffer_end(download_items_buffer);
+
+    /*
+     * Decrement inside the loop so no pointer before the start of the buffer
+     * is ever formed (that is undefined behavior, and the begin pointer may
+     * even be NULL for an empty buffer).
+     */
+    while(download_items_it != download_items_begin) {
+        --download_items_it;
+
+        if(transmission_add_torrent(download_items_it->link) != 0) {
+            fprintf(stderr, "Failed to add torrent: %s\n", download_items_it->link);
+            return 1;
+        }
+
+        /* TODO: Verify if the last torrent is immediately accessible or if it gets an old torrent... */
+        int id;
+        float percentage_done;
+        char torrent_name[256];
+        if(transmission_get_last_added_torrent(&id, &percentage_done, torrent_name) != 0) {
+            fprintf(stderr, "Failed to get added torrent name for torrent: %s\n", download_items_it->link);
+            return 1;
+        }
+
+        if(rss_update_latest(rss_tracked_dir, tracked_rss, download_items_it->title, download_items_it->link, torrent_name) != 0) {
+            fprintf(stderr, "Failed to update rss tracked data for %s\n", download_items_it->title);
+            return 1;
+        }
+
+        /* Show notification that download has started? */
+    }
+    return 0;
+}
+
+/*
+ * Syncs one tracked rss feed: downloads tracked_rss->link, collects the items
+ * that are not yet recorded as downloaded, adds them as torrents and finally
+ * writes the current time to "<rss_config_dir>/tracked/<title>/synced".
+ *
+ * |rss_config_dir| is temporarily extended in place with "/tracked/" and the
+ * feed title and is restored before returning. The buffer must have room for
+ * that; its capacity is not known here, so it can not be checked.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int sync_rss(TrackedRss *tracked_rss, char *rss_config_dir) {
+    /* TODO: This can be cached */
+    int rss_config_dir_len = strlen(rss_config_dir);
+
+    fprintf(stderr, "Syncing %s\n", tracked_rss->title);
+
+    int result = 0;
+    Buffer download_items_buffer;
+    buffer_init(&download_items_buffer);
+
+    Buffer rss_data_buffer;
+    buffer_init(&rss_data_buffer);
+    result = download_to_buffer(tracked_rss->link, &rss_data_buffer);
+    if(result != 0) {
+        fprintf(stderr, "Failed to download rss: %s\n", tracked_rss->link);
+        goto cleanup;
+    }
+
+    /* The queued items point into rss_data_buffer, so it has to stay alive until the torrents are added */
+    RssParseSyncData rss_parse_sync_data;
+    rss_parse_sync_data.tracked_rss = tracked_rss;
+    rss_parse_sync_data.download_items_buffer = &download_items_buffer;
+    char *rss_title = NULL;
+    result = parse_rss(rss_data_buffer.data, &rss_title, rss_parse_sync_callback, &rss_parse_sync_data);
+    if(result != 0) {
+        fprintf(stderr, "Failed to parse rss for url: %s\n", tracked_rss->link);
+        goto cleanup;
+    }
+
+    char *rss_tracked_dir = rss_config_dir;
+    strcat(rss_tracked_dir, "/tracked/");
+
+    result = add_torrents_in_reverse(&download_items_buffer, tracked_rss, rss_tracked_dir);
+    if(result != 0) {
+        fprintf(stderr, "Failed while adding torrents for url: %s\n", tracked_rss->link);
+        goto cleanup;
+    }
+
+    /* The cast keeps the format specifier correct even where time_t is not a long */
+    char updated[32];
+    snprintf(updated, sizeof(updated), "%ld", (long)time(NULL));
+    strcat(rss_tracked_dir, tracked_rss->title);
+    result = file_overwrite_in_dir(rss_tracked_dir, "synced", updated, strlen(updated));
+    if(result != 0) {
+        fprintf(stderr, "Failed to update %s/synced\n", rss_tracked_dir);
+        goto cleanup;
+    }
+
+    cleanup:
+    rss_config_dir[rss_config_dir_len] = '\0';
+    buffer_deinit(&rss_data_buffer);
+    buffer_deinit(&download_items_buffer);
+    return result;
+}