From a15d7ad07a5865f2f51e85d5e4e049922c50deec Mon Sep 17 00:00:00 2001
From: dec05eba
Date: Mon, 13 Jul 2020 18:09:15 +0200
Subject: Finish add_rss (still need add rss of filename though)

---
Review notes (changes to the added lines, relative to the original commit):
- create_lock_file passes the mode argument that open requires with O_CREAT.
- file_overwrite uses size_t for the %zu arguments and checks fclose.
- file_overwrite_in_dir rejects paths that don't fit in PATH_MAX.
- rss_parse_callback copies the link instead of keeping a pointer to a buffer
  that belongs to parse_rss.
- add_rss builds its paths in local, bounds checked buffers instead of
  appending to rss_config_dir, and removes the files it created on failure.
- The header names of the unchanged #include context lines were missing in the
  reviewed copy of this patch and are left as found.

 src/fileutils.c |  89 ++++++++++++++
 src/fileutils.h |  13 ++
 src/main.c      |   4 +-
 src/rss.c       | 325 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 src/rss.h       |   3 +-
 5 files changed, 421 insertions(+), 13 deletions(-)

diff --git a/src/fileutils.c b/src/fileutils.c
index ea57550..fe9ab88 100644
--- a/src/fileutils.c
+++ b/src/fileutils.c
@@ -5,6 +5,8 @@
 #include
 #include
 #include
+#include <fcntl.h>
+#include <limits.h>
 #include
 #include
 #include
@@ -80,3 +82,90 @@ int create_directory_recursive(char *path) {
     }
     return 0;
 }
+
+/* Returns 0 if stat succeeds on |path| (the file exists), otherwise returns what stat returned */
+int file_exists(const char *path) {
+    struct stat st;
+    return stat(path, &st);
+}
+
+/*
+    Creates an empty file at |path|. Fails if the file already exists (O_EXCL).
+    Returns 0 on success, otherwise the errno value of the call that failed.
+*/
+int create_lock_file(const char *path) {
+    /* open needs the mode argument when O_CREAT is used */
+    int fd = open(path, O_CREAT | O_EXCL | O_WRONLY, 0644);
+    if(fd == -1)
+        return errno;
+
+    /* The result is ignored, the file has no content that can be lost */
+    fsync(fd);
+    if(close(fd) == -1)
+        return errno;
+    return 0;
+}
+
+/*
+    Opens |filepath| with the mode "wb" and writes |size| bytes of |data| to it.
+    Returns 0 on success, -errno if the file can't be opened, otherwise -1.
+*/
+int file_overwrite(const char *filepath, const char *data, size_t size) {
+    FILE *file = fopen(filepath, "wb");
+    if(!file) {
+        int err = -errno;
+        perror(filepath);
+        return err;
+    }
+
+    /* fwrite returns size_t, which is the type that %zu expects */
+    size_t bytes_written = fwrite(data, 1, size, file);
+    if(bytes_written != size) {
+        fprintf(stderr, "Failed to write all bytes to file %s. Expected to write %zu bytes, only wrote %zu bytes\n", filepath, size, bytes_written);
+        fclose(file);
+        return -1;
+    }
+
+    /* fclose flushes buffered data, if it fails then the file may be incomplete */
+    if(fclose(file) != 0) {
+        perror(filepath);
+        return -1;
+    }
+    return 0;
+}
+
+/*
+    Same as file_overwrite, but the file that is written is |dir|/|filename|.
+    Returns -1 if the joined path doesn't fit in PATH_MAX bytes.
+*/
+int file_overwrite_in_dir(const char *dir, const char *filename, const char *data, size_t size) {
+    char filepath[PATH_MAX];
+    /* +2 for the '/' and the null terminator that path_join writes */
+    if(strlen(dir) + strlen(filename) + 2 > sizeof(filepath)) {
+        fprintf(stderr, "Path is too long: %s/%s\n", dir, filename);
+        return -1;
+    }
+
+    const char *filepath_components[] = { dir, filename };
+    path_join(filepath, filepath_components, 2);
+    return file_overwrite(filepath, data, size);
+}
+
+/*
+    Writes |components| separated by '/' to |output| and null terminates it.
+    The size of |output| is not checked, the caller has to make sure the result fits.
+*/
+void path_join(char *output, const char **components, int num_components) {
+    size_t offset = 0;
+    for(int i = 0; i < num_components; ++i) {
+        if(i > 0) {
+            output[offset] = '/';
+            ++offset;
+        }
+
+        size_t component_len = strlen(components[i]);
+        memcpy(output + offset, components[i], component_len);
+        offset += component_len;
+    }
+    output[offset] = '\0';
+}
diff --git a/src/fileutils.h b/src/fileutils.h
index 6c514bb..2b9906c 100644
--- a/src/fileutils.h
+++ b/src/fileutils.h
@@ -1,10 +1,23 @@
 #ifndef FILEUTILS_H
 #define FILEUTILS_H
 
+#include <stddef.h>
+
 const char* get_home_dir();
 /* Returns 0 on success */
 int file_get_content(const char *filepath, char **data, long *size);
 /* Returns 0 on success (if the directories are created or if the directories already exists) */
 int create_directory_recursive(char *path);
+/* Returns 0 if the file exists */
+int file_exists(const char *path);
+/* Returns 0 on success. Fails if the file already exists */
+int create_lock_file(const char *path);
+
+/* Returns 0 on success */
+int file_overwrite(const char *filepath, const char *data, size_t size);
+/* Same as |file_overwrite| but writes to |dir|/|filename|. Returns 0 on success */
+int file_overwrite_in_dir(const char *dir, const char *filename, const char *data, size_t size);
+/* Joins |components| with '/'. |output| has to be large enough for the result and its null terminator */
+void path_join(char *output, const char **components, int num_components);
 
 #endif
diff --git a/src/main.c b/src/main.c
index bae9644..11b2a5d 100644
--- a/src/main.c
+++ b/src/main.c
@@ -242,7 +242,9 @@ static void command_add(int argc, char **argv, char *rss_config_dir, char *html_
             fprintf(stderr, "Failed to create %s, error: %s\n", rss_config_dir, strerror(res));
             exit(1);
         }
-        add_rss(media_name, media_url, rss_config_dir, start_after);
+
+        if(add_rss(media_name, media_url, rss_config_dir, start_after) != 0)
+            exit(1);
     } else if(strcmp(media_type, "html") == 0) {
         (void)html_config_dir;
     } else {
diff --git a/src/rss.c b/src/rss.c
index ba2f74d..23ccf89 100644
--- a/src/rss.c
+++ b/src/rss.c
@@ -1,9 +1,14 @@
 #include "rss.h"
 #include "download.h"
+#include "stringutils.h"
+#include "fileutils.h"
 #include "buffer.h"
 #include
 #include
 #include
+#include <limits.h>
+#include <time.h>
+#include "json.h"
 
 static int is_alpha_lowercase(char c) {
     return c >= 'a' && c <= 'z';
@@ -72,7 +77,7 @@ static char* string_substr_before_tag_end(char *str, const char *tag) {
     return tag_p;
 }
 
-typedef void (*RssParseCallback)(const char *title, const char *link, void *userdata);
+typedef int (*RssParseCallback)(char *title, char *link, void *userdata);
 
 static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, RssParseCallback parse_callback, void *userdata) {
     char *channel_start = strstr(str, "<channel>");
@@ -133,7 +138,8 @@ static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, Rss
         xml_unescape(item_title, title_str, sizeof(title_str));
         xml_unescape(item_link, link_str, sizeof(link_str));
 
-        parse_callback(title_str, link_str, userdata);
+        if(parse_callback(title_str, link_str, userdata) != 0)
+            return 0;
 
         item = strstr(item_end + 7, "<item>");
         if(!item)
@@ -141,12 +147,204 @@ static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, Rss
     }
 }
 
-static void rss_parse_callback(const char *title, const char *link, void *userdata) {
-    (void)userdata;
-    fprintf(stderr, "title: |%s|, link: |%s|\n", title, link);
+/* Size in bytes of the buffer that the link of the |start_after| item is copied to */
+#define RSS_URL_MAX_SIZE 4096
+
+typedef struct {
+    /* Title of the item to search for, can be NULL */
+    const char *start_after;
+    /* Set to 1 when an item with the title |start_after| has been found */
+    int found_start_after;
+    /* Link of the found item, only set when |found_start_after| is 1 */
+    char start_after_url[RSS_URL_MAX_SIZE];
+} RssParseUserdata;
+
+/*
+    Called by parse_rss for every item. Returns 1 to stop the parsing when the item
+    with the title |start_after| is reached, otherwise returns 0.
+    The link is copied because |link| is a buffer owned by parse_rss (link_str), which
+    is not expected to be valid after parse_rss has returned.
+*/
+static int rss_parse_callback(char *title, char *link, void *userdata) {
+    RssParseUserdata *rss_parse_userdata = userdata;
+    if(!rss_parse_userdata->start_after || strcmp(rss_parse_userdata->start_after, title) != 0)
+        return 0;
+
+    size_t link_len = strlen(link);
+    if(link_len >= sizeof(rss_parse_userdata->start_after_url)) {
+        fprintf(stderr, "The link for %s is too long\n", title);
+        return 1;
+    }
+
+    memcpy(rss_parse_userdata->start_after_url, link, link_len + 1);
+    rss_parse_userdata->found_start_after = 1;
+    return 1;
+}
+
+/* Makes |json_result| refer to |str|, the string is not copied */
+static void create_json_string(struct json_string_s *json_result, const char *str, int len) {
+    json_result->string = str;
+    json_result->string_size = len;
+}
+
+/* Makes |json_value| a json string that refers to |json_str| */
+static void init_json_value_str(struct json_value_s *json_value, struct json_string_s *json_str) {
+    json_value->payload = json_str;
+    json_value->type = json_type_string;
+}
+
+/*
+    Removes the files that add_rss creates in |dir| and then |dir| itself,
+    since remove can't delete a directory that isn't empty.
+    Errors are ignored, this is only called when add_rss has already failed.
+*/
+static void remove_rss_tracked_dir(const char *dir) {
+    const char *filenames[] = { "link", "updated", "data", ".in_progress" };
+    char filepath[PATH_MAX];
+    for(size_t i = 0; i < sizeof(filenames) / sizeof(filenames[0]); ++i) {
+        int filepath_len = snprintf(filepath, sizeof(filepath), "%s/%s", dir, filenames[i]);
+        if(filepath_len >= 0 && (size_t)filepath_len < sizeof(filepath))
+            remove(filepath);
+    }
+    remove(dir);
+}
+
+/*
+    Writes the following json to |dir|/|filename|:
+    {
+        "link": |url|,
+        "updated": |updated|,
+        "downloaded": [ { "title": |start_after|, "time": |updated|, "url": |start_after_url| } ]
+    }
+    "downloaded" is an empty array if |start_after| is NULL.
+    Returns 0 on success.
+*/
+static int write_rss_json_to_file(const char *dir, const char *filename, const char *url, const char *updated, const char *start_after, const char *start_after_url) {
+    struct json_string_s title_json_key;
+    create_json_string(&title_json_key, "title", 5);
+
+    struct json_string_s title_json_value_str;
+    create_json_string(&title_json_value_str, start_after, start_after ? strlen(start_after) : 0);
+    struct json_value_s title_json_value;
+    init_json_value_str(&title_json_value, &title_json_value_str);
+
+    struct json_string_s time_json_key;
+    create_json_string(&time_json_key, "time", 4);
+
+    struct json_string_s time_value_str;
+    create_json_string(&time_value_str, updated, updated ? strlen(updated) : 0);
+    struct json_value_s time_json_value;
+    init_json_value_str(&time_json_value, &time_value_str);
+
+    struct json_string_s url_json_key;
+    create_json_string(&url_json_key, "url", 3);
+
+    struct json_string_s url_value_str;
+    create_json_string(&url_value_str, start_after_url, start_after_url ? strlen(start_after_url) : 0);
+    struct json_value_s url_json_value;
+    init_json_value_str(&url_json_value, &url_value_str);
+
+    struct json_string_s link_json_key;
+    create_json_string(&link_json_key, "link", 4);
+
+    struct json_string_s link_json_value_str;
+    create_json_string(&link_json_value_str, url, strlen(url));
+    struct json_value_s link_json_value;
+    init_json_value_str(&link_json_value, &link_json_value_str);
+
+    struct json_string_s updated_json_key;
+    create_json_string(&updated_json_key, "updated", 7);
+
+    struct json_string_s updated_json_value_str;
+    create_json_string(&updated_json_value_str, updated, updated ? strlen(updated) : 0);
+    struct json_value_s updated_json_value;
+    init_json_value_str(&updated_json_value, &updated_json_value_str);
+
+    struct json_string_s downloaded_json_key;
+    create_json_string(&downloaded_json_key, "downloaded", 10);
+
+    struct json_object_s downloaded_json;
+    downloaded_json.start = NULL;
+    downloaded_json.length = 0;
+
+    struct json_value_s downloaded_json_value;
+    downloaded_json_value.payload = &downloaded_json;
+    downloaded_json_value.type = json_type_object;
+
+    struct json_object_element_s downloaded_title_element;
+    downloaded_title_element.name = &title_json_key;
+    downloaded_title_element.value = &title_json_value;
+
+    struct json_object_element_s downloaded_time_element;
+    downloaded_time_element.name = &time_json_key;
+    downloaded_time_element.value = &time_json_value;
+
+    struct json_object_element_s downloaded_url_element;
+    downloaded_url_element.name = &url_json_key;
+    downloaded_url_element.value = &url_json_value;
+
+    downloaded_title_element.next = &downloaded_time_element;
+    downloaded_time_element.next = &downloaded_url_element;
+    downloaded_url_element.next = NULL;
+
+    struct json_array_s downloaded_json_array;
+    downloaded_json_array.start = NULL;
+    downloaded_json_array.length = 0;
+    struct json_array_element_s downloaded_json_array_element;
+
+    /* The "downloaded" array only gets an element when there is an item to start after */
+    if(start_after) {
+        downloaded_json.start = &downloaded_title_element;
+        downloaded_json.length = 3;
+
+        downloaded_json_array_element.value = &downloaded_json_value;
+        downloaded_json_array_element.next = NULL;
+        downloaded_json_array.start = &downloaded_json_array_element;
+        downloaded_json_array.length = 1;
+    }
+
+    struct json_value_s downloaded_json_array_value;
+    downloaded_json_array_value.payload = &downloaded_json_array;
+    downloaded_json_array_value.type = json_type_array;
+
+    struct json_object_s json_root;
+    json_root.length = 3;
+
+    struct json_object_element_s link_element;
+    link_element.name = &link_json_key;
+    link_element.value = &link_json_value;
+
+    struct json_object_element_s updated_element;
+    updated_element.name = &updated_json_key;
+    updated_element.value = &updated_json_value;
+
+    struct json_object_element_s downloaded_element;
+    downloaded_element.name = &downloaded_json_key;
+    downloaded_element.value = &downloaded_json_array_value;
+
+    link_element.next = &updated_element;
+    updated_element.next = &downloaded_element;
+    downloaded_element.next = NULL;
+    json_root.start = &link_element;
+
+    struct json_value_s json_root_value;
+    json_root_value.payload = &json_root;
+    json_root_value.type = json_type_object;
+
+    /* The last byte is not written to the file, presumably because the size includes the null terminator (TODO: confirm against json.h) */
+    size_t json_body_size = 0;
+    char *json_body_str = json_write_pretty(&json_root_value, " ", "\n", &json_body_size);
+    if(!json_body_str || json_body_size == 0) {
+        fprintf(stderr, "Failed to write json data to rss file %s/%s\n", dir, filename);
+        return -1;
+    }
+
+    int result = file_overwrite_in_dir(dir, filename, json_body_str, json_body_size - 1);
+    free(json_body_str);
+    return result;
 }
 
-int add_rss(const char *name, const char *url, const char *rss_config_dir, const char *start_after) {
+int add_rss(const char *name, const char *url, char *rss_config_dir, const char *start_after) {
     (void)name;
     (void)rss_config_dir;
     (void)start_after;
@@ -161,17 +359,122 @@ int add_rss(const char *name, const char *url, const char *rss_config_dir, const
         goto cleanup;
     }
 
-    char rss_title[256];
-    res = parse_rss(buffer.data, rss_title, sizeof(rss_title), rss_parse_callback, NULL);
+    RssParseUserdata rss_parse_userdata;
+    rss_parse_userdata.start_after = start_after;
+    rss_parse_userdata.found_start_after = 0;
+    rss_parse_userdata.start_after_url[0] = '\0';
+
+    /* TODO: What if rss title is longer than this? */
+    char rss_title[250];
+    res = parse_rss(buffer.data, rss_title, sizeof(rss_title), rss_parse_callback, &rss_parse_userdata);
     if(res != 0) {
         fprintf(stderr, "Failed to parse rss for url: %s\n", url);
         result = res;
         goto cleanup;
     }
 
-    fprintf(stderr, "rss title: |%s|\n", rss_title);
-    /*if(!name)
-        name = rss_title*/
+    if(start_after && !rss_parse_userdata.found_start_after) {
+        fprintf(stderr, "Failed to find %s in rss %s\n", start_after, url);
+        result = -1;
+        goto cleanup;
+    }
+
+    /* TODO: Add (add rss ) here */
+
+    if(!name) {
+        if(rss_title[0] == '\0') {
+            fprintf(stderr, "Failed to find rss title and --name was not provided\n");
+            result = -1;
+            goto cleanup;
+        }
+
+        /* The title is used as a directory name, so it can't contain a path separator */
+        string_replace(rss_title, '/', '_');
+        char *stripped_rss_title = strip(rss_title);
+        name = stripped_rss_title;
+    }
+
+    /* With an empty name the data would be written directly to the "tracked" directory */
+    if(name[0] == '\0') {
+        fprintf(stderr, "The name of the rss can't be empty\n");
+        result = -1;
+        goto cleanup;
+    }
+
+    /*
+        The paths are built in local buffers with a known size, since the capacity
+        of |rss_config_dir| is not known here. |rss_config_dir| is not modified.
+    */
+    char rss_tracked_dir[PATH_MAX];
+    char in_progress_filepath[PATH_MAX];
+    int rss_tracked_dir_len = snprintf(rss_tracked_dir, sizeof(rss_tracked_dir), "%s/tracked/%s", rss_config_dir, name);
+    int in_progress_filepath_len = snprintf(in_progress_filepath, sizeof(in_progress_filepath), "%s/tracked/%s/.in_progress", rss_config_dir, name);
+    if(rss_tracked_dir_len < 0 || in_progress_filepath_len < 0 || (size_t)in_progress_filepath_len >= sizeof(in_progress_filepath)) {
+        fprintf(stderr, "The path %s/tracked/%s is too long\n", rss_config_dir, name);
+        result = -1;
+        goto cleanup;
+    }
+
+    if(file_exists(rss_tracked_dir) == 0) {
+        fprintf(stderr, "You are already tracking %s\n", url);
+        result = -1;
+        goto cleanup;
+    }
+
+    res = create_directory_recursive(rss_tracked_dir);
+    if(res != 0) {
+        fprintf(stderr, "Failed to create %s, error: %s\n", rss_tracked_dir, strerror(res));
+        result = res;
+        goto cleanup;
+    }
+
+    /*
+        Create an ".in_progress" file to prevent periodic sync from reading rss data
+        before we have finished adding all the data.
+    */
+    res = create_lock_file(in_progress_filepath);
+    if(res != 0) {
+        fprintf(stderr, "Failed to create %s/.in_progress\n", rss_tracked_dir);
+        /* Nothing has been written to the directory yet, so it can be removed directly */
+        remove(rss_tracked_dir);
+        result = res;
+        goto cleanup;
+    }
+
+    res = file_overwrite_in_dir(rss_tracked_dir, "link", url, strlen(url));
+    if(res != 0) {
+        fprintf(stderr, "Failed to create %s/link\n", rss_tracked_dir);
+        remove_rss_tracked_dir(rss_tracked_dir);
+        result = res;
+        goto cleanup;
+    }
+
+    char updated[32];
+    /* time_t is not guaranteed to be a long, so it's converted to the type that %ld expects */
+    snprintf(updated, sizeof(updated), "%ld", (long)time(NULL));
+    res = file_overwrite_in_dir(rss_tracked_dir, "updated", updated, strlen(updated));
+    if(res != 0) {
+        fprintf(stderr, "Failed to create %s/updated\n", rss_tracked_dir);
+        remove_rss_tracked_dir(rss_tracked_dir);
+        result = res;
+        goto cleanup;
+    }
+
+    /* The url buffer only has content when |start_after| was searched for and found */
+    const char *start_after_url = start_after ? rss_parse_userdata.start_after_url : NULL;
+    res = write_rss_json_to_file(rss_tracked_dir, "data", url, updated, start_after, start_after_url);
+    if(res != 0) {
+        fprintf(stderr, "Failed to create %s/data\n", rss_tracked_dir);
+        remove_rss_tracked_dir(rss_tracked_dir);
+        result = res;
+        goto cleanup;
+    }
+
+    /* If the file is left behind then the rss stays marked as in progress, so report it */
+    if(remove(in_progress_filepath) != 0) {
+        perror(in_progress_filepath);
+        result = -1;
+    }
 
 cleanup:
     buffer_deinit(&buffer);
diff --git a/src/rss.h b/src/rss.h
index 9c584ae..b33231d 100644
--- a/src/rss.h
+++ b/src/rss.h
@@ -1,6 +1,7 @@
 #ifndef RSS_H
 #define RSS_H
 
-int add_rss(const char *name, const char *url, const char *rss_config_dir, const char *start_after);
+/* Returns 0 on success */
+int add_rss(const char *name, const char *url, char *rss_config_dir, const char *start_after);
 
 #endif
-- 
cgit v1.2.3