#include "rss.h"
#include "download.h"
#include "stringutils.h"
#include "fileutils.h"
#include "buffer.h"
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "json.h"

/* Length of a string literal, excluding the terminating NUL */
#define LITERAL_LENGTH(literal) (sizeof(literal) - 1)

enum {
    /* Size of the buffer that holds one unescaped item title */
    RSS_ITEM_TITLE_SIZE = 256,
    /* Size of the buffer that holds one unescaped item link */
    RSS_ITEM_LINK_SIZE = 2084
};

static int is_alpha_lowercase(char c) {
    return c >= 'a' && c <= 'z';
}

static int is_digit(char c) {
    return c >= '0' && c <= '9';
}

/*
    Returns a pointer to the first character in |str| that can't be part of an
    entity reference body (anything other than a-z, 0-9 and '#').
    This always terminates, since the NUL terminator is not part of that set.
*/
static const char* get_amp_end(const char *str) {
    while(is_alpha_lowercase(*str) || is_digit(*str) || *str == '#')
        ++str;
    return str;
}

/*
    Decodes the entity reference body |name| of length |name_length|
    (the text between '&' and ';').
    Supports amp, lt, gt, apos, quot and decimal character references in the range 1-127.
    Returns the decoded character, or '\0' if the entity is not recognized.
*/
static char decode_xml_entity(const char *name, size_t name_length) {
    static const struct {
        const char *entity;
        char value;
    } named_entities[] = {
        { "amp", '&' },
        { "lt", '<' },
        { "gt", '>' },
        { "apos", '\'' },
        { "quot", '"' }
    };

    if(name_length > 1 && name[0] == '#') {
        char *number_end;
        long code = strtol(name + 1, &number_end, 10);
        /* Only values that fit in a plain char are decoded; the whole body has to be a number */
        if(number_end != name + name_length || code < 1 || code > 127)
            return '\0';
        return (char)code;
    }

    for(size_t i = 0; i < sizeof(named_entities) / sizeof(named_entities[0]); ++i) {
        if(strlen(named_entities[i].entity) == name_length && memcmp(name, named_entities[i].entity, name_length) == 0)
            return named_entities[i].value;
    }
    return '\0';
}

/*
    Copies |str| to |result| with the supported xml entities replaced by the character they represent.
    Entities that are not recognized are copied verbatim.
    At most |result_length| - 1 characters are written, followed by a NUL terminator.
    Nothing is written if |result_length| is not positive.
*/
static void xml_unescape(const char *str, char *result, int result_length) {
    if(result_length <= 0)
        return;

    int index = 0;
    while(*str != '\0' && index < result_length - 1) {
        if(*str == '&') {
            const char *name = str + 1;
            const char *amp_end = get_amp_end(name);
            /* An entity at the very end of the string is accepted without the ';' */
            if(*amp_end == ';' || *amp_end == '\0') {
                char decoded = decode_xml_entity(name, (size_t)(amp_end - name));
                if(decoded != '\0') {
                    result[index++] = decoded;
                    str = (*amp_end == ';') ? amp_end + 1 : amp_end;
                    continue;
                }
            }
        }
        result[index++] = *str++;
    }
    result[index] = '\0';
}

/*
    Finds the first occurrence of |tag| in |str| and replaces its first character
    with a NUL terminator, so |str| ends right before the tag.
    If |limit| is not NULL then the tag has to start before |limit|.
    Returns a pointer to where the tag started, or NULL if the tag was not found
    (in which case |str| is not modified).
*/
static char* string_substr_before_tag_end(char *str, const char *tag, const char *limit) {
    char *tag_p = strstr(str, tag);
    if(!tag_p || (limit && tag_p >= limit))
        return NULL;
    *tag_p = '\0';
    return tag_p;
}

/* Return a non-zero value to stop parsing. |title| and |link| are only valid during the call */
typedef int (*RssParseCallback)(char *title, char *link, void *userdata);

/*
    Parses the rss document |str| in-place (|str| is modified).
    The unescaped channel title is written to |rss_title_str|, which is set to an empty string
    if the channel doesn't have a title before its first item.
    |parse_callback| is called with the unescaped title and link of every item, in document order,
    until it returns non-zero or there are no more items.
    Returns 0 on success and 1 if the document is malformed.
*/
static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, RssParseCallback parse_callback, void *userdata) {
    if(rss_title_str_size > 0)
        rss_title_str[0] = '\0';

    char *channel_start = strstr(str, "<channel>");
    if(!channel_start)
        return 1;

    char *after_channel = channel_start + LITERAL_LENGTH("<channel>");
    char *rss_title = strstr(after_channel, "<title>");
    char *first_item = strstr(after_channel, "<item>");

    /* A title that comes after the first item belongs to an item and not to the channel */
    if(rss_title && (!first_item || rss_title < first_item)) {
        rss_title += LITERAL_LENGTH("<title>");
        if(string_substr_before_tag_end(rss_title, "</title>", first_item))
            xml_unescape(rss_title, rss_title_str, rss_title_str_size);
    }

    char title_str[RSS_ITEM_TITLE_SIZE];
    char link_str[RSS_ITEM_LINK_SIZE];

    char *item = first_item;
    while(item) {
        char *after_first_item = item + LITERAL_LENGTH("<item>");
        /* The end of the item is located before any NUL terminator is written inside the item */
        char *item_end = strstr(after_first_item, "</item>");
        if(!item_end)
            return 1;

        char *item_title = strstr(after_first_item, "<title>");
        if(!item_title || item_title >= item_end)
            return 1;
        item_title += LITERAL_LENGTH("<title>");

        char *after_title = string_substr_before_tag_end(item_title, "</title>", item_end);
        if(!after_title)
            return 1;
        after_title += LITERAL_LENGTH("</title>");

        /* The link is expected to come after the title inside the item */
        char *item_link = strstr(after_title, "<link>");
        if(!item_link || item_link >= item_end)
            return 1;
        item_link += LITERAL_LENGTH("<link>");

        if(!string_substr_before_tag_end(item_link, "</link>", item_end))
            return 1;

        xml_unescape(item_title, title_str, sizeof(title_str));
        xml_unescape(item_link, link_str, sizeof(link_str));
        if(parse_callback(title_str, link_str, userdata) != 0)
            return 0;

        item = strstr(item_end + LITERAL_LENGTH("</item>"), "<item>");
    }
    return 0;
}

typedef struct {
    /* Title of the item to look for. Can be NULL, in which case no item matches */
    const char *start_after;
    /* Set to 1 when an item with the title |start_after| has been found */
    int found_start_after;
    /* Link of the matching item, or NULL if no item has matched. Points to |start_after_url_storage| */
    const char *start_after_url;
    /* Owns the link, since the link passed to the callback is only valid during the callback */
    char start_after_url_storage[RSS_ITEM_LINK_SIZE];
} RssParseUserdata;

/* Stops parsing at the first item with the title |start_after| and saves a copy of its link */
static int rss_parse_callback(char *title, char *link, void *userdata) {
    RssParseUserdata *rss_parse_userdata = userdata;
    if(!rss_parse_userdata->start_after || strcmp(rss_parse_userdata->start_after, title) != 0)
        return 0;

    snprintf(rss_parse_userdata->start_after_url_storage, sizeof(rss_parse_userdata->start_after_url_storage), "%s", link);
    rss_parse_userdata->found_start_after = 1;
    rss_parse_userdata->start_after_url = rss_parse_userdata->start_after_url_storage;
    return 1;
}

/* Returns the length of |str|, or 0 if |str| is NULL */
static size_t string_length_or_zero(const char *str) {
    return str ? strlen(str) : 0;
}

/* |str| is referenced and not copied, it has to outlive |json_result| */
static void create_json_string(struct json_string_s *json_result, const char *str, size_t len) {
    json_result->string = str;
    json_result->string_size = len;
}

static void init_json_value_str(struct json_value_s *json_value, struct json_string_s *json_str) {
    json_value->payload = json_str;
    json_value->type = json_type_string;
}

static void init_json_object_element(struct json_object_element_s *element, struct json_string_s *name, struct json_value_s *value, struct json_object_element_s *next) {
    element->name = name;
    element->value = value;
    element->next = next;
}

/*
    Writes the tracking data as json to the file |filename| in the directory |dir|:
    {
        "link": |url|,
        "updated": |updated|,
        "downloaded": [ { "title": |start_after|, "time": |updated|, "url": |start_after_url| } ]
    }
    "downloaded" is an empty array if |start_after| is NULL.
    Returns 0 on success, -1 if the json could not be generated,
    otherwise the result of file_overwrite_in_dir.
*/
static int write_rss_json_to_file(const char *dir, const char *filename, const char *url, const char *updated, const char *start_after, const char *start_after_url) {
    /* The item in the "downloaded" array */
    struct json_string_s title_json_key;
    create_json_string(&title_json_key, "title", 5);
    struct json_string_s title_json_value_str;
    create_json_string(&title_json_value_str, start_after, string_length_or_zero(start_after));
    struct json_value_s title_json_value;
    init_json_value_str(&title_json_value, &title_json_value_str);

    struct json_string_s time_json_key;
    create_json_string(&time_json_key, "time", 4);
    struct json_string_s time_value_str;
    create_json_string(&time_value_str, updated, string_length_or_zero(updated));
    struct json_value_s time_json_value;
    init_json_value_str(&time_json_value, &time_value_str);

    struct json_string_s url_json_key;
    create_json_string(&url_json_key, "url", 3);
    struct json_string_s url_value_str;
    create_json_string(&url_value_str, start_after_url, string_length_or_zero(start_after_url));
    struct json_value_s url_json_value;
    init_json_value_str(&url_json_value, &url_value_str);

    struct json_object_element_s downloaded_title_element;
    struct json_object_element_s downloaded_time_element;
    struct json_object_element_s downloaded_url_element;
    init_json_object_element(&downloaded_title_element, &title_json_key, &title_json_value, &downloaded_time_element);
    init_json_object_element(&downloaded_time_element, &time_json_key, &time_json_value, &downloaded_url_element);
    init_json_object_element(&downloaded_url_element, &url_json_key, &url_json_value, NULL);

    struct json_object_s downloaded_json;
    downloaded_json.start = NULL;
    downloaded_json.length = 0;

    struct json_value_s downloaded_json_value;
    downloaded_json_value.payload = &downloaded_json;
    downloaded_json_value.type = json_type_object;

    /* The "downloaded" array */
    struct json_array_element_s downloaded_json_array_element;
    downloaded_json_array_element.value = &downloaded_json_value;
    downloaded_json_array_element.next = NULL;

    struct json_array_s downloaded_json_array;
    downloaded_json_array.start = NULL;
    downloaded_json_array.length = 0;

    /* The array only gets an item if there is an item to start after */
    if(start_after) {
        downloaded_json.start = &downloaded_title_element;
        downloaded_json.length = 3;
        downloaded_json_array.start = &downloaded_json_array_element;
        downloaded_json_array.length = 1;
    }

    struct json_value_s downloaded_json_array_value;
    downloaded_json_array_value.payload = &downloaded_json_array;
    downloaded_json_array_value.type = json_type_array;

    /* The root object */
    struct json_string_s link_json_key;
    create_json_string(&link_json_key, "link", 4);
    struct json_string_s link_json_value_str;
    create_json_string(&link_json_value_str, url, string_length_or_zero(url));
    struct json_value_s link_json_value;
    init_json_value_str(&link_json_value, &link_json_value_str);

    struct json_string_s updated_json_key;
    create_json_string(&updated_json_key, "updated", 7);
    struct json_string_s updated_json_value_str;
    create_json_string(&updated_json_value_str, updated, string_length_or_zero(updated));
    struct json_value_s updated_json_value;
    init_json_value_str(&updated_json_value, &updated_json_value_str);

    struct json_string_s downloaded_json_key;
    create_json_string(&downloaded_json_key, "downloaded", 10);

    struct json_object_element_s link_element;
    struct json_object_element_s updated_element;
    struct json_object_element_s downloaded_element;
    init_json_object_element(&link_element, &link_json_key, &link_json_value, &updated_element);
    init_json_object_element(&updated_element, &updated_json_key, &updated_json_value, &downloaded_element);
    init_json_object_element(&downloaded_element, &downloaded_json_key, &downloaded_json_array_value, NULL);

    struct json_object_s json_root;
    json_root.start = &link_element;
    json_root.length = 3;

    struct json_value_s json_root_value;
    json_root_value.payload = &json_root;
    json_root_value.type = json_type_object;

    size_t json_body_size = 0;
    char *json_body_str = json_write_pretty(&json_root_value, " ", "\n", &json_body_size);
    if(!json_body_str || json_body_size == 0) {
        fprintf(stderr, "Failed to write json data to rss file %s/%s\n", dir, filename);
        free(json_body_str);
        return -1;
    }

    /* One byte less than the reported size is written, presumably to skip a NUL terminator - TODO confirm */
    int result = file_overwrite_in_dir(dir, filename, json_body_str, json_body_size - 1);
    free(json_body_str);
    return result;
}

/*
    Removes the files that add_rss creates in |dir| and then |dir| itself.
    The directory has to be empty before it can be removed.
    The ".in_progress" file is removed last among the files.
*/
static void remove_tracked_dir(const char *dir) {
    static const char *const filenames[] = { "data", "updated", "link", ".in_progress" };
    char filepath[PATH_MAX];
    for(size_t i = 0; i < sizeof(filenames) / sizeof(filenames[0]); ++i) {
        int filepath_length = snprintf(filepath, sizeof(filepath), "%s/%s", dir, filenames[i]);
        if(filepath_length > 0 && (size_t)filepath_length < sizeof(filepath))
            remove(filepath);
    }
    remove(dir);
}

/*
    Starts tracking the rss feed at |url|.
    The feed is downloaded and parsed and the directory |rss_config_dir|/tracked/|name| is created
    with the files "link", "updated" and "data".
    |name| can be NULL, in which case the title of the feed is used, with '/' replaced by '_'.
    |start_after| can be NULL. If it's not NULL then the feed has to contain an item with that title,
    and that item is saved as already downloaded.
    "/tracked/|name|" is appended to |rss_config_dir|.
    Returns 0 on success, a non-zero value on failure.
*/
int add_rss(const char *name, const char *url, char *rss_config_dir, const char *start_after) {
    int result = 0;
    int created_tracked_dir = 0;
    char *rss_tracked_dir = rss_config_dir;

    Buffer buffer;
    buffer_init(&buffer);
    int res = download_to_buffer(url, &buffer);
    if(res != 0) {
        fprintf(stderr, "Failed to download rss: %s\n", url);
        result = res;
        goto cleanup;
    }

    RssParseUserdata rss_parse_userdata;
    rss_parse_userdata.start_after = start_after;
    rss_parse_userdata.found_start_after = 0;
    rss_parse_userdata.start_after_url = NULL;
    rss_parse_userdata.start_after_url_storage[0] = '\0';

    /* TODO: What if rss title is longer than this? */
    char rss_title[250];
    /* NOTE(review): parse_rss treats buffer.data as a NUL terminated string; confirm that download_to_buffer guarantees that */
    res = parse_rss(buffer.data, rss_title, sizeof(rss_title), rss_parse_callback, &rss_parse_userdata);
    if(res != 0) {
        fprintf(stderr, "Failed to parse rss for url: %s\n", url);
        result = res;
        goto cleanup;
    }

    if(start_after && !rss_parse_userdata.found_start_after) {
        fprintf(stderr, "Failed to find %s in rss %s\n", start_after, url);
        result = -1;
        goto cleanup;
    }

    /* TODO: Add (add rss ) here */
    if(!name) {
        if(rss_title[0] == '\0') {
            fprintf(stderr, "Failed to find rss title and --name was not provided\n");
            result = -1;
            goto cleanup;
        }

        string_replace(rss_title, '/', '_');
        char *stripped_rss_title = strip(rss_title);
        /* An empty name would make the tracked directory itself the directory for this rss */
        if(stripped_rss_title[0] == '\0') {
            fprintf(stderr, "Failed to find rss title and --name was not provided\n");
            result = -1;
            goto cleanup;
        }
        name = stripped_rss_title;
    }

    /*
        The longest path that is created below is the ".in_progress" file path, which is stored in a PATH_MAX sized buffer.
        NOTE(review): this also assumes that rss_config_dir can hold PATH_MAX bytes - confirm against the caller.
    */
    size_t tracked_dir_length = strlen(rss_config_dir) + LITERAL_LENGTH("/tracked/") + strlen(name);
    if(tracked_dir_length + LITERAL_LENGTH("/.in_progress") >= (size_t)PATH_MAX) {
        fprintf(stderr, "The path to the tracked rss %s is too long\n", name);
        result = -1;
        goto cleanup;
    }

    strcat(rss_tracked_dir, "/tracked/");
    strcat(rss_tracked_dir, name);

    if(file_exists(rss_tracked_dir) == 0) {
        fprintf(stderr, "You are already tracking %s\n", url);
        result = -1;
        goto cleanup;
    }

    res = create_directory_recursive(rss_tracked_dir);
    if(res != 0) {
        fprintf(stderr, "Failed to create %s, error: %s\n", rss_tracked_dir, strerror(res));
        result = res;
        goto cleanup;
    }
    /* From here on a failure removes the directory again, see cleanup */
    created_tracked_dir = 1;

    /*
        Create an ".in_progress" file to prevent periodic sync from reading rss data
        before we have finished adding all the data.
    */
    char in_progress_filepath[PATH_MAX];
    snprintf(in_progress_filepath, sizeof(in_progress_filepath), "%s/.in_progress", rss_tracked_dir);
    res = create_lock_file(in_progress_filepath);
    if(res != 0) {
        fprintf(stderr, "Failed to create %s/.in_progress\n", rss_tracked_dir);
        result = res;
        goto cleanup;
    }

    res = file_overwrite_in_dir(rss_tracked_dir, "link", url, strlen(url));
    if(res != 0) {
        fprintf(stderr, "Failed to create %s/link\n", rss_tracked_dir);
        result = res;
        goto cleanup;
    }

    char updated[32];
    /* time_t is not guaranteed to be a long, so it's converted to match the format */
    snprintf(updated, sizeof(updated), "%ld", (long)time(NULL));
    res = file_overwrite_in_dir(rss_tracked_dir, "updated", updated, strlen(updated));
    if(res != 0) {
        fprintf(stderr, "Failed to create %s/updated\n", rss_tracked_dir);
        result = res;
        goto cleanup;
    }

    res = write_rss_json_to_file(rss_tracked_dir, "data", url, updated, start_after, rss_parse_userdata.start_after_url);
    if(res != 0) {
        fprintf(stderr, "Failed to create %s/data\n", rss_tracked_dir);
        result = res;
        goto cleanup;
    }

    remove(in_progress_filepath);

    cleanup:
    if(result != 0 && created_tracked_dir)
        remove_tracked_dir(rss_tracked_dir);
    buffer_deinit(&buffer);
    return result;
}