#include "rss.h"
#include "download.h"
#include "stringutils.h"
#include "fileutils.h"
#include "buffer.h"
#include "rss_html_common.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <time.h>

/* Sizes of the scratch buffers that hold one unescaped item title / link. */
enum {
    RSS_ITEM_TITLE_BUFFER_SIZE = 256,
    RSS_ITEM_LINK_BUFFER_SIZE = 2084
};

static int is_alpha_lowercase(char c) {
    return c >= 'a' && c <= 'z';
}

static int is_digit(char c) {
    return c >= '0' && c <= '9';
}

/* Characters that may appear between '&' and ';' in a reference we understand. */
static int is_entity_char(char c) {
    return is_alpha_lowercase(c) || (c >= 'A' && c <= 'Z') || is_digit(c) || c == '#';
}

/*
    Returns a pointer to the first character at or after |str| that can not be part
    of an entity name. The scan always terminates: it stops at ';', at the end of
    the string and at any other non-name character (for example the space in "a & b").
*/
static const char* get_amp_end(const char *str) {
    while(is_entity_char(*str))
        ++str;
    return str;
}

/*
    Parses the digits of a numeric character reference (the part after "&#" and
    before ';'), either decimal ("65") or hexadecimal ("x41" / "X41").
    Returns 0 and stores the code point on success, -1 if the text is not a valid
    reference (empty, bad digit, zero, surrogate or above U+10FFFF).
*/
static int parse_char_reference(const char *ref, size_t length, unsigned long *codepoint) {
    unsigned long base = 10;
    unsigned long value = 0;
    size_t i = 0;

    if(length > 0 && (ref[0] == 'x' || ref[0] == 'X')) {
        base = 16;
        i = 1;
    }

    if(i == length)
        return -1;

    for(; i < length; ++i) {
        char c = ref[i];
        unsigned long digit;
        if(is_digit(c))
            digit = (unsigned long)(c - '0');
        else if(base == 16 && c >= 'a' && c <= 'f')
            digit = 10 + (unsigned long)(c - 'a');
        else if(base == 16 && c >= 'A' && c <= 'F')
            digit = 10 + (unsigned long)(c - 'A');
        else
            return -1;

        /* |value| is at most 0x10FFFF here, so this can not overflow */
        value = value * base + digit;
        if(value > 0x10FFFFUL)
            return -1;
    }

    /* A zero byte would silently truncate the output string */
    if(value == 0 || (value >= 0xD800UL && value <= 0xDFFFUL))
        return -1;

    *codepoint = value;
    return 0;
}

/*
    Writes |codepoint| as UTF-8 into |out| (room for 4 bytes) and returns the number
    of bytes written.
    NOTE(review): this assumes the rest of the feed text is UTF-8 - confirm.
*/
static size_t utf8_encode(unsigned long codepoint, char *out) {
    if(codepoint < 0x80UL) {
        out[0] = (char)codepoint;
        return 1;
    }
    if(codepoint < 0x800UL) {
        out[0] = (char)(0xC0 | (codepoint >> 6));
        out[1] = (char)(0x80 | (codepoint & 0x3F));
        return 2;
    }
    if(codepoint < 0x10000UL) {
        out[0] = (char)(0xE0 | (codepoint >> 12));
        out[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
        out[2] = (char)(0x80 | (codepoint & 0x3F));
        return 3;
    }
    out[0] = (char)(0xF0 | (codepoint >> 18));
    out[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
    out[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
    out[3] = (char)(0x80 | (codepoint & 0x3F));
    return 4;
}

/*
    Decodes the reference name |name| of |length| bytes (the text between '&' and ';').
    Handles the five predefined XML entities and numeric character references.
    Returns the number of bytes written to |out| (room for 4 bytes), or 0 if the
    reference is not recognised.
*/
static size_t decode_entity(const char *name, size_t length, char *out) {
    static const struct {
        const char *name;
        char value;
    } named_entities[] = {
        { "amp", '&' },
        { "lt", '<' },
        { "gt", '>' },
        { "apos", '\'' },
        { "quot", '"' }
    };
    size_t i;

    if(length > 0 && name[0] == '#') {
        unsigned long codepoint;
        if(parse_char_reference(name + 1, length - 1, &codepoint) != 0)
            return 0;
        return utf8_encode(codepoint, out);
    }

    for(i = 0; i < sizeof(named_entities) / sizeof(named_entities[0]); ++i) {
        if(strlen(named_entities[i].name) == length && memcmp(named_entities[i].name, name, length) == 0) {
            out[0] = named_entities[i].value;
            return 1;
        }
    }
    return 0;
}

/*
    Copies |str| to |result|, replacing XML entity and character references with the
    text they stand for. At most |result_length| - 1 bytes are written, followed by
    a terminating NUL; longer input is truncated (never in the middle of a decoded
    multi-byte character). Nothing is written if |result_length| is not positive.
    A '&' that does not start a recognised, ';'-terminated reference is copied as-is.
    |str| is not modified.
*/
static void xml_unescape(const char *str, char *result, int result_length) {
    size_t capacity;
    size_t index = 0;

    if(!result || result_length <= 0)
        return;
    capacity = (size_t)result_length - 1;

    while(*str != '\0' && index < capacity) {
        if(*str == '&') {
            const char *amp_end = get_amp_end(str + 1);
            if(*amp_end == ';') {
                char decoded[4];
                size_t decoded_length = decode_entity(str + 1, (size_t)(amp_end - (str + 1)), decoded);
                if(decoded_length > 0) {
                    if(decoded_length > capacity - index)
                        break;
                    memcpy(result + index, decoded, decoded_length);
                    index += decoded_length;
                    str = amp_end + 1;
                    continue;
                }
            }
        }
        result[index++] = *str++;
    }

    result[index] = '\0';
}

/*
    Finds the first occurrence of |tag| in |str| and, if it lies before |limit|
    (a NULL |limit| means "no limit"), overwrites its first byte with NUL so that
    |str| ends right before the tag. Returns the position of the tag, or NULL
    (leaving the text untouched) if there is no such tag before |limit|.
*/
static char* string_substr_before_tag_end(char *str, const char *tag, const char *limit) {
    char *tag_p = strstr(str, tag);
    if(!tag_p || (limit && tag_p >= limit))
        return NULL;
    *tag_p = '\0';
    return tag_p;
}

/* Called once per feed item with the unescaped title and link. Return non-zero to stop parsing. */
typedef int (*RssParseCallback)(char *title, char *link, void *userdata);

/*
    Minimal RSS scanner. |str| is the NUL-terminated feed text and is modified in
    place (closing tags are overwritten with NUL).
    The unescaped channel title is stored in |rss_title_str|; it is set to an empty
    string if the channel has no <title>...</title> before its first <item>.
    |parse_callback| is called for each item in document order until it returns
    non-zero or there are no more items. The title/link pointers given to the
    callback refer to buffers local to this function and are only valid during
    the call.
    Returns 0 on success and 1 if the feed has no <channel> or an item is malformed
    (missing </item>, or no <title>...</title> followed by <link>...</link> inside
    the item).
*/
static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, RssParseCallback parse_callback, void *userdata) {
    char title_str[RSS_ITEM_TITLE_BUFFER_SIZE];
    char link_str[RSS_ITEM_LINK_BUFFER_SIZE];
    char *channel_start;
    char *after_channel;
    char *rss_title;
    char *first_item;
    char *item;

    channel_start = strstr(str, "<channel>");
    if(!channel_start)
        return 1;

    after_channel = channel_start + 9;
    rss_title = strstr(after_channel, "<title>");
    first_item = strstr(after_channel, "<item>");

    if(rss_title_str_size > 0)
        rss_title_str[0] = '\0';

    /* A <title> after the first <item> belongs to an item, not to the channel */
    if(rss_title && (!first_item || rss_title < first_item)) {
        rss_title += 7;
        if(string_substr_before_tag_end(rss_title, "</title>", first_item))
            xml_unescape(rss_title, rss_title_str, rss_title_str_size);
    }

    if(!first_item)
        return 0;

    item = first_item;
    for(;;) {
        char *after_first_item = item + 6;
        char *item_end;
        char *item_title;
        char *after_title;
        char *item_link;

        item_end = strstr(after_first_item, "</item>");
        if(!item_end)
            return 1;

        item_title = strstr(after_first_item, "<title>");
        if(!item_title || item_title >= item_end)
            return 1;
        item_title += 7;

        after_title = string_substr_before_tag_end(item_title, "</title>", item_end);
        if(!after_title)
            return 1;
        after_title += 8;

        item_link = strstr(after_title, "<link>");
        if(!item_link || item_link >= item_end)
            return 1;
        item_link += 6;

        /* Without this check a missing </link> would swallow the following items */
        if(!string_substr_before_tag_end(item_link, "</link>", item_end))
            return 1;

        xml_unescape(item_title, title_str, sizeof(title_str));
        xml_unescape(item_link, link_str, sizeof(link_str));

        if(parse_callback(title_str, link_str, userdata) != 0)
            return 0;

        item = strstr(item_end + 7, "<item>");
        if(!item)
            return 0;
    }
}

typedef struct {
    /* Title to look for, or NULL to not look for anything */
    const char *start_after;
    /* Set to 1 once an item with the title |start_after| has been seen */
    int found_start_after;
    /* Link of the matching item (points into |start_after_url_storage|), or NULL */
    const char *start_after_url;
    /* Owns the text |start_after_url| points to, so it outlives parse_rss */
    char start_after_url_storage[RSS_ITEM_LINK_BUFFER_SIZE];
} RssParseUserdata;

/*
    RssParseCallback that stops at the first item whose title equals |start_after|.
    The link is copied because |link| is only valid for the duration of this call.
*/
static int rss_parse_callback(char *title, char *link, void *userdata) {
    RssParseUserdata *rss_parse_userdata = userdata;
    if(rss_parse_userdata->start_after && strcmp(rss_parse_userdata->start_after, title) == 0) {
        rss_parse_userdata->found_start_after = 1;
        snprintf(rss_parse_userdata->start_after_url_storage, sizeof(rss_parse_userdata->start_after_url_storage), "%s", link);
        rss_parse_userdata->start_after_url = rss_parse_userdata->start_after_url_storage;
        return 1;
    }
    return 0;
}

/*
    Downloads the feed at |url|, and starts tracking it by creating the directory
    "<rss_config_dir>/tracked/<name>" containing the files "link", "updated" and "data".
    |name| may be NULL, in which case the feed's channel title is used (with '/'
    replaced by '_' and passed through strip()).
    |start_after| may be NULL; otherwise it must match the title of an item in the feed.
    |rss_config_dir| is only read; the tracked path is built in a local buffer.
    Returns 0 on success and non-zero on failure (an error is printed to stderr).
*/
int add_rss(const char *name, const char *url, char *rss_config_dir, const char *start_after) {
    int result = 0;
    int path_length;
    Buffer buffer;
    RssParseUserdata rss_parse_userdata;
    /* TODO: What if rss title is longer than this? (it is currently truncated by xml_unescape) */
    char rss_title[250];
    char rss_tracked_dir[PATH_MAX];
    char in_progress_filepath[PATH_MAX];
    char updated[32];

    buffer_init(&buffer);
    result = download_to_buffer(url, &buffer);
    if(result != 0) {
        fprintf(stderr, "Failed to download rss: %s\n", url);
        goto cleanup;
    }

    rss_parse_userdata.start_after = start_after;
    rss_parse_userdata.found_start_after = 0;
    rss_parse_userdata.start_after_url = NULL;
    rss_parse_userdata.start_after_url_storage[0] = '\0';

    /* NOTE(review): parse_rss needs NUL-terminated text; presumably download_to_buffer guarantees that - confirm */
    result = parse_rss(buffer.data, rss_title, sizeof(rss_title), rss_parse_callback, &rss_parse_userdata);
    if(result != 0) {
        fprintf(stderr, "Failed to parse rss for url: %s\n", url);
        goto cleanup;
    }

    if(start_after && !rss_parse_userdata.found_start_after) {
        fprintf(stderr, "Failed to find %s in rss %s\n", start_after, url);
        result = -1;
        goto cleanup;
    }

    /* TODO: Add (add rss ) here */
    if(!name) {
        if(rss_title[0] == '\0') {
            fprintf(stderr, "Failed to find rss title and --name was not provided\n");
            result = -1;
            goto cleanup;
        }

        /* The title becomes a directory name, so it must not contain path separators */
        string_replace(rss_title, '/', '_');
        char *stripped_rss_title = strip(rss_title);
        name = stripped_rss_title;
    }

    path_length = snprintf(rss_tracked_dir, sizeof(rss_tracked_dir), "%s/tracked/%s", rss_config_dir, name);
    if(path_length < 0 || (size_t)path_length >= sizeof(rss_tracked_dir)) {
        fprintf(stderr, "The path %s/tracked/%s is too long\n", rss_config_dir, name);
        result = -1;
        goto cleanup;
    }

    /* NOTE(review): file_exists appears to return 0 when the path exists - confirm against fileutils.h */
    if(file_exists(rss_tracked_dir) == 0) {
        fprintf(stderr, "You are already tracking %s\n", url);
        result = -1;
        goto cleanup;
    }

    result = create_directory_recursive(rss_tracked_dir);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s, error: %s\n", rss_tracked_dir, strerror(result));
        goto cleanup;
    }

    /*
        Create an ".in_progress" file to prevent periodic sync from reading rss data
        before we have finished adding all the data.
    */
    path_length = snprintf(in_progress_filepath, sizeof(in_progress_filepath), "%s/.in_progress", rss_tracked_dir);
    if(path_length < 0 || (size_t)path_length >= sizeof(in_progress_filepath)) {
        fprintf(stderr, "Failed to create %s/.in_progress\n", rss_tracked_dir);
        result = -1;
        goto cleanup;
    }

    result = create_lock_file(in_progress_filepath);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/.in_progress\n", rss_tracked_dir);
        goto cleanup;
    }

    /*
        NOTE(review): the remove(rss_tracked_dir) calls below are best-effort; remove()
        fails on a non-empty directory and the directory contains ".in_progress" by now.
    */
    result = file_overwrite_in_dir(rss_tracked_dir, "link", url, strlen(url));
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/link\n", rss_tracked_dir);
        remove(rss_tracked_dir);
        goto cleanup;
    }

    /* time_t has no portable printf conversion, so convert explicitly */
    snprintf(updated, sizeof(updated), "%ld", (long)time(NULL));
    result = file_overwrite_in_dir(rss_tracked_dir, "updated", updated, strlen(updated));
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/updated\n", rss_tracked_dir);
        remove(rss_tracked_dir);
        goto cleanup;
    }

    result = write_plugin_json_to_file(rss_tracked_dir, "data", url, updated, start_after, rss_parse_userdata.start_after_url, NULL);
    if(result != 0) {
        fprintf(stderr, "Failed to create %s/data\n", rss_tracked_dir);
        remove(rss_tracked_dir);
        goto cleanup;
    }

    remove(in_progress_filepath);

    cleanup:
    buffer_deinit(&buffer);
    return result;
}