#include "rss.h"
#include "download.h"
#include "buffer.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Length of a string literal, excluding the terminating NUL. */
#define LITERAL_LENGTH(literal) (sizeof(literal) - 1)

/* Named references that are decoded; these are the five predefined XML entities. */
static const struct {
    const char *name;
    char value;
} named_entities[] = {
    { "amp", '&' },
    { "lt", '<' },
    { "gt", '>' },
    { "apos", '\'' },
    { "quot", '"' },
};

static int is_alpha_lowercase(char c)
{
    return c >= 'a' && c <= 'z';
}

static int is_digit(char c)
{
    return c >= '0' && c <= '9';
}

/* Returns the value of c as a hexadecimal digit, or -1 if it is not one. */
static int digit_value(char c)
{
    if (is_digit(c))
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/*
 * Returns a pointer to the first character of str that cannot be part of an
 * entity name (anything other than a-z, 0-9 or '#').
 *
 * The scan always stops at the first such character, so malformed input such
 * as "&a b" can no longer make the caller spin forever.
 */
static const char *get_amp_end(const char *str)
{
    while (is_alpha_lowercase(*str) || is_digit(*str) || *str == '#')
        ++str;
    return str;
}

/*
 * Encodes a Unicode code point as UTF-8 into out.
 *
 * Returns the number of bytes written (1-4), or 0 for values that must not be
 * emitted: 0 (it would terminate the output string), UTF-16 surrogates and
 * anything above U+10FFFF.
 */
static size_t utf8_encode(unsigned long code, char out[4])
{
    if (code == 0 || code > 0x10FFFF || (code >= 0xD800 && code <= 0xDFFF))
        return 0;
    if (code < 0x80) {
        out[0] = (char)code;
        return 1;
    }
    if (code < 0x800) {
        out[0] = (char)(0xC0 | (code >> 6));
        out[1] = (char)(0x80 | (code & 0x3F));
        return 2;
    }
    if (code < 0x10000) {
        out[0] = (char)(0xE0 | (code >> 12));
        out[1] = (char)(0x80 | ((code >> 6) & 0x3F));
        out[2] = (char)(0x80 | (code & 0x3F));
        return 3;
    }
    out[0] = (char)(0xF0 | (code >> 18));
    out[1] = (char)(0x80 | ((code >> 12) & 0x3F));
    out[2] = (char)(0x80 | ((code >> 6) & 0x3F));
    out[3] = (char)(0x80 | (code & 0x3F));
    return 4;
}

/*
 * Decodes the reference that starts at amp, which must point at a '&'.
 *
 * Understands the named entities in named_entities and numeric references in
 * decimal ("&#38;") or hexadecimal ("&#x26;") form. A reference has to be
 * terminated by ';' or by the end of the string.
 *
 * Numeric references are written as UTF-8.
 * NOTE(review): this assumes the feed text is UTF-8 - confirm against the
 * downloader.
 *
 * Returns the number of bytes stored in out (1-4) and sets *consumed to the
 * number of input characters the reference occupied. Returns 0, leaving
 * *consumed untouched, when the text is not a reference that can be decoded.
 */
static size_t decode_reference(const char *amp, char out[4], size_t *consumed)
{
    const char *end;
    size_t length = 0;

    if (amp[1] == '#') {
        const char *digits = amp + 2;
        unsigned long base = 10;
        unsigned long code = 0;

        if (*digits == 'x' || *digits == 'X') {
            base = 16;
            ++digits;
        }
        for (end = digits;; ++end) {
            int value = digit_value(*end);
            if (value < 0 || (unsigned long)value >= base)
                break;
            /* Saturate instead of overflowing on absurdly long digit runs. */
            if (code <= 0x10FFFF)
                code = code * base + (unsigned long)value;
        }
        if (end == digits)
            return 0;
        length = utf8_encode(code, out);
    } else {
        const char *name = amp + 1;
        size_t name_length;
        size_t i;

        end = get_amp_end(name);
        name_length = (size_t)(end - name);
        for (i = 0; i < sizeof(named_entities) / sizeof(named_entities[0]); ++i) {
            if (strlen(named_entities[i].name) == name_length
                && memcmp(name, named_entities[i].name, name_length) == 0) {
                out[0] = named_entities[i].value;
                length = 1;
                break;
            }
        }
    }

    if (length == 0)
        return 0;

    if (*end == ';')
        ++end;
    else if (*end != '\0')
        return 0;

    *consumed = (size_t)(end - amp);
    return length;
}

/*
 * Copies str into result, replacing XML character/entity references with the
 * characters they stand for.
 *
 * result receives at most result_length - 1 bytes plus a terminating NUL; the
 * output is silently truncated when it does not fit, and a multi-byte sequence
 * is never cut in half. Nothing is written when result_length is not positive.
 * A '&' that does not start a decodable reference is copied through unchanged
 * so no input text is lost.
 */
static void xml_unescape(const char *str, char *result, int result_length)
{
    size_t capacity;
    size_t index = 0;

    if (result_length <= 0)
        return;
    capacity = (size_t)result_length - 1; /* one byte is reserved for the NUL */

    while (*str != '\0' && index < capacity) {
        char decoded[4];
        size_t consumed = 0;
        size_t decoded_length;

        if (*str != '&') {
            result[index++] = *str++;
            continue;
        }

        decoded_length = decode_reference(str, decoded, &consumed);
        if (decoded_length == 0) {
            result[index++] = *str++;
            continue;
        }
        if (decoded_length > capacity - index)
            break;

        memcpy(result + index, decoded, decoded_length);
        index += decoded_length;
        str += consumed;
    }

    result[index] = '\0';
}

/*
 * Invoked once per parsed <item>. title and link point at temporary buffers
 * that are only valid for the duration of the call.
 */
typedef void (*RssParseCallback)(const char *title, const char *link, void *userdata);

/*
 * Minimal RSS scanner based on plain substring search.
 *
 * str is modified in place: the closing tags of the elements that are read
 * are overwritten with NUL. The channel title (the first <title> that closes
 * before the first <item>) is unescaped into rss_title_str, which is set to
 * the empty string when there is no such title. parse_callback is called for
 * every <item>, in document order, with its unescaped title and link; inside
 * an item the <link> is expected after the <title>.
 *
 * Returns 0 on success (including a channel without items) and 1 when str is
 * NULL, <channel> is missing, or an item lacks a well-formed title or link
 * inside its own <item>...</item> span.
 */
static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, RssParseCallback parse_callback, void *userdata)
{
    char title_str[256];
    char link_str[2084];
    char *channel_start;
    char *after_channel;
    char *rss_title;
    char *first_item;
    char *item;

    /* Make sure the caller never reads an uninitialised title. */
    if (rss_title_str_size > 0)
        rss_title_str[0] = '\0';

    if (!str)
        return 1;

    channel_start = strstr(str, "<channel>");
    if (!channel_start)
        return 1;
    after_channel = channel_start + LITERAL_LENGTH("<channel>");

    rss_title = strstr(after_channel, "<title>");
    first_item = strstr(after_channel, "<item>");

    /* A <title> located after the first <item> belongs to that item, not to the channel. */
    if (rss_title && (!first_item || rss_title < first_item)) {
        char *rss_title_end;

        rss_title += LITERAL_LENGTH("<title>");
        rss_title_end = strstr(rss_title, "</title>");
        if (rss_title_end && (!first_item || rss_title_end < first_item)) {
            *rss_title_end = '\0';
            xml_unescape(rss_title, rss_title_str, rss_title_str_size);
        }
    }

    for (item = first_item; item; ) {
        char *item_body = item + LITERAL_LENGTH("<item>");
        char *item_end = strstr(item_body, "</item>");
        char *item_title;
        char *item_title_end;
        char *item_link;
        char *item_link_end;

        if (!item_end)
            return 1;

        item_title = strstr(item_body, "<title>");
        if (!item_title || item_title >= item_end)
            return 1;
        item_title += LITERAL_LENGTH("<title>");

        item_title_end = strstr(item_title, "</title>");
        if (!item_title_end || item_title_end >= item_end)
            return 1;

        item_link = strstr(item_title_end + LITERAL_LENGTH("</title>"), "<link>");
        if (!item_link || item_link >= item_end)
            return 1;
        item_link += LITERAL_LENGTH("<link>");

        item_link_end = strstr(item_link, "</link>");
        if (!item_link_end || item_link_end >= item_end)
            return 1;

        /* Only cut the buffer once the whole item is known to be well-formed. */
        *item_title_end = '\0';
        *item_link_end = '\0';

        xml_unescape(item_title, title_str, sizeof(title_str));
        xml_unescape(item_link, link_str, sizeof(link_str));
        parse_callback(title_str, link_str, userdata);

        item = strstr(item_end + LITERAL_LENGTH("</item>"), "<item>");
    }

    return 0;
}

/* Item callback used by add_rss: prints every item to stderr. */
static void rss_parse_callback(const char *title, const char *link, void *userdata)
{
    (void)userdata;
    fprintf(stderr, "title: |%s|, link: |%s|\n", title, link);
}

/*
 * Downloads the feed at url, parses it and prints the channel title and every
 * item to stderr.
 *
 * name, rss_config_dir and start_after are currently unused.
 *
 * Returns 0 on success, the non-zero result of download_to_buffer when the
 * download fails, or the non-zero result of parse_rss when the document
 * cannot be parsed.
 */
int add_rss(const char *name, const char *url, const char *rss_config_dir, const char *start_after)
{
    int result = 0;
    int res;
    /* Declared ahead of the first goto so that no jump skips a declaration. */
    char rss_title[256];
    Buffer buffer;

    (void)name;
    (void)rss_config_dir;
    (void)start_after;

    buffer_init(&buffer);

    res = download_to_buffer(url, &buffer);
    if (res != 0) {
        fprintf(stderr, "Failed to download rss: %s\n", url);
        result = res;
        goto cleanup;
    }

    /*
     * NOTE(review): parse_rss treats buffer.data as a NUL-terminated string -
     * confirm that download_to_buffer terminates the data it stores.
     */
    res = parse_rss(buffer.data, rss_title, sizeof(rss_title), rss_parse_callback, NULL);
    if (res != 0) {
        fprintf(stderr, "Failed to parse rss for url: %s\n", url);
        result = res;
        goto cleanup;
    }

    fprintf(stderr, "rss title: |%s|\n", rss_title);

cleanup:
    buffer_deinit(&buffer);
    return result;
}