aboutsummaryrefslogtreecommitdiff
path: root/src/rss.c
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2020-07-13 15:59:30 +0200
committer dec05eba <dec05eba@protonmail.com> 2020-07-13 15:59:30 +0200
commit ae0520e57267dbd866fc8cd25f66f4e6af2ac118 (patch)
tree 22788688f1b588c3ad00c1ce3fe13da68b3a9382 /src/rss.c
parent a1ca82847eb356c6b85ada2ac11f38d98f6e085e (diff)
Move c files into src directory
Diffstat (limited to 'src/rss.c')
-rw-r--r-- src/rss.c 176
1 files changed, 176 insertions, 0 deletions
diff --git a/src/rss.c b/src/rss.c
new file mode 100644
index 0000000..fdb932c
--- /dev/null
+++ b/src/rss.c
@@ -0,0 +1,176 @@
+#include "rss.h"
+#include "download.h"
+#include "buffer.h"
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
/* Returns 1 when |c| is an ASCII lowercase letter ('a'..'z'), otherwise 0. */
static int is_alpha_lowercase(char c) {
    if(c < 'a')
        return 0;
    return c <= 'z';
}
+
/* Returns 1 when |c| is an ASCII decimal digit ('0'..'9'), otherwise 0. */
static int is_digit(char c) {
    if(c < '0')
        return 0;
    return c <= '9';
}
+
/*
 * Skips over the name part of an XML character/entity reference.
 *
 * |str| points at the first character after the '&'. The returned pointer
 * addresses the first character that cannot be part of a reference name,
 * i.e. anything other than an ASCII letter, a decimal digit or '#'.
 * For a well-formed reference that is the terminating ';'. For malformed
 * input it can be any other character, including the string's terminating
 * NUL, so callers have to inspect what the result points at.
 */
static char* get_amp_end(char *str) {
    for(;;) {
        char c = *str;
        /* Uppercase letters occur in hexadecimal references such as "&#xAB;" */
        int is_alpha_uppercase = c >= 'A' && c <= 'Z';
        /*
         * Stop at the first character that is not part of a name. Stopping
         * only at ';' and NUL would never terminate on input like "a & b",
         * because the scan position does not move past the offending char.
         */
        if(!(is_alpha_lowercase(c) || is_alpha_uppercase || is_digit(c) || c == '#'))
            break;
        ++str;
    }
    return str;
}
+
/*
 * Encodes the Unicode code point |cp| as UTF-8 into |out|, which must have
 * room for 4 bytes. Returns the number of bytes written, or 0 when |cp| is
 * not encodable (NUL, a UTF-16 surrogate, or above U+10FFFF).
 */
static int xml_utf8_encode(unsigned long cp, char *out) {
    if(cp == 0 || cp > 0x10FFFFUL || (cp >= 0xD800UL && cp <= 0xDFFFUL))
        return 0;

    if(cp < 0x80UL) {
        out[0] = (char)cp;
        return 1;
    }

    if(cp < 0x800UL) {
        out[0] = (char)(0xC0 | (cp >> 6));
        out[1] = (char)(0x80 | (cp & 0x3F));
        return 2;
    }

    if(cp < 0x10000UL) {
        out[0] = (char)(0xE0 | (cp >> 12));
        out[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
        out[2] = (char)(0x80 | (cp & 0x3F));
        return 3;
    }

    out[0] = (char)(0xF0 | (cp >> 18));
    out[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
    out[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
    out[3] = (char)(0x80 | (cp & 0x3F));
    return 4;
}

/*
 * Decodes the reference name |name| of |name_len| characters (the text
 * between '&' and ';', both excluded) into |out|, which must have room for
 * 4 bytes. Handles the five predefined XML entities and decimal ("#38") or
 * hexadecimal ("#x26") character references.
 * Returns the number of bytes written, or 0 when the name is not recognized.
 */
static int xml_decode_reference(const char *name, size_t name_len, char *out) {
    static const struct {
        const char *name;
        char value;
    } named_entities[] = {
        { "amp", '&' },
        { "lt", '<' },
        { "gt", '>' },
        { "apos", '\'' },
        { "quot", '"' },
    };

    if(name_len == 0)
        return 0;

    if(name[0] == '#') {
        unsigned long base = 10;
        unsigned long cp = 0;
        size_t i = 1;

        if(i < name_len && (name[i] == 'x' || name[i] == 'X')) {
            base = 16;
            ++i;
        }

        /* "&#;" and "&#x;" carry no digits at all */
        if(i == name_len)
            return 0;

        for(; i < name_len; ++i) {
            char d = name[i];
            unsigned long digit;
            if(d >= '0' && d <= '9')
                digit = (unsigned long)(d - '0');
            else if(base == 16 && d >= 'a' && d <= 'f')
                digit = (unsigned long)(d - 'a') + 10;
            else if(base == 16 && d >= 'A' && d <= 'F')
                digit = (unsigned long)(d - 'A') + 10;
            else
                return 0;

            cp = cp * base + digit;
            /* Bail out early so the accumulator can never overflow */
            if(cp > 0x10FFFFUL)
                return 0;
        }

        return xml_utf8_encode(cp, out);
    }

    for(size_t i = 0; i < sizeof(named_entities) / sizeof(named_entities[0]); ++i) {
        const char *candidate = named_entities[i].name;
        if(strlen(candidate) == name_len && memcmp(candidate, name, name_len) == 0) {
            out[0] = named_entities[i].value;
            return 1;
        }
    }

    return 0;
}

/*
 * Copies the NUL-terminated string |str| into |result|, replacing XML
 * character and entity references with the characters they stand for.
 *
 * - &amp; &lt; &gt; &apos; &quot; are replaced by the matching character.
 * - &#N; and &#xH; are replaced by the UTF-8 encoding of that code point.
 * - An '&' that does not start a recognized, ';'-terminated reference is
 *   copied through unchanged instead of being dropped.
 *
 * |result| receives at most |result_length| bytes including the terminating
 * NUL; longer output is truncated (never in the middle of a decoded
 * multi-byte character). Nothing is written when |result| is NULL or
 * |result_length| is not positive. |str| is not modified.
 */
static void xml_unescape(char *str, char *result, int result_length) {
    if(!result || result_length <= 0)
        return;

    /* Number of payload bytes that fit next to the terminating NUL */
    const int capacity = result_length - 1;
    int index = 0;

    while(*str != '\0' && index < capacity) {
        if(*str == '&') {
            char *amp_end = get_amp_end(str + 1);
            char decoded[4];
            int decoded_len = 0;

            if(*amp_end == ';')
                decoded_len = xml_decode_reference(str + 1, (size_t)(amp_end - (str + 1)), decoded);

            if(decoded_len > 0) {
                /* Rather truncate here than emit half of a UTF-8 sequence */
                if(index + decoded_len > capacity)
                    break;
                memcpy(result + index, decoded, (size_t)decoded_len);
                index += decoded_len;
                str = amp_end + 1;
                continue;
            }
            /* Not a reference we understand: fall through and keep the '&' as is */
        }

        result[index++] = *str++;
    }

    result[index] = '\0';
}
+
/*
 * Cuts |str| off at the first occurrence of |tag| by overwriting the first
 * character of that occurrence with NUL.
 * Returns a pointer to where |tag| started, or NULL (leaving |str| untouched)
 * when |tag| does not occur in |str|.
 */
static char* string_substr_before_tag_end(char *str, const char *tag) {
    char *match = strstr(str, tag);
    if(match == NULL)
        return NULL;

    match[0] = '\0';
    return match;
}
+
/*
 * Invoked once per parsed <item> with its unescaped title and link.
 * Both strings live in buffers owned by parse_rss and are overwritten for
 * the next item, so the callback has to copy whatever it wants to keep.
 */
typedef void (*RssParseCallback)(const char *title, const char *link, void *userdata);

/*
 * Minimal RSS parser working directly on the NUL-terminated text |str|.
 *
 * |str| is modified in place: closing tags of the elements that are read
 * get overwritten with NUL to terminate the element text.
 *
 * The channel title (the first <title> that appears before the first <item>)
 * is unescaped into |rss_title_str| (|rss_title_str_size| bytes). It is set
 * to the empty string when the channel has no usable title.
 * For every <item>, |parse_callback| (may be NULL) is called with the item's
 * unescaped <title> and <link>, in document order. <title> and <link> may
 * appear in either order inside the item.
 *
 * Returns 0 on success (including a channel without items) and 1 when the
 * arguments are invalid, there is no <channel>, or an item is malformed.
 * When a later item is malformed, the callback has already been invoked for
 * the items before it.
 */
static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, RssParseCallback parse_callback, void *userdata) {
    static const char channel_open[] = "<channel>";
    static const char title_open[] = "<title>";
    static const char title_close[] = "</title>";
    static const char item_open[] = "<item>";
    static const char item_close[] = "</item>";
    static const char link_open[] = "<link>";
    static const char link_close[] = "</link>";

    if(!str || !rss_title_str || rss_title_str_size <= 0)
        return 1;

    rss_title_str[0] = '\0';

    char *channel_start = strstr(str, channel_open);
    if(!channel_start)
        return 1;

    char *after_channel = channel_start + (sizeof(channel_open) - 1);

    /* Both are located before anything in the buffer gets NUL-terminated */
    char *rss_title = strstr(after_channel, title_open);
    char *first_item = strstr(after_channel, item_open);

    /* A <title> after the first <item> belongs to an item, not to the channel */
    if(rss_title && (!first_item || rss_title < first_item)) {
        rss_title += sizeof(title_open) - 1;
        char *rss_title_end = strstr(rss_title, title_close);
        /* Only cut the buffer when the closing tag is still in front of the items */
        if(rss_title_end && (!first_item || rss_title_end < first_item)) {
            *rss_title_end = '\0';
            xml_unescape(rss_title, rss_title_str, rss_title_str_size);
        }
    }

    if(!first_item)
        return 0;

    char title_str[256];
    char link_str[2084];

    char *item = first_item;
    while(item) {
        char *item_body = item + (sizeof(item_open) - 1);
        char *item_end = strstr(item_body, item_close);
        if(!item_end)
            return 1;

        /*
         * Find both opening tags first: terminating the title would hide a
         * <link> that follows it from a search starting at |item_body|.
         */
        char *item_title = strstr(item_body, title_open);
        char *item_link = strstr(item_body, link_open);
        if(!item_title || item_title >= item_end)
            return 1;
        if(!item_link || item_link >= item_end)
            return 1;

        item_title += sizeof(title_open) - 1;
        item_link += sizeof(link_open) - 1;

        char *item_title_end = string_substr_before_tag_end(item_title, title_close);
        if(!item_title_end || item_title_end >= item_end)
            return 1;

        char *item_link_end = string_substr_before_tag_end(item_link, link_close);
        if(!item_link_end || item_link_end >= item_end)
            return 1;

        xml_unescape(item_title, title_str, sizeof(title_str));
        xml_unescape(item_link, link_str, sizeof(link_str));
        if(parse_callback)
            parse_callback(title_str, link_str, userdata);

        item = strstr(item_end + (sizeof(item_close) - 1), item_open);
    }

    return 0;
}
+
/*
 * RssParseCallback implementation that prints the item's title and link to
 * stderr. |userdata| is ignored.
 */
static void rss_parse_callback(const char *title, const char *link, void *userdata) {
    FILE *out = stderr;
    fprintf(out, "title: |%s|, link: |%s|\n", title, link);
    (void)userdata;
}
+
+int add_rss(const char *name, const char *url, const char *rss_config_dir, const char *start_after) {
+ (void)name;
+ (void)rss_config_dir;
+ (void)start_after;
+ int result = 0;
+
+ Buffer buffer;
+ buffer_init(&buffer);
+ int res = download_to_buffer(url, &buffer);
+ if(res != 0) {
+ fprintf(stderr, "Failed to download rss: %s\n", url);
+ result = res;
+ goto cleanup;
+ }
+
+ char rss_title[256];
+ res = parse_rss(buffer.data, rss_title, sizeof(rss_title), rss_parse_callback, NULL);
+ if(res != 0) {
+ fprintf(stderr, "Failed to parse rss for url: %s\n", url);
+ result = res;
+ goto cleanup;
+ }
+ fprintf(stderr, "rss title: |%s|\n", rss_title);
+
+ cleanup:
+ buffer_deinit(&buffer);
+ return result;
+}