diff options
author | dec05eba <dec05eba@protonmail.com> | 2020-07-13 15:57:10 +0200 |
---|---|---|
committer | dec05eba <dec05eba@protonmail.com> | 2020-07-13 15:57:10 +0200 |
commit | a1ca82847eb356c6b85ada2ac11f38d98f6e085e (patch) | |
tree | 8da10b7f5c409bcf0cde60b89c947900e1322f7d | |
parent | ee0d96fd180b235d4e87019d69e07cefde1e5546 (diff) |
Start on add_rss, add rss parser
-rw-r--r-- | buffer.h | 6 | ||||
-rwxr-xr-x | build.sh | 4 | ||||
-rw-r--r-- | download.c | 16 | ||||
-rw-r--r-- | download.h | 7 | ||||
-rw-r--r-- | fileutils.c | 35 | ||||
-rw-r--r-- | fileutils.h | 2 | ||||
-rw-r--r-- | main.c | 138 | ||||
-rw-r--r-- | program.c | 7 | ||||
-rw-r--r-- | program.h | 2 | ||||
-rw-r--r-- | rss.c | 176 | ||||
-rw-r--r-- | rss.h | 6 | ||||
-rw-r--r-- | transmission.c | 6 |
12 files changed, 387 insertions, 18 deletions
@@ -7,11 +7,13 @@ TODO: Optimize small size buffers by using data and size members (16 bytes on x86) instead of heap allocation */ -typedef struct { + +typedef struct Buffer Buffer; +struct Buffer { void *data; size_t size; size_t capacity; -} Buffer; +}; void buffer_init(Buffer *self); void buffer_deinit(Buffer *self); @@ -1,8 +1,8 @@ #!/bin/sh CFLAGS="-O0 -g3 -Wall -Wextra -Werror" -#CFLAGS="-O3 -s" +#CFLAGS="-O3 -s -flto" #LIBS="-lcurl" LIBS="" #gcc -musl-gcc -static main.c program.c alloc.c buffer.c fileutils.c transmission.c -o automedia $CFLAGS $LIBS +musl-gcc -static main.c program.c alloc.c buffer.c fileutils.c transmission.c rss.c download.c -o automedia $CFLAGS $LIBS diff --git a/download.c b/download.c new file mode 100644 index 0000000..6c8e33e --- /dev/null +++ b/download.c @@ -0,0 +1,16 @@ +#include "download.h" +#include "buffer.h" +#include "program.h" + +int download_to_buffer(const char *url, Buffer *buffer) { + const char *args[] = { + "curl", "-s", "-L", "-f", + "-H", "user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", + "-H", "Accept-Language: en-US,en;q=0.5", + "--compressed", + "--", + url, + NULL + }; + return program_exec(args, program_buffer_write_callback, buffer); +} diff --git a/download.h b/download.h new file mode 100644 index 0000000..0682ba9 --- /dev/null +++ b/download.h @@ -0,0 +1,7 @@ +#ifndef DOWNLOAD_H +#define DOWNLOAD_H + +struct Buffer; +int download_to_buffer(const char *url, struct Buffer *buffer); + +#endif diff --git a/fileutils.c b/fileutils.c index 137373b..ea57550 100644 --- a/fileutils.c +++ b/fileutils.c @@ -3,9 +3,12 @@ #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <errno.h> #include <pwd.h> #include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> const char* get_home_dir() { const char *home_dir = getenv("HOME"); @@ -45,3 +48,35 @@ int file_get_content(const char *filepath, char **data, long *size) { fclose(file); return result; } + +int create_directory_recursive(char *path) { + int path_len = strlen(path); + char *p = path; + char *end = path + path_len; + for(;;) { + char *slash_p = strchr(p, '/'); + + // Skips first '/', we don't want to try and create the root directory + if(slash_p == path) { + ++p; + continue; + } + + if(!slash_p) + slash_p = end; + + char prev_char = *slash_p; + *slash_p = '\0'; + int err = mkdir(path, S_IRWXU); + *slash_p = prev_char; + + if(err == -1 && errno != EEXIST) + return err; + + if(slash_p == end) + break; + else + p = slash_p + 1; + } + return 0; +} diff --git a/fileutils.h b/fileutils.h index 413648f..6c514bb 100644 --- a/fileutils.h +++ b/fileutils.h @@ -4,5 +4,7 @@ const char* get_home_dir(); /* Returns 0 on success */ int file_get_content(const char *filepath, char **data, long *size); +/* Returns 0 on success (if the directories are created or if the directories already exists) */ +int create_directory_recursive(char *path); #endif @@ -1,6 +1,8 @@ #include "buffer.h" #include "fileutils.h" #include "transmission.h" +#include "fileutils.h" +#include "rss.h" #include "json.h" #include <stdio.h> @@ -10,6 +12,8 @@ #include <dirent.h> +#define NAME_MAX_LEN 250 + static void usage(void) { fprintf(stderr, "usage: automedia COMMAND\n"); fprintf(stderr, "\n"); @@ -26,7 +30,7 @@ static void usage_add(void) { fprintf(stderr, " type The type should be either rss or html\n"); fprintf(stderr, " url The url to the rss or html\n"); fprintf(stderr, " filename The filename of an episode of an existing serie to start track. Currently only works with rss on https://nyaa.si\n"); - fprintf(stderr, " --name The display name to be used for the media. Optional for rss, in which case the name will be retries from rss TITLE, required for html\n"); + fprintf(stderr, " --name The display name to be used for the media. Optional for rss, in which case the name will be the rss TITLE, required for html. The name can't be longer than 250 characters\n"); fprintf(stderr, " --start-after The sync should start downloading media after this item. This --start-after value should be the title of the episode/chapter (Optional, default is to start from the first item)\n"); fprintf(stderr, "EXAMPLES\n"); fprintf(stderr, " automedia add rss 'https://nyaa.si/?page=rss&q=Tejina-senpai+1080p&c=0_0&f=0&u=HorribleSubs'\n"); @@ -180,6 +184,124 @@ static int compare_downloaded_item(const void *a, const void *b) { return list_data_a->timestamp - list_data_b->timestamp; } +static void string_replace(char *str, char old, char new) { + for(;;) { + char c = *str; + if(c == old) + *str = new; + else if(c == '\0') + break; + ++str; + } +} + +static char* lstrip(char *str) { + for(;;) { + char c = *str; + if(c != ' ' && c != '\t' && c != '\n') + break; + else if(c == '\0') + break; + ++str; + } + return str; +} + +static void rstrip(char *str) { + int len = strlen(str); + if(len == 0) + return; + + char *p = str + len - 1; + while(p != str) { + char c = *p; + if(c != ' ' && c != '\t' && c != '\n') + break; + --p; + } + + p[1] = '\0'; +} + +static char* strip(char *str) { + str = lstrip(str); + rstrip(str); + return str; +} + +static void command_add(int argc, char **argv, char *rss_config_dir, char *html_config_dir) { + if(argc < 2) + usage_add(); + + char *media_type = argv[0]; + char *media_url = argv[1]; + char *media_name = NULL; + char *start_after = NULL; + + const char *option = NULL; + for(int i = 2; i < argc; ++i) { + char *arg = argv[i]; + if(strcmp(arg, "--name") == 0 || strcmp(arg, "--start-after") == 0) { + if(option) + usage_add(); + option = arg; + } else { + if(!option) + usage_add(); + + if(strcmp(option, "--name") == 0) + media_name = arg; + else if(strcmp(option, "--start-after") == 0) + start_after = arg; + else { + fprintf(stderr, "Invalid option %s\n", option); + usage_add(); + } + + option = NULL; + } + } + + if(media_name) { + string_replace(media_name, '/', '_'); + media_name = strip(media_name); + + int media_name_len = strlen(media_name); + if(media_name_len > NAME_MAX_LEN) { + fprintf(stderr, "--name value can't be longer than %d characters\n", NAME_MAX_LEN); + exit(1); + } + } + + if(start_after) { + string_replace(start_after, '/', '_'); + start_after = strip(start_after); + } + + media_url = strip(media_url); + + if(strcmp(media_type, "rss") == 0) { + int res = create_directory_recursive(rss_config_dir); + if(res != 0) { + fprintf(stderr, "Failed to create %s, error: %s\n", rss_config_dir, strerror(res)); + exit(1); + } + add_rss(media_name, media_url, rss_config_dir, start_after); + } else if(strcmp(media_type, "html") == 0) { + (void)html_config_dir; + } else { + fprintf(stderr, "type should be either rss or html\n"); + usage_add(); + } +} + +static void command_sync(int argc, char **argv) { + if(argc < 2) + usage_sync(); + + (void)argv; +} + static void command_downloaded(const char *rss_config_dir, const char *html_config_dir) { char rss_tracked_dir[PATH_MAX]; strcpy(rss_tracked_dir, rss_config_dir); @@ -205,19 +327,19 @@ static void command_downloaded(const char *rss_config_dir, const char *html_conf buffer_deinit(&downloaded_items); } - +/* static void torrent_list_callback(int id, float percentage_finished, const char *name, void *userdata) { (void)userdata; fprintf(stderr, "id: |%d|, done: |%g|, name: |%s|\n", id, percentage_finished, name); } - +*/ int main(int argc, char **argv) { if(argc < 2) usage(); const char *home_dir = get_home_dir(); - char rss_config_dir[PATH_MAX];; + char rss_config_dir[PATH_MAX]; strcpy(rss_config_dir, home_dir); strcat(rss_config_dir, "/.config/automedia/rss"); @@ -227,9 +349,9 @@ int main(int argc, char **argv) { const char *command = argv[1]; if(strcmp(command, "add") == 0) { - usage_add(); + command_add(argc - 2, argv + 2, rss_config_dir, html_config_dir); } else if(strcmp(command, "sync") == 0) { - usage_sync(); + command_sync(argc - 2, argv + 2); } else if(strcmp(command, "downloaded") == 0) { command_downloaded(rss_config_dir, html_config_dir); } else { @@ -237,8 +359,8 @@ int main(int argc, char **argv) { usage(); } - transmission_get_all_torrents(torrent_list_callback, NULL); - printf("is transmission daemon running? %s\n", transmission_is_daemon_running() == 0 ? "yes" : "no"); + /*transmission_get_all_torrents(torrent_list_callback, NULL); + printf("is transmission daemon running? %s\n", transmission_is_daemon_running() == 0 ? "yes" : "no");*/ return 0; } @@ -1,4 +1,5 @@ #include "program.h" +#include "buffer.h" #include <unistd.h> #include <sys/wait.h> #include <sys/prctl.h> @@ -11,6 +12,12 @@ #define READ_END 0 #define WRITE_END 1 +int program_buffer_write_callback(char *data, int size, void *userdata) { + Buffer *buffer = userdata; + buffer_append(buffer, data, size); + return 0; +} + int program_exec(const char **args, ProgramOutputCallback output_callback, void *userdata) { /* 1 arguments */ if(args[0] == NULL) @@ -4,6 +4,8 @@ /* Return 0 if you want to continue reading. @data is null-terminated */ typedef int (*ProgramOutputCallback)(char *data, int size, void *userdata); +int program_buffer_write_callback(char *data, int size, void *userdata); + /* @args need to have at least 2 arguments. The first which is the program name and the last which is NULL, which indicates end of args @@ -0,0 +1,176 @@ +#include "rss.h" +#include "download.h" +#include "buffer.h" +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +static int is_alpha_lowercase(char c) { + return c >= 'a' && c <= 'z'; +} + +static int is_digit(char c) { + return c >= '0' && c <= '9'; +} + +static char* get_amp_end(char *str) { + for(;;) { + char c = *str; + if(is_alpha_lowercase(c) || is_digit(c) || c == '#') + ++str; + else if(c == ';' || c == '\0') + break; + } + return str; +} + +static void xml_unescape(char *str, char *result, int result_length) { + int index = 0; + for(;;) { + char c = *str; + if(c == '&') { + char *amp_end = get_amp_end(str + 1); + char prev_char = *amp_end; + *amp_end = '\0'; + + if(str[1] == '#') { + result[index++] = atoi(str + 2); + } else { + if(strcmp(str + 1, "amp") == 0) + result[index++] = '&'; + else if(strcmp(str + 1, "lt") == 0) + result[index++] = '<'; + else if(strcmp(str + 1, "gt") == 0) + result[index++] = '>'; + else if(strcmp(str + 1, "apos") == 0) + result[index++] = '\''; + } + + *amp_end = prev_char; + str = amp_end; + if(prev_char != '\0') + ++str; + } else if(c == '\0') { + result[index] = '\0'; + break; + } else { + result[index++] = c; + ++str; + } + + if(index == result_length - 1) { + result[index] = '\0'; + break; + } + } +} + +static char* string_substr_before_tag_end(char *str, const char *tag) { + char *tag_p = strstr(str, tag); + if(tag_p) + *tag_p = '\0'; + return tag_p; +} + +typedef void (*RssParseCallback)(const char *title, const char *link, void *userdata); + +static int parse_rss(char *str, char *rss_title_str, int rss_title_str_size, RssParseCallback parse_callback, void *userdata) { + char *channel_start = strstr(str, "<channel>"); + if(!channel_start) + return 1; + + char *after_channel = channel_start + 9; + + char *rss_title = strstr(after_channel, "<title>"); + char *first_item = strstr(after_channel, "<item>"); + if(!first_item) { + rss_title += 7; + string_substr_before_tag_end(rss_title, "</title>"); + xml_unescape(rss_title, rss_title_str, rss_title_str_size); + return 0; + } + + if(rss_title < first_item) { + rss_title += 7; + string_substr_before_tag_end(rss_title, "</title>"); + xml_unescape(rss_title, rss_title_str, rss_title_str_size); + } else { + rss_title_str[0] = '\0'; + } + + char title_str[256]; + char link_str[2084]; + + char *item = first_item; + for(;;) { + char *after_first_item = item + 6; + char *item_end = strstr(after_first_item, "</item>"); + if(!item_end) + return 1; + + char *item_title = strstr(after_first_item, "<title>"); + if(!item_title) + return 1; + + if(item_title >= item_end) + return 1; + + item_title += 7; + char *after_title = string_substr_before_tag_end(item_title, "</title>"); + if(!after_title) + return 1; + + after_title += 8; + char *item_link = strstr(after_title, "<link>"); + if(!item_link) + return 1; + + if(item_link >= item_end) + return 1; + + item_link += 6; + string_substr_before_tag_end(item_link, "</link>"); + + xml_unescape(item_title, title_str, sizeof(title_str)); + xml_unescape(item_link, link_str, sizeof(link_str)); + parse_callback(title_str, link_str, userdata); + + item = strstr(item_end + 7, "<item>"); + if(!item) + return 0; + } +} + +static void rss_parse_callback(const char *title, const char *link, void *userdata) { + (void)userdata; + fprintf(stderr, "title: |%s|, link: |%s|\n", title, link); +} + +int add_rss(const char *name, const char *url, const char *rss_config_dir, const char *start_after) { + (void)name; + (void)rss_config_dir; + (void)start_after; + int result = 0; + + Buffer buffer; + buffer_init(&buffer); + int res = download_to_buffer(url, &buffer); + if(res != 0) { + fprintf(stderr, "Failed to download rss: %s\n", url); + result = res; + goto cleanup; + } + + char rss_title[256]; + res = parse_rss(buffer.data, rss_title, sizeof(rss_title), rss_parse_callback, NULL); + if(res != 0) { + fprintf(stderr, "Failed to parse rss for url: %s\n", url); + result = res; + goto cleanup; + } + fprintf(stderr, "rss title: |%s|\n", rss_title); + + cleanup: + buffer_deinit(&buffer); + return result; +} @@ -0,0 +1,6 @@ +#ifndef RSS_H +#define RSS_H + +int add_rss(const char *name, const char *url, const char *rss_config_dir, const char *start_after); + +#endif diff --git a/transmission.c b/transmission.c index 40552a7..0acb5a5 100644 --- a/transmission.c +++ b/transmission.c @@ -8,12 +8,6 @@ #define NUM_COLUMNS 10 -static int program_buffer_write_callback(char *data, int size, void *userdata) { - Buffer *buffer = userdata; - buffer_append(buffer, data, size); - return 0; -} - int transmission_is_daemon_running() { const char *args[] = { "transmission-remote", "-si", NULL }; return program_exec(args, NULL, NULL); |