about | summary | refs | log | tree | commit | diff
path: root/src/plugins/Fourchan.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/Fourchan.cpp')
-rw-r--r-- src/plugins/Fourchan.cpp 112
1 files changed, 72 insertions, 40 deletions
diff --git a/src/plugins/Fourchan.cpp b/src/plugins/Fourchan.cpp
index c91bd0d..7cad54f 100644
--- a/src/plugins/Fourchan.cpp
+++ b/src/plugins/Fourchan.cpp
@@ -43,7 +43,7 @@ namespace QuickMedia {
};
DataView text; // Set when type is TEXT, QUOTE or QUOTELINK
- int64_t quote_postnumber; // Set when type is QUOTELINK
+ int64_t quote_postnumber = 0; // Set when type is QUOTELINK
Type type;
};
@@ -63,58 +63,90 @@ namespace QuickMedia {
return nullptr;
}
+ static void lstrip_newline(const char *str, size_t size, const char **output_str, size_t *output_size) { // Skips leading '\n' bytes of str[0..size); the result is a pointer into str, nothing is copied
+ size_t i = 0;
+ while(i < size && str[i] == '\n') { // i ends up as the number of leading '\n' bytes
+ ++i;
+ }
+ *output_str = str + i; // first byte that is not a leading '\n' (str + size when every byte is '\n')
+ *output_size = size - i; // number of bytes left after the skipped prefix
+ }
+
+ static void rstrip_newline(const char *str, size_t size, size_t *output_size) { // Writes to *output_size the length of str[0..size) with trailing '\n' bytes dropped; str itself is not modified
+ ssize_t i = size - 1; // signed index so the loop can step below 0. NOTE(review): for size == 0 this depends on the unsigned-to-signed conversion yielding -1 - confirm for all target platforms
+ while(i >= 0 && str[i] == '\n') { // walk backwards over trailing '\n' bytes
+ --i;
+ }
+ *output_size = i + 1; // i is the index of the last kept byte, or -1 when nothing is kept
+ }
+
+ static void strip_newline(const char *str, size_t size, const char **output_str, size_t *output_size) { // Trims '\n' bytes from both ends of str[0..size); outputs a pointer into str plus the trimmed length
+ lstrip_newline(str, size, output_str, output_size); // first drop the leading '\n' bytes
+ rstrip_newline(*output_str, *output_size, output_size); // *output_size is passed by value as the input length, then overwritten with the right-trimmed length
+ }
+
using CommentPieceCallback = std::function<void(const CommentPiece&)>;
- static void extract_comment_pieces(TidyDoc doc, TidyNode node, CommentPieceCallback callback) {
+ static int extract_comment_pieces(TidyDoc doc, TidyNode node, CommentPieceCallback &callback) {
for(TidyNode child = tidyGetChild(node); child; child = tidyGetNext(child)) {
const char *node_name = tidyNodeGetName(child);
- if(node_name && strcmp(node_name, "wbr") == 0) {
- CommentPiece comment_piece;
- comment_piece.type = CommentPiece::Type::LINE_CONTINUE;
- comment_piece.text = { (char*)"", 0 };
- callback(comment_piece);
- continue;
- }
- TidyNodeType node_type = tidyNodeGetType(child);
- if(node_type == TidyNode_Start && node_name) {
- TidyNode text_node = tidyGetChild(child);
- //fprintf(stderr, "Child node name: %s, child text type: %d\n", node_name, tidyNodeGetType(text_node));
- if(tidyNodeGetType(text_node) == TidyNode_Text) {
- TidyBuffer tidy_buffer;
- tidyBufInit(&tidy_buffer);
- if(tidyNodeGetText(doc, text_node, &tidy_buffer)) {
+ if(node_name) {
+ if(strcmp(node_name, "br") == 0) {
+ CommentPiece comment_piece;
+ comment_piece.type = CommentPiece::Type::TEXT;
+ // Warning: Cast from const char* to char* ...
+ comment_piece.text = { (char*)"\n", 1 };
+ callback(comment_piece);
+ /*} else if(strcmp(node_name, "span") == 0) {
+ const char *span_class = get_attribute_value(child, "class");
+ //fprintf(stderr, "span class: %s\n", span_class);
+ if(span_class && strcmp(span_class, "quote") == 0) {
CommentPiece comment_piece;
- comment_piece.type = CommentPiece::Type::TEXT;
- comment_piece.text = { (char*)tidy_buffer.bp, tidy_buffer.size };
- if(strcmp(node_name, "span") == 0) {
- const char *span_class = get_attribute_value(child, "class");
- //fprintf(stderr, "span class: %s\n", span_class);
- if(span_class && strcmp(span_class, "quote") == 0)
- comment_piece.type = CommentPiece::Type::QUOTE;
- } else if(strcmp(node_name, "a") == 0) {
- const char *a_class = get_attribute_value(child, "class");
- const char *a_href = get_attribute_value(child, "href");
- //fprintf(stderr, "a class: %s, href: %s\n", a_class, a_href);
- if(a_class && a_href && strcmp(a_class, "quotelink") == 0 && strncmp(a_href, "#p", 2) == 0) {
- comment_piece.type = CommentPiece::Type::QUOTELINK;
- comment_piece.quote_postnumber = strtoll(a_href + 2, nullptr, 10);
- }
- }
+ comment_piece.type = CommentPiece::Type::QUOTE;
+ // Warning: Cast from const char* to char* ...
+ comment_piece.text = { (char*)"\n", 1 };
callback(comment_piece);
- }
- tidyBufFree(&tidy_buffer);
+ }*/
}
- } else if(node_type == TidyNode_Text) {
+ }
+
+ if(tidyNodeGetType(child) == TidyNode_Text) {
TidyBuffer tidy_buffer;
tidyBufInit(&tidy_buffer);
if(tidyNodeGetText(doc, child, &tidy_buffer)) {
+ const char *inner_text = (const char*)tidy_buffer.bp;
+ size_t inner_text_size = tidy_buffer.size;
+ strip_newline(inner_text, inner_text_size, &inner_text, &inner_text_size);
+
+ const char *node_name = tidyNodeGetName(node);
+ if(node_name && strcmp(node_name, "a") == 0) {
+ const char *a_class = get_attribute_value(node, "class");
+ const char *a_href = get_attribute_value(node, "href");
+ if(a_class && a_href && strcmp(a_class, "quotelink") == 0 && strncmp(a_href, "#p", 2) == 0) {
+ CommentPiece comment_piece;
+ comment_piece.type = CommentPiece::Type::QUOTELINK;
+ comment_piece.quote_postnumber = strtoll(a_href + 2, nullptr, 10);
+ // Warning: Cast from const char* to char* ...
+ comment_piece.text = { (char*)inner_text, inner_text_size };
+ callback(comment_piece);
+ tidyBufFree(&tidy_buffer);
+ continue;
+ }
+ }
+
CommentPiece comment_piece;
comment_piece.type = CommentPiece::Type::TEXT;
- comment_piece.text = { (char*)tidy_buffer.bp, tidy_buffer.size };
+ // Warning: Cast from const char* to char* ...
+ comment_piece.text = { (char*)inner_text, inner_text_size };
callback(comment_piece);
}
tidyBufFree(&tidy_buffer);
+ } else {
+ int res = extract_comment_pieces(doc, child, callback);
+ if(res != 0)
+ return res;
}
}
+ return 0;
}
static void extract_comment_pieces(const char *html_source, size_t size, CommentPieceCallback callback) {
@@ -129,7 +161,7 @@ namespace QuickMedia {
comment_piece.text = { (char*)html_source, size };
callback(comment_piece);
} else {
- extract_comment_pieces(doc, tidyGetBody(doc), std::move(callback));
+ extract_comment_pieces(doc, tidyGetRoot(doc), callback);
}
tidyRelease(doc);
}
@@ -241,8 +273,8 @@ namespace QuickMedia {
comment_text.append(cp.text.data, cp.text.size);
break;
case CommentPiece::Type::QUOTE:
- comment_text += '>';
- comment_text.append(cp.text.data, cp.text.size);
+ //comment_text += '>';
+ //comment_text.append(cp.text.data, cp.text.size);
//comment_text += '\n';
break;
case CommentPiece::Type::QUOTELINK: {