From e422322650f9cb057937182987071438f9c79e84 Mon Sep 17 00:00:00 2001 From: dec05eba Date: Fri, 30 Oct 2020 09:49:54 +0100 Subject: Add support for korean and all chinese characters --- TODO | 3 +-- include/Text.hpp | 4 ++-- src/Text.cpp | 41 +++++++++++++++++++++++++++++++---------- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/TODO b/TODO index 733e38c..d87feb1 100644 --- a/TODO +++ b/TODO @@ -17,8 +17,7 @@ Allow deleting watch history with delete key (and show confirmation). Add navigation to nyaa.si submitter torrents. Create a large texture and add downloaded images to it. This will save memory usage because sfml has to use power of two textures (and so does opengl internally) for textures, so if you have multiple textures they will use more memory than one large texture with the same texture data. Use fallback cjk font for regular sf::Text as well (search, tabs, chapter name when viewing a page, path in file-manager, etc). -Fix some japanese fonts not rendering (full width alphanumeric?). -Also add support for full chinese and korean range. +Fix some japanese fonts not rendering (half width alphanumeric?). Resize text vertex arrays to 0 when not visible on screen to reduce memory usage. Text already does this but its done incorrectly (copied from dchat codebase). (Is this really necessary?). Extract thumbnail from images that are being downloaded, while its downloading and show that while the full image is downloading (upscaled, or with blurhash). Add setting to disable sending typing events to the server (matrix). 
diff --git a/include/Text.hpp b/include/Text.hpp index f24e79d..7ce7fa3 100644 --- a/include/Text.hpp +++ b/include/Text.hpp @@ -38,13 +38,13 @@ namespace QuickMedia }; TextElement() {} - TextElement(const StringViewUtf32 &_text, Type _type) : text(_text), type(_type), is_japanese(false) {} + TextElement(const StringViewUtf32 &_text, Type _type) : text(_text), type(_type), is_cjk(false) {} StringViewUtf32 text; sf::Vector2f position; Type type; //bool ownLine; // Currently only used for emoji, to make emoji bigger when it's the only thing on a line - bool is_japanese; + bool is_cjk; }; struct VertexRef { diff --git a/src/Text.cpp b/src/Text.cpp index 51ff054..360f290 100644 --- a/src/Text.cpp +++ b/src/Text.cpp @@ -202,17 +202,38 @@ namespace QuickMedia || (codepoint >= 0xFF5F && codepoint <= 0xFF9F); // Katakana and punctuation (half width) } - static size_t find_end_of_japanese(const sf::Uint32 *str, size_t size) { + static bool is_korean_codepoint(sf::Uint32 codepoint) { + return codepoint >= 0xAC00 && codepoint <= 0xD7A3; + } + + // TODO: Is there a more efficient way to do this? Maybe Chinese characters share a specific bit pattern?
+ static bool is_chinese_codepoint(sf::Uint32 codepoint) { + return (codepoint >= 0x4E00 && codepoint <= 0x9FFF) // CJK Unified Ideographs + || (codepoint >= 0x3400 && codepoint <= 0x4DBF) // CJK Unified Ideographs Extension A + || (codepoint >= 0x20000 && codepoint <= 0x2A6DF) // CJK Unified Ideographs Extension B + || (codepoint >= 0x2A700 && codepoint <= 0x2B73F) // CJK Unified Ideographs Extension C + || (codepoint >= 0x2B740 && codepoint <= 0x2B81F) // CJK Unified Ideographs Extension D + || (codepoint >= 0x2B820 && codepoint <= 0x2CEAF) // CJK Unified Ideographs Extension E + || (codepoint >= 0xF900 && codepoint <= 0xFAFF) // CJK Compatibility Ideographs + || (codepoint >= 0x2F800 && codepoint <= 0x2FA1F); // CJK Compatibility Ideographs Supplement + } + + // TODO: Merge chinese, japanese and korean codepoints into one function since they share ranges + static bool is_cjk_codepoint(sf::Uint32 codepoint) { + return is_chinese_codepoint(codepoint) || is_japanese_codepoint(codepoint) || is_korean_codepoint(codepoint); + } + + static size_t find_end_of_cjk(const sf::Uint32 *str, size_t size) { for(size_t i = 0; i < size; ++i) { - if(!is_japanese_codepoint(str[i])) + if(!is_cjk_codepoint(str[i])) return i; } return size; } - static size_t find_end_of_non_japanese(const sf::Uint32 *str, size_t size) { + static size_t find_end_of_non_cjk(const sf::Uint32 *str, size_t size) { for(size_t i = 0; i < size; ++i) { - if(is_japanese_codepoint(str[i])) + if(is_cjk_codepoint(str[i])) return i; } return size; @@ -224,13 +245,13 @@ namespace QuickMedia size_t size = str.getSize(); while(index < size) { size_t offset; - bool is_japanese = is_japanese_codepoint(str[index]); - if(is_japanese) - offset = find_end_of_japanese(str.getData() + index + 1, size - index - 1); + bool is_cjk = is_cjk_codepoint(str[index]); + if(is_cjk) + offset = find_end_of_cjk(str.getData() + index + 1, size - index - 1); else - offset = find_end_of_non_japanese(str.getData() + index + 1, size - index - 
1); + offset = find_end_of_non_cjk(str.getData() + index + 1, size - index - 1); textElements.push_back({ StringViewUtf32(str.getData() + index, offset + 1), TextElement::Type::TEXT }); - textElements.back().is_japanese = is_japanese; + textElements.back().is_cjk = is_cjk; index += 1 + offset; } } @@ -302,7 +323,7 @@ namespace QuickMedia TextElement &textElement = textElements[textElementIndex]; const sf::Font *ff = font; int vertices_index = 0; - if(textElement.is_japanese) { + if(textElement.is_cjk) { ff = cjk_font; vertices_index = 1; } -- cgit v1.2.3