diff options
author | dec05eba <dec05eba@protonmail.com> | 2021-11-28 09:03:23 +0100 |
---|---|---|
committer | dec05eba <dec05eba@protonmail.com> | 2021-11-28 09:03:23 +0100 |
commit | 8792ee2cc5b501f0611e6304f529226a495825db (patch) | |
tree | 38d98dfd7bbcc5d48d13271a9e8923dd7b61dd91 /src | |
parent | 993eea20151d881735c667757e3b64e4f85ac687 (diff) |
Add utf8 index to offset function
Diffstat (limited to 'src')
-rw-r--r-- | src/system/utf8.c | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/src/system/utf8.c b/src/system/utf8.c
index 5d5ec79..d38b292 100644
--- a/src/system/utf8.c
+++ b/src/system/utf8.c
@@ -90,3 +90,24 @@ size_t mgl_utf8_get_start_of_codepoint(const unsigned char *str, size_t size, si
 
     return 0;
 }
+
+/* TODO: Optimize (remove branching, etc) */
+size_t mgl_utf8_index_to_offset(const unsigned char *str, size_t size, size_t index) {
+    size_t codepoint_index = 0;
+    for(size_t i = 0; i < size;) {
+        const unsigned char *cp = &str[i];
+        uint32_t codepoint;
+        size_t clen;
+        if(!mgl_utf8_decode(cp, size - i, &codepoint, &clen)) {
+            codepoint = *cp;
+            clen = 1;
+        }
+
+        if(codepoint_index >= index)
+            return i;
+
+        i += clen;
+        ++codepoint_index;
+    }
+    return size;
+}