aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author dec05eba <dec05eba@protonmail.com> 2021-11-28 09:03:23 +0100
committer dec05eba <dec05eba@protonmail.com> 2021-11-28 09:03:23 +0100
commit 8792ee2cc5b501f0611e6304f529226a495825db (patch)
tree 38d98dfd7bbcc5d48d13271a9e8923dd7b61dd91 /src
parent 993eea20151d881735c667757e3b64e4f85ac687 (diff)
Add utf8 index to offset function
Diffstat (limited to 'src')
-rw-r--r-- src/system/utf8.c 21
1 files changed, 21 insertions, 0 deletions
diff --git a/src/system/utf8.c b/src/system/utf8.c
index 5d5ec79..d38b292 100644
--- a/src/system/utf8.c
+++ b/src/system/utf8.c
@@ -90,3 +90,24 @@ size_t mgl_utf8_get_start_of_codepoint(const unsigned char *str, size_t size, si
return 0;
}
+
+/* Maps codepoint |index| (0-based) to its byte offset in |str| (|size| bytes); returns |size| if out of range.
+ * Bytes that mgl_utf8_decode rejects count as one-byte codepoints. TODO: Optimize (remove branching, etc) */
+size_t mgl_utf8_index_to_offset(const unsigned char *str, size_t size, size_t index) {
+    size_t codepoint_index = 0;
+    size_t offset = 0;
+    while(offset < size) {
+        /* Checked before decoding so the codepoint at the target offset is never decoded needlessly */
+        if(codepoint_index >= index)
+            return offset;
+
+        uint32_t codepoint; /* Output required by mgl_utf8_decode; the value is not used here */
+        size_t clen;
+        if(!mgl_utf8_decode(&str[offset], size - offset, &codepoint, &clen))
+            clen = 1; /* Invalid sequence: skip a single byte */
+
+        offset += clen;
+        ++codepoint_index;
+    }
+    return size;
+}