From 8792ee2cc5b501f0611e6304f529226a495825db Mon Sep 17 00:00:00 2001
From: dec05eba <dec05eba@protonmail.com>
Date: Sun, 28 Nov 2021 09:03:23 +0100
Subject: Add utf8 index to offset function

---
 include/mgl/system/utf8.h |  2 ++
 src/system/utf8.c         | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/include/mgl/system/utf8.h b/include/mgl/system/utf8.h
index 794884d..f6fe150 100644
--- a/include/mgl/system/utf8.h
+++ b/include/mgl/system/utf8.h
@@ -16,6 +16,8 @@ bool mgl_utf8_decode(const unsigned char *str, size_t size, uint32_t *decoded_co
     Returns 0 if start of codepoint is not found.
 */
 size_t mgl_utf8_get_start_of_codepoint(const unsigned char *str, size_t size, size_t offset);
+/* Returns the byte offset in |str| of the codepoint at 0-based |index|. Returns |size| if not found (|str| has no codepoint at |index|) */
+size_t mgl_utf8_index_to_offset(const unsigned char *str, size_t size, size_t index);
 
 #endif /* MGL_UTF8_H */
 
diff --git a/src/system/utf8.c b/src/system/utf8.c
index 5d5ec79..d38b292 100644
--- a/src/system/utf8.c
+++ b/src/system/utf8.c
@@ -90,3 +90,24 @@ size_t mgl_utf8_get_start_of_codepoint(const unsigned char *str, size_t size, si
 
     return 0;
 }
+
+/* Walks |str| from the start and returns the byte offset of the codepoint at 0-based |index|, or |size| if the bytes run out first. TODO: Optimize (remove branching, etc) */
+size_t mgl_utf8_index_to_offset(const unsigned char *str, size_t size, size_t index) {
+    size_t codepoint_index = 0; /* Number of codepoints stepped over so far */
+    for(size_t i = 0; i < size;) { /* |i| is advanced by |clen| at the bottom of the body */
+        const unsigned char *cp = &str[i];
+        uint32_t codepoint;
+        size_t clen;
+        if(!mgl_utf8_decode(cp, size - i, &codepoint, &clen)) { /* Only the |size - i| bytes that remain are offered to the decoder */
+            codepoint = *cp; /* mgl_utf8_decode returned false: count this byte as a codepoint of its own */
+            clen = 1; /* ...and step over just that one byte so the loop always makes progress */
+        }
+
+        if(codepoint_index >= index) /* |i| is the first byte of the requested codepoint */
+            return i;
+
+        i += clen;
+        ++codepoint_index;
+    }
+    return size; /* Reached the end of |str| before arriving at |index| */
+}
-- 
cgit v1.2.3-70-g09d2