diff options
-rw-r--r-- | include/mgl/system/utf8.h | 2 | ||||
-rw-r--r-- | src/system/utf8.c | 21 |
2 files changed, 23 insertions, 0 deletions
/* Declaration added to include/mgl/system/utf8.h */

/*
    Converts a codepoint index into a byte offset within |str|.
    Returns |size| if not found.
*/
size_t mgl_utf8_index_to_offset(const unsigned char *str, size_t size, size_t index);

/* Definition added to src/system/utf8.c */

/*
    Walks |str| one codepoint at a time and returns the byte offset at which
    the codepoint number |index| (0-based) starts.

    str:   bytes to scan, at least |size| bytes long.
    size:  number of bytes in |str|.
    index: codepoint index to look up.

    Returns |size| when |str| holds |index| or fewer codepoints
    (this includes the case where |size| is 0).

    A byte that mgl_utf8_decode can't decode is counted as a codepoint
    of its own that is one byte long, so the scan never stops on bad input.

    TODO: Optimize (remove branching, etc)
*/
size_t mgl_utf8_index_to_offset(const unsigned char *str, size_t size, size_t index) {
    size_t offset = 0;
    for(size_t codepoint_index = 0; offset < size; ++codepoint_index) {
        /* Test before decoding: the codepoint at the returned offset is never needed */
        if(codepoint_index >= index) {
            return offset;
        }

        /* Only the length is used, |codepoint| is just the required output argument */
        uint32_t codepoint;
        size_t clen;
        if(!mgl_utf8_decode(&str[offset], size - offset, &codepoint, &clen) || clen == 0) {
            /* Undecodable byte, or a zero length that would stall the loop: skip one byte */
            clen = 1;
        }

        offset += clen;
    }
    return size;
}