/*
    Translates a code point index into a byte offset inside |str|.

    |str| is scanned from the start, |size| bytes at most, one code point at a time.
    The return value is the byte offset where code point number |index| (0-based) begins.
    When mgl_utf8_decode rejects the data at the current position, that single byte is
    stepped over and counted as one code point.
    If the scan reaches the end of the data before |index| code points have been passed,
    |size| is returned.

    TODO: Optimize (remove branching, etc)
*/
size_t mgl_utf8_index_to_offset(const unsigned char *str, size_t size, size_t index) {
    size_t offset = 0;
    /* Number of code points that still have to be stepped over before the target */
    size_t remaining = index;

    while(offset < size) {
        uint32_t decoded;
        size_t length;

        /* The decoded value itself is not needed, only how many bytes it occupies */
        if(!mgl_utf8_decode(str + offset, size - offset, &decoded, &length))
            length = 1;

        if(remaining == 0)
            return offset;

        --remaining;
        offset += length;
    }

    return size;
}