#include "../../include/mgl/system/utf8.h"

/*
    Classifies the lead byte |b| of a UTF-8 sequence and stores the total
    sequence length in bytes (1 to 4) in |codepoint_length|.

    Returns false, leaving |codepoint_length| untouched, when |b| cannot start
    a sequence: continuation bytes (10xxxxxx) and bytes 0xF8..0xFF.

    Only the bit pattern of the lead byte is examined here, so lead bytes that
    can never appear in well-formed UTF-8 (0xC0, 0xC1, 0xF5..0xF7) still get a
    length. Callers have to range-check the decoded value themselves.
*/
static inline bool utf8_get_codepoint_length(unsigned char b, size_t *codepoint_length) {
    if((b & 0x80) == 0) {
        /* 0xxxxxxx: single byte (ASCII) */
        *codepoint_length = 1;
        return true;
    } else if((b & 0xE0) == 0xC0) {
        /* 110xxxxx: two-byte sequence */
        *codepoint_length = 2;
        return true;
    } else if((b & 0xF0) == 0xE0) {
        /* 1110xxxx: three-byte sequence */
        *codepoint_length = 3;
        return true;
    } else if((b & 0xF8) == 0xF0) {
        /* 11110xxx: four-byte sequence */
        *codepoint_length = 4;
        return true;
    } else {
        return false;
    }
}

/* TODO: Optimize (remove branching, etc) */
/*
    Decodes the single UTF-8 encoded code point that starts at |str|.

    |str|               Input bytes. At most |size| bytes are read.
    |size|              Number of bytes available in |str|.
    |decoded_codepoint| Receives the decoded code point on success.
    |codepoint_length|  Receives the number of bytes consumed (1 to 4) on success.

    Returns true on success. Returns false, without writing to either output,
    when:
      - |size| is 0,
      - the first byte is not a valid lead byte,
      - the sequence is truncated (|size| is smaller than the sequence length),
      - a trailing byte is not a continuation byte (10xxxxxx),
      - the sequence is an overlong encoding (the value would fit in a shorter sequence),
      - the value is a UTF-16 surrogate (U+D800..U+DFFF),
      - the value is above U+10FFFF.
*/
bool mgl_utf8_decode(const unsigned char *str, size_t size, uint32_t *decoded_codepoint, size_t *codepoint_length) {
    if(size == 0)
        return false;

    size_t clen;
    if(!utf8_get_codepoint_length(str[0], &clen))
        return false;

    if(size < clen)
        return false;

    /* Every byte after the lead byte must be a continuation byte */
    for(size_t i = 1; i < clen; ++i) {
        if((str[i] & 0xC0) != 0x80)
            return false;
    }

    uint32_t codepoint;
    /* Smallest value that legitimately requires |clen| bytes, used to detect overlong encodings */
    uint32_t min_codepoint;
    switch(clen) {
        case 1:
            codepoint = (uint32_t)(str[0] & 0x7F);
            min_codepoint = 0x0;
            break;
        case 2:
            codepoint = ((uint32_t)(str[0] & 0x1F) << 6);
            codepoint |= (uint32_t)(str[1] & 0x3F);
            min_codepoint = 0x80;
            break;
        case 3:
            codepoint = ((uint32_t)(str[0] & 0x0F) << 12);
            codepoint |= ((uint32_t)(str[1] & 0x3F) << 6);
            codepoint |= (uint32_t)(str[2] & 0x3F);
            min_codepoint = 0x800;
            break;
        case 4:
            codepoint = ((uint32_t)(str[0] & 0x07) << 18);
            codepoint |= ((uint32_t)(str[1] & 0x3F) << 12);
            codepoint |= ((uint32_t)(str[2] & 0x3F) << 6);
            codepoint |= (uint32_t)(str[3] & 0x3F);
            min_codepoint = 0x10000;
            break;
        default:
            /* Not reachable: the helper only reports lengths 1 to 4 */
            return false;
    }

    /* Overlong encoding, e.g. C0 80 for U+0000. Accepting these lets input bypass byte-level filters */
    if(codepoint < min_codepoint)
        return false;

    /* Surrogate code points are reserved for UTF-16 and are not valid in UTF-8 */
    if(codepoint >= 0xD800 && codepoint <= 0xDFFF)
        return false;

    /* Beyond the last Unicode code point (lead byte 0xF4 with a large second byte, or 0xF5..0xF7) */
    if(codepoint > 0x10FFFF)
        return false;

    *codepoint_length = clen;
    *decoded_codepoint = codepoint;
    return true;
}