aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
m---------depends/mgl0
-rw-r--r--include/mglpp/system/Utf8.hpp8
-rw-r--r--src/system/Utf8.cpp4
3 files changed, 12 insertions, 0 deletions
diff --git a/depends/mgl b/depends/mgl
-Subproject ccd0e65a0ddccd9c52d4c075ec1cad41ae7edb4
+Subproject c18f87ad13da518af5ff245dbce2a9e608097ea
diff --git a/include/mglpp/system/Utf8.hpp b/include/mglpp/system/Utf8.hpp
index 4f6a39a..ba1b90e 100644
--- a/include/mglpp/system/Utf8.hpp
+++ b/include/mglpp/system/Utf8.hpp
@@ -5,7 +5,15 @@
#include <stdint.h>
namespace mgl {
+ // Returns false on failure, in which case |decoded_codepoint| is set to |str[0]| (if |size| > 0) and |codepoint_length| is set to 1.
bool utf8_decode(const unsigned char *str, size_t size, uint32_t *decoded_codepoint, size_t *codepoint_length);
+ /*
+ |str| should point to the start of the UTF-8 string and |size| is the size of that string.
+ Returns the index of the start of the codepoint that starts at or before |offset|.
+ If the string contains invalid UTF-8, the index of the invalid character is returned instead.
+ Returns 0 if the start of the codepoint is not found.
+ */
+ size_t utf8_get_start_of_codepoint(const unsigned char *str, size_t size, size_t offset);
}
#endif /* MGLPP_UTF8_HPP */
diff --git a/src/system/Utf8.cpp b/src/system/Utf8.cpp
index be9b2af..a416477 100644
--- a/src/system/Utf8.cpp
+++ b/src/system/Utf8.cpp
@@ -8,4 +8,8 @@ namespace mgl {
bool utf8_decode(const unsigned char *str, size_t size, uint32_t *decoded_codepoint, size_t *codepoint_length) {
return mgl_utf8_decode(str, size, decoded_codepoint, codepoint_length);
}
+
+ size_t utf8_get_start_of_codepoint(const unsigned char *str, size_t size, size_t offset) {
+ return mgl_utf8_get_start_of_codepoint(str, size, offset);
+ }
} \ No newline at end of file