diff options
author | Damir Jelić <poljar@termina.org.uk> | 2019-06-18 13:38:22 +0200 |
---|---|---|
committer | Damir Jelić <poljar@termina.org.uk> | 2019-06-18 13:44:22 +0200 |
commit | e1a4e6ebf1568935a57ba8cec48e43dd7c1ebcd3 (patch) | |
tree | 069b4d432dd9305bde33391aea9727d30caed246 /python/olm | |
parent | 327d6ac0eb788429dcbb0b012037ea30020a14fe (diff) |
compat: Add a method to convert bytes to a string that handles unicode errors.
Diffstat (limited to 'python/olm')
-rw-r--r-- | python/olm/_compat.py | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/python/olm/_compat.py b/python/olm/_compat.py index 91e4d1b..d81bdb5 100644 --- a/python/olm/_compat.py +++ b/python/olm/_compat.py @@ -18,6 +18,8 @@ from builtins import bytes, str from typing import AnyStr +from future.utils import bytes_to_native_str, native_str + try: import secrets URANDOM = secrets.token_bytes # pragma: no cover @@ -44,3 +46,24 @@ def to_bytes(string): return bytes(string, "utf-8") raise TypeError("Invalid type {}".format(type(string))) + + +def to_native_str(byte_string, errors="replace"): + """Turn a byte string into a native string decoding it as UTF-8. + + Args: + byte_string (bytes): The bytestring that will be converted to a native + string. + errors (str, optional): The error handling scheme that should be used + to handle unicode decode errors. Can be one of "strict" (raise a + UnicodeDecodeError exception), "ignore" (remove the offending + characters), "replace" (replace the offending character with + U+FFFD), "xmlcharrefreplace" as well as any other name registered + with codecs.register_error that can handle UnicodeEncodeErrors. + + Returns the decoded native string. + """ + try: + return native_str(byte_string, errors=errors) + except TypeError: + return bytes(byte_string).decode(errors=errors) |