diff options
author | Matthew Hodgson <matthew@matrix.org> | 2019-06-22 17:06:02 +0000 |
---|---|---|
committer | Matthew Hodgson <matthew@matrix.org> | 2019-06-22 17:06:02 +0000 |
commit | ae38f2c5a0db711ef573276bc745ee2384a197fa (patch) | |
tree | 6029aafbda99fe85c3fac43db2646b446d564917 /python/olm/_compat.py | |
parent | 25662564d415b9d5486f1915c9d46e5851b058d0 (diff) | |
parent | 61175c969b1de3ecd8c25478c69d6d1883dfa211 (diff) |
Merge branch 'python/unicode_decode_errors' into 'master'
Python unicode decode errors when decrypting.
See merge request matrix-org/olm!4
Diffstat (limited to 'python/olm/_compat.py')
-rw-r--r-- | python/olm/_compat.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/python/olm/_compat.py b/python/olm/_compat.py index 91e4d1b..2ceaa33 100644 --- a/python/olm/_compat.py +++ b/python/olm/_compat.py @@ -44,3 +44,24 @@ def to_bytes(string): return bytes(string, "utf-8") raise TypeError("Invalid type {}".format(type(string))) + + +def to_unicode_str(byte_string, errors="replace"): + """Turn a byte string into a unicode string. + + Should be used everywhere where the input byte string might not be trusted + and may contain invalid unicode values. + + Args: + byte_string (bytes): The bytestring that will be converted to a native + string. + errors (str, optional): The error handling scheme that should be used + to handle unicode decode errors. Can be one of "strict" (raise an + UnicodeDecodeError exception, "ignore" (remove the offending + characters), "replace" (replace the offending character with + U+FFFD), "xmlcharrefreplace" as well as any other name registered + with codecs.register_error that can handle UnicodeEncodeErrors. + + Returns the decoded native string. + """ + return byte_string.decode(encoding="utf-8", errors=errors) |