aboutsummaryrefslogtreecommitdiff
path: root/python/olm/_compat.py
diff options
context:
space:
mode:
authorMatthew Hodgson <matthew@matrix.org>2019-06-22 17:06:02 +0000
committerMatthew Hodgson <matthew@matrix.org>2019-06-22 17:06:02 +0000
commitae38f2c5a0db711ef573276bc745ee2384a197fa (patch)
tree6029aafbda99fe85c3fac43db2646b446d564917 /python/olm/_compat.py
parent25662564d415b9d5486f1915c9d46e5851b058d0 (diff)
parent61175c969b1de3ecd8c25478c69d6d1883dfa211 (diff)
Merge branch 'python/unicode_decode_errors' into 'master'
Python unicode decode errors when decrypting. See merge request matrix-org/olm!4
Diffstat (limited to 'python/olm/_compat.py')
-rw-r--r--python/olm/_compat.py21
1 files changed, 21 insertions, 0 deletions
diff --git a/python/olm/_compat.py b/python/olm/_compat.py
index 91e4d1b..2ceaa33 100644
--- a/python/olm/_compat.py
+++ b/python/olm/_compat.py
@@ -44,3 +44,24 @@ def to_bytes(string):
return bytes(string, "utf-8")
raise TypeError("Invalid type {}".format(type(string)))
+
+
+def to_unicode_str(byte_string, errors="replace"):
+ """Turn a byte string into a unicode string.
+
+ Should be used wherever the input byte string might not be trusted and
+ may contain invalid unicode values.
+
+ Args:
+ byte_string (bytes): The bytestring that will be converted to a native
+ string.
+ errors (str, optional): The error handling scheme that should be used
+ to handle unicode decode errors. Can be one of "strict" (raise a
+ UnicodeDecodeError exception), "ignore" (remove the offending
+ characters), "replace" (replace the offending character with
+ U+FFFD), as well as any other name registered with
+ codecs.register_error that can handle UnicodeDecodeErrors. Note that
+ encode-only handlers such as "xmlcharrefreplace" cannot be used here.
+
+ Returns the decoded native string.
+ """
+ return byte_string.decode(encoding="utf-8", errors=errors)