olm: Allow decryption functions to define how to handle unicode decode errors.

This patch changes the decryption functions so that they no longer fail when a Unicode decode error occurs while converting the decrypted plaintext bytes into a native Python string. Byte sequences that cannot be decoded as Unicode are now replaced with the Unicode replacement character (U+FFFD). The old behaviour of raising a UnicodeDecodeError can be restored by passing the "strict" error handling scheme to the decrypt function.
author: Damir Jelić <poljar@termina.org.uk> 2019-06-18 13:46:57 +0200
committer: Damir Jelić <poljar@termina.org.uk> 2019-06-18 13:50:46 +0200
commit: 2f5590bf38e5995a36f770c04cfbf31eb9070eca (patch)
tree: 299e68cd3b9ea33f212cd1a7c4fcf0bbbca4bfc8 /python/olm/group_session.py
parent: e1a4e6ebf1568935a57ba8cec48e43dd7c1ebcd3 (diff)
1 files changed, 14 insertions, 7 deletions
diff --git a/python/olm/group_session.py b/python/olm/group_session.py
index 737d9ef..88f87f0 100644
--- a/python/olm/group_session.py
+++ b/python/olm/group_session.py
@@ -33,7 +33,7 @@ from future.utils import bytes_to_native_str
 # pylint: disable=no-name-in-module
 from _libolm import ffi, lib  # type: ignore
 
-from ._compat import URANDOM, to_bytearray, to_bytes
+from ._compat import URANDOM, to_bytearray, to_bytes, to_native_str
 from ._finalize import track_for_finalization
 
 
@@ -176,8 +176,8 @@ class InboundGroupSession(object):
 
         raise OlmGroupSessionError(last_error)
 
-    def decrypt(self, ciphertext):
-        # type: (AnyStr) -> Tuple[str, int]
+    def decrypt(self, ciphertext, errors="replace"):
+        # type: (AnyStr, str) -> Tuple[str, int]
         """Decrypt a message
 
         Returns a tuple of the decrypted plain-text and the message index of
@@ -197,6 +197,13 @@ class InboundGroupSession(object):
         Args:
             ciphertext(str): Base64 encoded ciphertext containing the encrypted
                 message
+            errors(str, optional): The error handling scheme to use for
+                unicode decoding errors. The default is "replace" meaning that
+                any byte sequence that cannot be decoded will be replaced with
+                the unicode replacement character (U+FFFD). Other possible
+                values are "strict" and "ignore", as well as any other name
+                registered with codecs.register_error that can handle
+                UnicodeDecodeErrors.
         """
         if not ciphertext:
             raise ValueError("Ciphertext can't be empty.")
@@ -223,10 +230,10 @@ class InboundGroupSession(object):
 
         self._check_error(plaintext_length)
 
-        plaintext = bytes_to_native_str(ffi.unpack(
-            plaintext_buffer,
-            plaintext_length
-        ))
+        plaintext = to_native_str(
+            ffi.unpack(plaintext_buffer, plaintext_length),
+            errors=errors
+        )
 
         # clear out copies of the plaintext
         lib.memset(plaintext_buffer, 0, max_plaintext_length)
author	Damir Jelić <poljar@termina.org.uk>	2019-06-18 13:46:57 +0200
committer	Damir Jelić <poljar@termina.org.uk>	2019-06-18 13:50:46 +0200
commit	2f5590bf38e5995a36f770c04cfbf31eb9070eca (patch)
tree	299e68cd3b9ea33f212cd1a7c4fcf0bbbca4bfc8 /python/olm/group_session.py
parent	e1a4e6ebf1568935a57ba8cec48e43dd7c1ebcd3 (diff)