From 2f5590bf38e5995a36f770c04cfbf31eb9070eca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Damir=20Jeli=C4=87?= <poljar@termina.org.uk>
Date: Tue, 18 Jun 2019 13:46:57 +0200
Subject: olm: Allow decryption functions to define how to handle unicode
 decode errors.

This patch changes the decryption functions not to fail if there was a
Unicode decode error while converting the decrypted plaintext bytes into
a native Python string.

Characters that cannot be decoded as unicode are now replaced with the
unicode replacement character (U+FFFD).

The old behaviour of raising a UnicodeDecodeError can be achieved by
passing the "strict" error handling scheme to the decrypt function.
---
 python/olm/group_session.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'python/olm/group_session.py')

diff --git a/python/olm/group_session.py b/python/olm/group_session.py
index 737d9ef..88f87f0 100644
--- a/python/olm/group_session.py
+++ b/python/olm/group_session.py
@@ -33,7 +33,7 @@ from future.utils import bytes_to_native_str
 # pylint: disable=no-name-in-module
 from _libolm import ffi, lib  # type: ignore
 
-from ._compat import URANDOM, to_bytearray, to_bytes
+from ._compat import URANDOM, to_bytearray, to_bytes, to_native_str
 from ._finalize import track_for_finalization
 
 
@@ -176,8 +176,8 @@ class InboundGroupSession(object):
 
         raise OlmGroupSessionError(last_error)
 
-    def decrypt(self, ciphertext):
-        # type: (AnyStr) -> Tuple[str, int]
+    def decrypt(self, ciphertext, errors="replace"):
+        # type: (AnyStr, str) -> Tuple[str, int]
         """Decrypt a message
 
         Returns a tuple of the decrypted plain-text and the message index of
@@ -197,6 +197,13 @@ class InboundGroupSession(object):
         Args:
             ciphertext(str): Base64 encoded ciphertext containing the encrypted
                 message
+            unicode_errors(str, optional): The error handling scheme to use for
+                unicode decoding errors. The default is "replace" meaning that
+                the character that was unable to decode will be replaced with
+                the unicode replacement character (U+FFFD). Other possible
+                values are "strict" and "ignore", as well as any other name
+                registered with codecs.register_error that can handle
+                UnicodeDecodeErrors ("xmlcharrefreplace" is encode-only).
         """
         if not ciphertext:
             raise ValueError("Ciphertext can't be empty.")
@@ -223,10 +230,10 @@ class InboundGroupSession(object):
 
         self._check_error(plaintext_length)
 
-        plaintext = bytes_to_native_str(ffi.unpack(
-            plaintext_buffer,
-            plaintext_length
-        ))
+        plaintext = to_native_str(
+            ffi.unpack(plaintext_buffer, plaintext_length),
+            errors=errors
+        )
 
         # clear out copies of the plaintext
         lib.memset(plaintext_buffer, 0, max_plaintext_length)
-- 
cgit v1.2.3


From 5e24c605d2926e23273089058741fe69e1b3030a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Damir=20Jeli=C4=87?= <poljar@termina.org.uk>
Date: Wed, 19 Jun 2019 14:45:20 +0200
Subject: _compat: Change the to_native_str into a to_unicode_str function.

The to_native_str function was supposed to produce Unicode decoded
native strings for python2 and python3.

Upon further consideration this doesn't make much sense since under
python2 it would need to decode the bytes into a Unicode string and turn
it back into a python2 str.

The ability to use the replacement character requires us to use a
Unicode string under python2 as well.
---
 python/olm/group_session.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'python/olm/group_session.py')

diff --git a/python/olm/group_session.py b/python/olm/group_session.py
index 88f87f0..313e5fa 100644
--- a/python/olm/group_session.py
+++ b/python/olm/group_session.py
@@ -33,7 +33,7 @@ from future.utils import bytes_to_native_str
 # pylint: disable=no-name-in-module
 from _libolm import ffi, lib  # type: ignore
 
-from ._compat import URANDOM, to_bytearray, to_bytes, to_native_str
+from ._compat import URANDOM, to_bytearray, to_bytes, to_unicode_str
 from ._finalize import track_for_finalization
 
 
@@ -230,7 +230,7 @@ class InboundGroupSession(object):
 
         self._check_error(plaintext_length)
 
-        plaintext = to_native_str(
+        plaintext = to_unicode_str(
             ffi.unpack(plaintext_buffer, plaintext_length),
             errors=errors
         )
-- 
cgit v1.2.3


From 7538a1eccf99106712a17cc85adacdf27c4a8e8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Damir=20Jeli=C4=87?= <poljar@termina.org.uk>
Date: Thu, 20 Jun 2019 12:16:37 +0200
Subject: olm: Rename the errors function argument in the decryption functions.

---
 python/olm/group_session.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'python/olm/group_session.py')

diff --git a/python/olm/group_session.py b/python/olm/group_session.py
index 313e5fa..5068192 100644
--- a/python/olm/group_session.py
+++ b/python/olm/group_session.py
@@ -176,7 +176,7 @@ class InboundGroupSession(object):
 
         raise OlmGroupSessionError(last_error)
 
-    def decrypt(self, ciphertext, errors="replace"):
+    def decrypt(self, ciphertext, unicode_errors="replace"):
         # type: (AnyStr, str) -> Tuple[str, int]
         """Decrypt a message
 
@@ -232,7 +232,7 @@ class InboundGroupSession(object):
 
         plaintext = to_unicode_str(
             ffi.unpack(plaintext_buffer, plaintext_length),
-            errors=errors
+            errors=unicode_errors
         )
 
         # clear out copies of the plaintext
-- 
cgit v1.2.3