Merge pull request matplotlib#20634 from jkseppan/type1-decrypt

jkseppan · web-flow · commit 57db5a738405 · 2021-07-22T18:56:19.000+03:00
Implement Type-1 decryption
diff --git a/doc/api/next_api_changes/behavior/20634-JKS.rst b/doc/api/next_api_changes/behavior/20634-JKS.rst
@@ -0,0 +1,8 @@
+``Type1Font`` objects now decrypt the encrypted part
+----------------------------------------------------
+
+Type 1 fonts have a large part of their code encrypted as an obsolete
+copy-protection measure. This part is now available decrypted as the
+``decrypted`` attribute of :class:`~matplotlib.type1font.Type1Font`.
+The decrypted data is not yet parsed, but having it decrypted is a
+prerequisite for implementing subsetting.
diff --git a/lib/matplotlib/tests/test_type1font.py b/lib/matplotlib/tests/test_type1font.py
@@ -15,6 +15,8 @@ def test_Type1Font():
     assert font.parts[2] == rawdata[0x8985:0x8ba6]
     assert font.parts[1:] == slanted.parts[1:]
     assert font.parts[1:] == condensed.parts[1:]
+    assert font.decrypted.startswith(b'dup\n/Private 18 dict dup begin')
+    assert font.decrypted.endswith(b'mark currentfile closefile\n')
 
     differ = difflib.Differ()
     diff = list(differ.compare(
@@ -67,3 +69,11 @@ def test_overprecision():
     assert matrix == '0.001 0 0.000167 0.001 0 0'
     # and here we had -9.48090361795083
     assert angle == '-9.4809'
+
+
+def test_encrypt_decrypt_roundtrip():
+    # the plaintext includes NUL and other non-printable bytes
+    plain = b'this is my plaintext \0\1\2\3'
+    cipher = t1f.Type1Font._encrypt(plain, 'eexec')
+    recovered = t1f.Type1Font._decrypt(cipher, 'eexec')
+    assert cipher != recovered and plain == recovered
diff --git a/lib/matplotlib/type1font.py b/lib/matplotlib/type1font.py
@@ -24,13 +24,16 @@
 import binascii
 import enum
 import itertools
+import logging
 import re
 import struct
 
 import numpy as np
 
 from matplotlib.cbook import _format_approx
+from . import _api
 
+_log = logging.getLogger(__name__)
 
 # token types
 _TokenType = enum.Enum('_TokenType',
@@ -46,10 +49,12 @@ class Type1Font:
     parts : tuple
         A 3-tuple of the cleartext part, the encrypted part, and the finale of
         zeros.
+    decrypted : bytes
+        The decrypted form of parts[1].
     prop : dict[str, Any]
         A dictionary of font properties.
     """
-    __slots__ = ('parts', 'prop')
+    __slots__ = ('parts', 'decrypted', 'prop')
 
     def __init__(self, input):
         """
@@ -68,6 +73,7 @@ def __init__(self, input):
                 data = self._read(file)
             self.parts = self._split(data)
 
+        self.decrypted = self._decrypt(self.parts[1], 'eexec')
         self._parse()
 
     def _read(self, file):
@@ -125,20 +131,71 @@ def _split(self, data):
                 zeros -= 1
             idx -= 1
         if zeros:
-            raise RuntimeError('Insufficiently many zeros in Type 1 font')
+            # this may have been a problem on old implementations that
+            # used the zeros as necessary padding
+            _log.info('Insufficiently many zeros in Type 1 font')
 
         # Convert encrypted part to binary (if we read a pfb file, we may end
         # up converting binary to hexadecimal to binary again; but if we read
         # a pfa file, this part is already in hex, and I am not quite sure if
         # even the pfb format guarantees that it will be in binary).
-        binary = binascii.unhexlify(data[len1:idx+1])
+        idx1 = len1 + ((idx - len1 + 2) & ~1)  # ensure an even number of bytes
+        binary = binascii.unhexlify(data[len1:idx1])
 
         return data[:len1], binary, data[idx+1:]
 
     _whitespace_or_comment_re = re.compile(br'[\0\t\r\014\n ]+|%[^\r\n\v]*')
     _token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
     _instring_re = re.compile(br'[()\\]')
 
+    @staticmethod
+    def _decrypt(ciphertext, key, ndiscard=4):
+        """
+        Decrypt ciphertext using the Type-1 font algorithm.
+
+        The algorithm is described in Adobe's "Adobe Type 1 Font Format".
+        *key* is either the initial 16-bit integer key itself or one of
+        the strings 'eexec' and 'charstring', which select the key used
+        for the corresponding part of Type-1 fonts.
+
+        The first *ndiscard* (usually 4) bytes of plaintext are discarded.
+        Return the remaining plaintext as bytes.
+        """
+        if not isinstance(key, int):  # integer keys are used as given
+            key = _api.check_getitem(
+                {'eexec': 55665, 'charstring': 4330}, key=key)
+        plaintext = []
+        for byte in ciphertext:
+            plaintext.append(byte ^ (key >> 8))
+            key = ((key+byte) * 52845 + 22719) & 0xffff
+        return bytes(plaintext[ndiscard:])
+
+    @staticmethod
+    def _encrypt(plaintext, key, ndiscard=4):
+        """
+        Encrypt plaintext using the Type-1 font algorithm.
+
+        The algorithm is described in Adobe's "Adobe Type 1 Font Format".
+        *key* is either the initial 16-bit integer key itself or one of
+        the strings 'eexec' and 'charstring', which select the key used
+        for the corresponding part of Type-1 fonts.
+
+        The ndiscard argument should be an integer, usually 4. That
+        number of bytes is prepended to the plaintext before encryption.
+        This function prepends NUL bytes for reproducibility, even though
+        the original algorithm uses random bytes, presumably to avoid
+        cryptanalysis.
+        """
+        if not isinstance(key, int):  # integer keys are used as given
+            key = _api.check_getitem(
+                {'eexec': 55665, 'charstring': 4330}, key=key)
+        ciphertext = []
+        for byte in b'\0' * ndiscard + plaintext:
+            c = byte ^ (key >> 8)
+            ciphertext.append(c)
+            key = ((key + c) * 52845 + 22719) & 0xffff
+        return bytes(ciphertext)
+
     @classmethod
     def _tokens(cls, text):
         """