Skip to content

Commit 57db5a7

Browse files
authored
Merge pull request matplotlib#20634 from jkseppan/type1-decrypt
Implement Type-1 decryption
2 parents 73b7abf + e095ccd commit 57db5a7

File tree

3 files changed

+78
-3
lines changed

3 files changed

+78
-3
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
``Type1Font`` objects now decrypt the encrypted part
2+
----------------------------------------------------
3+
4+
Type 1 fonts have a large part of their code encrypted as an obsolete
5+
copy-protection measure. This part is now available decrypted as the
6+
``decrypted`` attribute of :class:`~matplotlib.type1font.Type1Font`.
7+
The decrypted data is not yet parsed, but decrypting it is a prerequisite for
8+
implementing subsetting.

lib/matplotlib/tests/test_type1font.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ def test_Type1Font():
1515
assert font.parts[2] == rawdata[0x8985:0x8ba6]
1616
assert font.parts[1:] == slanted.parts[1:]
1717
assert font.parts[1:] == condensed.parts[1:]
18+
assert font.decrypted.startswith(b'dup\n/Private 18 dict dup begin')
19+
assert font.decrypted.endswith(b'mark currentfile closefile\n')
1820

1921
differ = difflib.Differ()
2022
diff = list(differ.compare(
@@ -67,3 +69,11 @@ def test_overprecision():
6769
assert matrix == '0.001 0 0.000167 0.001 0 0'
6870
# and here we had -9.48090361795083
6971
assert angle == '-9.4809'
72+
73+
74+
def test_encrypt_decrypt_roundtrip():
    """Check that ``_decrypt`` undoes ``_encrypt`` when both use 'eexec'."""
    original = b'this is my plaintext \0\1\2\3'
    ciphertext = t1f.Type1Font._encrypt(original, 'eexec')
    recovered = t1f.Type1Font._decrypt(ciphertext, 'eexec')
    # The encrypted form must differ from what decryption gives back ...
    assert ciphertext != recovered
    # ... and decryption must reproduce the input exactly.
    assert original == recovered

lib/matplotlib/type1font.py

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,16 @@
2424
import binascii
2525
import enum
2626
import itertools
27+
import logging
2728
import re
2829
import struct
2930

3031
import numpy as np
3132

3233
from matplotlib.cbook import _format_approx
34+
from . import _api
3335

36+
_log = logging.getLogger(__name__)
3437

3538
# token types
3639
_TokenType = enum.Enum('_TokenType',
@@ -46,10 +49,12 @@ class Type1Font:
4649
parts : tuple
4750
A 3-tuple of the cleartext part, the encrypted part, and the finale of
4851
zeros.
52+
decrypted : bytes
53+
The decrypted form of parts[1].
4954
prop : dict[str, Any]
5055
A dictionary of font properties.
5156
"""
52-
__slots__ = ('parts', 'prop')
57+
__slots__ = ('parts', 'decrypted', 'prop')
5358

5459
def __init__(self, input):
5560
"""
@@ -68,6 +73,7 @@ def __init__(self, input):
6873
data = self._read(file)
6974
self.parts = self._split(data)
7075

76+
self.decrypted = self._decrypt(self.parts[1], 'eexec')
7177
self._parse()
7278

7379
def _read(self, file):
@@ -125,20 +131,71 @@ def _split(self, data):
125131
zeros -= 1
126132
idx -= 1
127133
if zeros:
128-
raise RuntimeError('Insufficiently many zeros in Type 1 font')
134+
# this may have been a problem on old implementations that
135+
# used the zeros as necessary padding
136+
_log.info('Insufficiently many zeros in Type 1 font')
129137

130138
# Convert encrypted part to binary (if we read a pfb file, we may end
131139
# up converting binary to hexadecimal to binary again; but if we read
132140
# a pfa file, this part is already in hex, and I am not quite sure if
133141
# even the pfb format guarantees that it will be in binary).
134-
binary = binascii.unhexlify(data[len1:idx+1])
142+
idx1 = len1 + ((idx - len1 + 2) & ~1) # ensure an even number of bytes
143+
binary = binascii.unhexlify(data[len1:idx1])
135144

136145
return data[:len1], binary, data[idx+1:]
137146

138147
_whitespace_or_comment_re = re.compile(br'[\0\t\r\014\n ]+|%[^\r\n\v]*')
139148
_token_re = re.compile(br'/{0,2}[^]\0\t\r\v\n ()<>{}/%[]+')
140149
_instring_re = re.compile(br'[()\\]')
141150

151+
@staticmethod
def _decrypt(ciphertext, key, ndiscard=4):
    """
    Decrypt ciphertext using the Type-1 font algorithm.

    The algorithm is described in Adobe's "Adobe Type 1 Font Format".

    Parameters
    ----------
    ciphertext : bytes
        The encrypted data.
    key : int or {'eexec', 'charstring'}
        Either the initial cipher key itself (an integer that fits in 16
        bits), or the name of the font section whose key should be used:
        55665 for 'eexec', 4330 for 'charstring'.
    ndiscard : int, default: 4
        Number of bytes discarded from the beginning of the plaintext.

    Returns
    -------
    bytes
        The decrypted data without its first *ndiscard* bytes.

    Raises
    ------
    ValueError
        If *key* is an integer outside ``range(0x10000)``, or is not an
        integer and not one of the recognized section names.
    """
    if not isinstance(key, int):
        # Anything that is not a literal key has to be a known section
        # name; check_getitem rejects everything else.
        key = _api.check_getitem(
            {'eexec': 55665, 'charstring': 4330}, key=key)
    elif not 0 <= key <= 0xffff:
        # The cipher state is 16 bits wide; a wider key would make
        # ``key >> 8`` exceed one byte.
        raise ValueError(
            f'key must be between 0 and 65535 inclusive, not {key!r}')

    plaintext = bytearray()
    for byte in ciphertext:
        # The high byte of the running key is the mask for this byte.
        plaintext.append(byte ^ (key >> 8))
        # The next key depends on the *ciphertext* byte, which is what
        # lets _encrypt and _decrypt stay in step.
        key = ((key + byte) * 52845 + 22719) & 0xffff

    return bytes(plaintext[ndiscard:])
172+
173+
@staticmethod
def _encrypt(plaintext, key, ndiscard=4):
    """
    Encrypt plaintext using the Type-1 font algorithm.

    The algorithm is described in Adobe's "Adobe Type 1 Font Format".

    Parameters
    ----------
    plaintext : bytes
        The data to encrypt.
    key : int or {'eexec', 'charstring'}
        Either the initial cipher key itself (an integer that fits in 16
        bits), or the name of the font section whose key should be used:
        55665 for 'eexec', 4330 for 'charstring'.
    ndiscard : int, default: 4
        Number of bytes prepended to the plaintext before encryption.
        This function prepends NUL bytes for reproducibility, even though
        the original algorithm uses random bytes, presumably to avoid
        cryptanalysis.

    Returns
    -------
    bytes
        The encrypted data; it is *ndiscard* bytes longer than
        *plaintext*.

    Raises
    ------
    ValueError
        If *key* is an integer outside ``range(0x10000)``, or is not an
        integer and not one of the recognized section names.
    """
    if not isinstance(key, int):
        # Anything that is not a literal key has to be a known section
        # name; check_getitem rejects everything else.
        key = _api.check_getitem(
            {'eexec': 55665, 'charstring': 4330}, key=key)
    elif not 0 <= key <= 0xffff:
        # The cipher state is 16 bits wide; a wider key would make
        # ``key >> 8`` exceed one byte.
        raise ValueError(
            f'key must be between 0 and 65535 inclusive, not {key!r}')

    ciphertext = bytearray()
    for byte in b'\0' * ndiscard + plaintext:
        # The high byte of the running key is the mask for this byte.
        c = byte ^ (key >> 8)
        ciphertext.append(c)
        # The next key depends on the byte just *emitted*, mirroring the
        # update that _decrypt performs on the byte it reads.
        key = ((key + c) * 52845 + 22719) & 0xffff

    return bytes(ciphertext)
198+
142199
@classmethod
143200
def _tokens(cls, text):
144201
"""

0 commit comments

Comments
 (0)