@@ -114,16 +114,21 @@ class CharacterTracker:
114
114
----------
115
115
subset_size : int
116
116
The size at which characters are grouped into subsets.
117
- used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
117
+ used : dict
118
118
A dictionary of font files to character maps.
119
119
120
- The key is a font filename and subset within that font .
120
+ The key is a font filename.
121
121
122
- The value is a dictionary mapping a character code to a glyph index. Note this
123
- mapping is the inverse of FreeType, which maps glyph indices to character codes.
122
+ The value is a list of dictionaries, each mapping at most *subset_size*
123
+ character codes to glyph indices. Note this mapping is the inverse of FreeType,
124
+ which maps glyph indices to character codes.
124
125
125
126
If *subset_size* is not set, then there will only be one subset per font
126
127
filename.
128
+ glyph_map : dict
129
+ A dictionary of font files to glyph maps. The glyph map is from (character code,
130
+ glyph index)-pairs to (subset index, subset character code)-pairs. You probably
131
+ will want to use the `.subset_to_unicode` method instead of this attribute.
127
132
"""
128
133
129
134
def __init__ (self , subset_size : int = 0 ):
@@ -134,7 +139,10 @@ def __init__(self, subset_size: int = 0):
134
139
The maximum size that is supported for an embedded font. If provided, then
135
140
characters will be grouped into these sized subsets.
136
141
"""
137
- self .used : dict [tuple [str , int ], dict [CharacterCodeType , GlyphIndexType ]] = {}
142
+ self .used : dict [str , list [dict [CharacterCodeType , GlyphIndexType ]]] = {}
143
+ self .glyph_map : dict [str ,
144
+ dict [tuple [CharacterCodeType , GlyphIndexType ],
145
+ tuple [int , CharacterCodeType ]]] = {}
138
146
self .subset_size = subset_size
139
147
140
148
def track (self , font : FT2Font , s : str ) -> list [tuple [int , CharacterCodeType ]]:
@@ -186,22 +194,39 @@ def track_glyph(
186
194
The character code within the above subset. If *subset_size* was not
187
195
specified on this instance, then this is just *charcode* unmodified.
188
196
"""
197
+ glyph_map = self .glyph_map .setdefault (font .fname , {})
198
+ key = (charcode , glyph )
199
+ if key in glyph_map :
200
+ return glyph_map [key ]
201
+
202
+ subset_maps = self .used .setdefault (font .fname , [{}])
203
+ # Default to preserving the character code as it was.
204
+ subset = 0
205
+ subset_charcode = charcode
206
+ use_next_charmap = False
189
207
if self .subset_size != 0 :
190
- subset = charcode // self .subset_size
191
- subset_charcode = charcode % self .subset_size
192
- else :
193
- subset = 0
194
- subset_charcode = charcode
195
- self .used .setdefault ((font .fname , subset ), {})[subset_charcode ] = glyph
208
+ # But start filling a new subset if outside the first block; this preserves
209
+ # ASCII (for Type 3) or the Basic Multilingual Plane (for Type 42).
210
+ if charcode >= self .subset_size :
211
+ use_next_charmap = True
212
+ if use_next_charmap :
213
+ if len (subset_maps ) == 1 or len (subset_maps [- 1 ]) == self .subset_size :
214
+ subset_maps .append ({})
215
+ subset = len (subset_maps ) - 1
216
+ subset_charcode = len (subset_maps [- 1 ])
217
+ subset_maps [subset ][subset_charcode ] = glyph
218
+ glyph_map [key ] = (subset , subset_charcode )
196
219
return (subset , subset_charcode )
197
220
198
- def subset_to_unicode (self , index : int ,
221
+ def subset_to_unicode (self , fontname : str , index : int ,
199
222
charcode : CharacterCodeType ) -> CharacterCodeType :
200
223
"""
201
224
Map a subset index and character code to a Unicode character code.
202
225
203
226
Parameters
204
227
----------
228
+ fontname : str
229
+ The name of the font, from the *used* dictionary key.
205
230
index : int
206
231
The subset index within a font.
207
232
charcode : CharacterCodeType
@@ -212,7 +237,11 @@ def subset_to_unicode(self, index: int,
212
237
CharacterCodeType
213
238
The Unicode character code corresponding to the subsetted one.
214
239
"""
215
- return index * self .subset_size + charcode
240
+ search = (index , charcode )
241
+ for orig_info , subset_info in self .glyph_map [fontname ].items ():
242
+ if search == subset_info :
243
+ return orig_info [0 ]
244
+ raise ValueError (f'{ charcode } does not exist in { fontname } subset { index } ' )
216
245
217
246
218
247
class RendererPDFPSBase (RendererBase ):
0 commit comments