@@ -103,6 +103,58 @@ def font_as_file(font):
103
103
return fh
104
104
105
105
106
class GlyphMap:
    """
    A two-way glyph mapping.

    The forward glyph map is from (character code, glyph index)-pairs to (subset index,
    subset character code)-pairs.

    The inverse glyph map is from (subset index, subset character code)-pairs to
    (character code, glyph index)-pairs.
    """

    def __init__(self) -> None:
        # (charcode, glyph index) -> (subset, subset charcode)
        self._forward: dict[tuple[CharacterCodeType, GlyphIndexType],
                            tuple[int, CharacterCodeType]] = {}
        # (subset, subset charcode) -> (charcode, glyph index)
        self._inverse: dict[tuple[int, CharacterCodeType],
                            tuple[CharacterCodeType, GlyphIndexType]] = {}

    def get(self, charcode: CharacterCodeType,
            glyph_index: GlyphIndexType) -> tuple[int, CharacterCodeType] | None:
        """
        Get the forward mapping from a (character code, glyph index)-pair.

        Parameters
        ----------
        charcode : CharacterCodeType
            The character code to look up.
        glyph_index : GlyphIndexType
            The glyph index paired with *charcode*.

        Returns
        -------
        tuple[int, CharacterCodeType] or None
            The (subset, subset character code)-pair recorded for the input, or
            *None* if the pair is not currently mapped.
        """
        return self._forward.get((charcode, glyph_index))

    def iget(self, subset: int,
             subset_charcode: CharacterCodeType) -> tuple[CharacterCodeType,
                                                          GlyphIndexType]:
        """
        Get the inverse mapping from a (subset, subset charcode)-pair.

        Parameters
        ----------
        subset : int
            The subset index to look up.
        subset_charcode : CharacterCodeType
            The character code within *subset*.

        Returns
        -------
        tuple[CharacterCodeType, GlyphIndexType]
            The (character code, glyph index)-pair recorded for the input.

        Raises
        ------
        KeyError
            If the (subset, subset charcode)-pair has not been added. Unlike `get`,
            this lookup has no *None* fallback.
        """
        return self._inverse[(subset, subset_charcode)]

    def add(self, charcode: CharacterCodeType, glyph_index: GlyphIndexType, subset: int,
            subset_charcode: CharacterCodeType) -> None:
        """
        Add a mapping to this instance.

        Both directions are recorded. An existing entry under the same forward key or
        the same inverse key is overwritten; the counterpart entry that the overwritten
        value pointed to in the other direction is left in place.

        Parameters
        ----------
        charcode : CharacterCodeType
            The character code to record.
        glyph_index : GlyphIndexType
            The corresponding glyph index to record.
        subset : int
            The subset in which the subset character code resides.
        subset_charcode : CharacterCodeType
            The subset character code within the above subset.
        """
        self._forward[(charcode, glyph_index)] = (subset, subset_charcode)
        self._inverse[(subset, subset_charcode)] = (charcode, glyph_index)
156
+
157
+
106
158
class CharacterTracker :
107
159
"""
108
160
Helper for font subsetting by the PDF and PS backends.
@@ -114,16 +166,20 @@ class CharacterTracker:
114
166
----------
115
167
subset_size : int
116
168
The size at which characters are grouped into subsets.
117
- used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
169
+ used : dict
118
170
A dictionary of font files to character maps.
119
171
120
- The key is a font filename and subset within that font .
172
+ The key is a font filename.
121
173
122
- The value is a dictionary mapping a character code to a glyph index. Note this
123
- mapping is the inverse of FreeType, which maps glyph indices to character codes.
174
+ The value is a list of dictionaries, each mapping at most *subset_size*
175
+ character codes to glyph indices. Note this mapping is the inverse of FreeType,
176
+ which maps glyph indices to character codes.
124
177
125
178
If *subset_size* is not set, then there will only be one subset per font
126
179
filename.
180
+ glyph_maps : dict
181
+ A dictionary of font files to glyph maps. You probably will want to use the
182
+ `.subset_to_unicode` method instead of this attribute.
127
183
"""
128
184
129
185
def __init__ (self , subset_size : int = 0 ):
@@ -134,7 +190,8 @@ def __init__(self, subset_size: int = 0):
134
190
The maximum size that is supported for an embedded font. If provided, then
135
191
characters will be grouped into these sized subsets.
136
192
"""
137
- self .used : dict [tuple [str , int ], dict [CharacterCodeType , GlyphIndexType ]] = {}
193
+ self .used : dict [str , list [dict [CharacterCodeType , GlyphIndexType ]]] = {}
194
+ self .glyph_maps : dict [str , GlyphMap ] = {}
138
195
self .subset_size = subset_size
139
196
140
197
def track (self , font : FT2Font , s : str ) -> list [tuple [int , CharacterCodeType ]]:
@@ -186,33 +243,50 @@ def track_glyph(
186
243
The character code within the above subset. If *subset_size* was not
187
244
specified on this instance, then this is just *charcode* unmodified.
188
245
"""
189
- if self .subset_size != 0 :
190
- subset = charcode // self .subset_size
191
- subset_charcode = charcode % self .subset_size
246
+ glyph_map = self .glyph_maps .setdefault (font .fname , GlyphMap ())
247
+ if result := glyph_map .get (charcode , glyph ):
248
+ return result
249
+
250
+ subset_maps = self .used .setdefault (font .fname , [{}])
251
+ # Default to preserving the character code as it was.
252
+ use_next_charmap = (
253
+ self .subset_size != 0
254
+ # But start filling a new subset if outside the first block; this preserves
255
+ # ASCII (for Type 3) or the Basic Multilingual Plane (for Type 42).
256
+ and charcode >= self .subset_size
257
+ )
258
+ if use_next_charmap :
259
+ if len (subset_maps ) == 1 or len (subset_maps [- 1 ]) == self .subset_size :
260
+ subset_maps .append ({})
261
+ subset = len (subset_maps ) - 1
262
+ subset_charcode = len (subset_maps [- 1 ])
192
263
else :
193
264
subset = 0
194
265
subset_charcode = charcode
195
- self .used .setdefault ((font .fname , subset ), {})[subset_charcode ] = glyph
266
+ subset_maps [subset ][subset_charcode ] = glyph
267
+ glyph_map .add (charcode , glyph , subset , subset_charcode )
196
268
return (subset , subset_charcode )
197
269
198
def subset_to_unicode(self, fontname: str, subset: int,
                      subset_charcode: CharacterCodeType) -> CharacterCodeType:
    """
    Translate a subsetted character code back to its Unicode character code.

    Parameters
    ----------
    fontname : str
        The name of the font, from the *used* dictionary key.
    subset : int
        The subset index within a font.
    subset_charcode : CharacterCodeType
        The character code within a subset to map back.

    Returns
    -------
    CharacterCodeType
        The Unicode character code corresponding to the subsetted one.
    """
    font_glyph_map = self.glyph_maps[fontname]
    # The inverse lookup yields a pair; its first element is the character code.
    original_pair = font_glyph_map.iget(subset, subset_charcode)
    return original_pair[0]
216
290
217
291
218
292
class RendererPDFPSBase (RendererBase ):
0 commit comments