22
22
from fontTools .ttLib import TTFont
23
23
24
24
25
+ _FONT_MAX_GLYPH = {
26
+ 3 : 256 ,
27
+ 42 : 65536 ,
28
+ }
29
+
30
+
25
31
@functools .lru_cache (50 )
26
32
def _cached_get_afm_from_fname (fname ):
27
33
with open (fname , "rb" ) as fh :
@@ -103,6 +109,57 @@ def font_as_file(font):
103
109
return fh
104
110
105
111
112
class GlyphMap:
    """
    A two-way glyph mapping.

    The forward glyph map is from (character string, glyph index)-pairs to
    (subset index, subset character code)-pairs.

    The inverse glyph map is from (subset index, subset character code)-pairs to
    (character string, glyph index)-pairs.

    Both directions are written together by `.add`.
    """

    def __init__(self) -> None:
        # The first element of the forward key (and of the inverse value) is the
        # character *string* passed to `.add`, not a single character code.
        self._forward: dict[tuple[str, GlyphIndexType],
                            tuple[int, CharacterCodeType]] = {}
        self._inverse: dict[tuple[int, CharacterCodeType],
                            tuple[str, GlyphIndexType]] = {}

    def get(self, charcodes: str,
            glyph_index: GlyphIndexType) -> tuple[int, CharacterCodeType] | None:
        """
        Get the forward mapping from a (character string, glyph index)-pair.

        Parameters
        ----------
        charcodes : str
            The character string that was recorded.
        glyph_index : GlyphIndexType
            The glyph index that was recorded with it.

        Returns
        -------
        tuple[int, CharacterCodeType] or None
            The (subset index, subset character code)-pair, or *None* if the pair
            is not currently mapped.
        """
        return self._forward.get((charcodes, glyph_index))

    def iget(self, subset: int,
             subset_charcode: CharacterCodeType) -> tuple[str, GlyphIndexType]:
        """
        Get the inverse mapping from a (subset, subset charcode)-pair.

        Parameters
        ----------
        subset : int
            The subset index.
        subset_charcode : CharacterCodeType
            The character code within that subset.

        Returns
        -------
        tuple[str, GlyphIndexType]
            The (character string, glyph index)-pair recorded for this subset
            character code.

        Raises
        ------
        KeyError
            If the pair has not been added to this instance.
        """
        return self._inverse[(subset, subset_charcode)]

    def add(self, charcode: str, glyph_index: GlyphIndexType, subset: int,
            subset_charcode: CharacterCodeType) -> None:
        """
        Add a mapping to this instance.

        An existing entry under the same forward or inverse key is overwritten;
        the entry it previously pointed to in the other direction is not removed.

        Parameters
        ----------
        charcode : str
            The character string to record.
        glyph_index : GlyphIndexType
            The corresponding glyph index to record.
        subset : int
            The subset in which the subset character code resides.
        subset_charcode : CharacterCodeType
            The subset character code within the above subset.
        """
        forward_key = (charcode, glyph_index)
        inverse_key = (subset, subset_charcode)
        self._forward[forward_key] = inverse_key
        self._inverse[inverse_key] = forward_key
161
+
162
+
106
163
class CharacterTracker :
107
164
"""
108
165
Helper for font subsetting by the PDF and PS backends.
@@ -114,16 +171,20 @@ class CharacterTracker:
114
171
----------
115
172
subset_size : int
116
173
The size at which characters are grouped into subsets.
117
- used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
174
+ used : dict
118
175
A dictionary of font files to character maps.
119
176
120
- The key is a font filename and subset within that font .
177
+ The key is a font filename.
121
178
122
- The value is a dictionary mapping a character code to a glyph index. Note this
123
- mapping is the inverse of FreeType, which maps glyph indices to character codes.
179
+ The value is a list of dictionaries, each mapping at most *subset_size*
180
+ character codes to glyph indices. Note this mapping is the inverse of FreeType,
181
+ which maps glyph indices to character codes.
124
182
125
183
If *subset_size* is not set, then there will only be one subset per font
126
184
filename.
185
+ glyph_maps : dict
186
+ A dictionary of font files to glyph maps. You probably will want to use the
187
+ `.subset_to_unicode` method instead of this attribute.
127
188
"""
128
189
129
190
def __init__ (self , subset_size : int = 0 ):
@@ -134,7 +195,8 @@ def __init__(self, subset_size: int = 0):
134
195
The maximum size that is supported for an embedded font. If provided, then
135
196
characters will be grouped into these sized subsets.
136
197
"""
137
- self .used : dict [tuple [str , int ], dict [CharacterCodeType , GlyphIndexType ]] = {}
198
+ self .used : dict [str , list [dict [CharacterCodeType , GlyphIndexType ]]] = {}
199
+ self .glyph_maps : dict [str , GlyphMap ] = {}
138
200
self .subset_size = subset_size
139
201
140
202
def track (self , font : FT2Font , s : str ) -> list [tuple [int , CharacterCodeType ]]:
@@ -157,33 +219,24 @@ def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
157
219
whole). If *subset_size* is not specified, then the subset will always be 0
158
220
and the character codes will be returned from the string unchanged.
159
221
"""
160
- font_glyphs = []
161
- char_to_font = font ._get_fontmap (s )
162
- for _c , _f in char_to_font .items ():
163
- charcode = ord (_c )
164
- glyph_index = _f .get_char_index (charcode )
165
- if self .subset_size != 0 :
166
- subset = charcode // self .subset_size
167
- subset_charcode = charcode % self .subset_size
168
- else :
169
- subset = 0
170
- subset_charcode = charcode
171
- self .used .setdefault ((_f .fname , subset ), {})[subset_charcode ] = glyph_index
172
- font_glyphs .append ((subset , subset_charcode ))
173
- return font_glyphs
174
-
175
- def track_glyph (
176
- self , font : FT2Font , charcode : CharacterCodeType ,
177
- glyph : GlyphIndexType ) -> tuple [int , CharacterCodeType ]:
222
+ return [
223
+ self .track_glyph (f , ord (c ), f .get_char_index (ord (c )))
224
+ for c , f in font ._get_fontmap (s ).items ()
225
+ ]
226
+
227
+ def track_glyph (self , font : FT2Font , chars : str | CharacterCodeType ,
228
+ glyph : GlyphIndexType ) -> tuple [int , CharacterCodeType ]:
178
229
"""
179
230
Record character(s) *chars* at glyph index *glyph* as using font *font*.
180
231
181
232
Parameters
182
233
----------
183
234
font : FT2Font
184
235
A font that is being used for the provided string.
185
- charcode : CharacterCodeType
186
- The character code to record.
236
+ chars : str or CharacterCodeType
237
+ The character(s) to record. This may be a single character code, or multiple
238
+ characters in a string, if the glyph maps to several characters. It will be
239
+ normalized to a string internally.
187
240
glyph : GlyphIndexType
188
241
The corresponding glyph index to record.
189
242
@@ -196,33 +249,64 @@ def track_glyph(
196
249
The character code within the above subset. If *subset_size* was not
197
250
specified on this instance, then a single character keeps its own character code,
while a multi-character string is always assigned a code in a later subset.
198
251
"""
199
- if self .subset_size != 0 :
200
- subset = charcode // self .subset_size
201
- subset_charcode = charcode % self .subset_size
252
+ if isinstance (chars , str ):
253
+ charcode = ord (chars [0 ])
254
+ else :
255
+ charcode = chars
256
+ chars = chr (chars )
257
+
258
+ glyph_map = self .glyph_maps .setdefault (font .fname , GlyphMap ())
259
+ if result := glyph_map .get (chars , glyph ):
260
+ return result
261
+
262
+ subset_maps = self .used .setdefault (font .fname , [{}])
263
+ use_next_charmap = (
264
+ # Multi-character glyphs always go in the non-0 subset.
265
+ len (chars ) > 1 or
266
+ # Default to preserving the character code as it was.
267
+ self .subset_size != 0
268
+ and (
269
+ # But start filling a new subset if outside the first block; this
270
+ # preserves ASCII (for Type 3) or the Basic Multilingual Plane (for
271
+ # Type 42).
272
+ charcode >= self .subset_size
273
+ # Or, use a new subset if the character code is already mapped for the
274
+ # first block. This means it's using an alternate glyph.
275
+ or charcode in subset_maps [0 ]
276
+ )
277
+ )
278
+ if use_next_charmap :
279
+ if len (subset_maps ) == 1 or len (subset_maps [- 1 ]) == self .subset_size :
280
+ subset_maps .append ({})
281
+ subset = len (subset_maps ) - 1
282
+ subset_charcode = len (subset_maps [- 1 ])
202
283
else :
203
284
subset = 0
204
285
subset_charcode = charcode
205
- self .used .setdefault ((font .fname , subset ), {})[subset_charcode ] = glyph
286
+ subset_maps [subset ][subset_charcode ] = glyph
287
+ glyph_map .add (chars , glyph , subset , subset_charcode )
206
288
return (subset , subset_charcode )
207
289
208
- def subset_to_unicode (self , index : int ,
209
- charcode : CharacterCodeType ) -> CharacterCodeType :
290
+ def subset_to_unicode (self , fontname : str , subset : int ,
291
+ subset_charcode : CharacterCodeType ) -> str :
210
292
"""
211
293
Map a subset index and character code to the Unicode character(s) it represents.
212
294
213
295
Parameters
214
296
----------
215
- index : int
297
+ fontname : str
298
+ The name of the font, from the *used* dictionary key.
299
+ subset : int
216
300
The subset index within a font.
217
- charcode : CharacterCodeType
301
+ subset_charcode : CharacterCodeType
218
302
The character code within a subset to map back.
219
303
220
304
Returns
221
305
-------
222
- CharacterCodeType
223
- The Unicode character code corresponding to the subsetted one .
306
+ str
307
+ The Unicode character(s) corresponding to the subsetted character code .
224
308
"""
225
- return index * self .subset_size + charcode
309
+ return self .glyph_maps [ fontname ]. iget ( subset , subset_charcode )[ 0 ]
226
310
227
311
228
312
class RendererPDFPSBase (RendererBase ):
0 commit comments