Skip to content

Commit ed4ca6c

Browse files
authored
Merge pull request matplotlib#30608 from QuLogic/simpler-track
Prepare `CharacterTracker` for advanced font features
2 parents a1ed4ef + ed5e074 commit ed4ca6c

File tree

3 files changed

+180
-85
lines changed

3 files changed

+180
-85
lines changed

lib/matplotlib/backends/_backend_pdf_ps.py

Lines changed: 120 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@
2222
from fontTools.ttLib import TTFont
2323

2424

25+
_FONT_MAX_GLYPH = {
26+
3: 256,
27+
42: 65536,
28+
}
29+
30+
2531
@functools.lru_cache(50)
2632
def _cached_get_afm_from_fname(fname):
2733
with open(fname, "rb") as fh:
@@ -103,6 +109,57 @@ def font_as_file(font):
103109
return fh
104110

105111

112+
class GlyphMap:
113+
"""
114+
A two-way glyph mapping.
115+
116+
The forward glyph map is from (character string, glyph index)-pairs to
117+
(subset index, subset character code)-pairs.
118+
119+
The inverse glyph map is from (subset index, subset character code)-pairs to
120+
(character string, glyph index)-pairs.
121+
"""
122+
123+
def __init__(self) -> None:
124+
self._forward: dict[tuple[CharacterCodeType, GlyphIndexType],
125+
tuple[int, CharacterCodeType]] = {}
126+
self._inverse: dict[tuple[int, CharacterCodeType],
127+
tuple[CharacterCodeType, GlyphIndexType]] = {}
128+
129+
def get(self, charcodes: str,
130+
glyph_index: GlyphIndexType) -> tuple[int, CharacterCodeType] | None:
131+
"""
132+
Get the forward mapping from a (character string, glyph index)-pair.
133+
134+
This may return *None* if the pair is not currently mapped.
135+
"""
136+
return self._forward.get((charcodes, glyph_index))
137+
138+
def iget(self, subset: int,
139+
subset_charcode: CharacterCodeType) -> tuple[str, GlyphIndexType]:
140+
"""Get the inverse mapping from a (subset, subset charcode)-pair."""
141+
return self._inverse[(subset, subset_charcode)]
142+
143+
def add(self, charcode: str, glyph_index: GlyphIndexType, subset: int,
144+
subset_charcode: CharacterCodeType) -> None:
145+
"""
146+
Add a mapping to this instance.
147+
148+
Parameters
149+
----------
150+
charcode : str
151+
The character(s), as a string, to record.
152+
glyph_index : GlyphIndexType
153+
The corresponding glyph index to record.
154+
subset : int
155+
The subset in which the subset character code resides.
156+
subset_charcode : CharacterCodeType
157+
The subset character code within the above subset.
158+
"""
159+
self._forward[(charcode, glyph_index)] = (subset, subset_charcode)
160+
self._inverse[(subset, subset_charcode)] = (charcode, glyph_index)
161+
162+
106163
class CharacterTracker:
107164
"""
108165
Helper for font subsetting by the PDF and PS backends.
@@ -114,16 +171,20 @@ class CharacterTracker:
114171
----------
115172
subset_size : int
116173
The size at which characters are grouped into subsets.
117-
used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
174+
used : dict
118175
A dictionary of font files to character maps.
119176
120-
The key is a font filename and subset within that font.
177+
The key is a font filename.
121178
122-
The value is a dictionary mapping a character code to a glyph index. Note this
123-
mapping is the inverse of FreeType, which maps glyph indices to character codes.
179+
The value is a list of dictionaries, each mapping at most *subset_size*
180+
character codes to glyph indices. Note this mapping is the inverse of FreeType,
181+
which maps glyph indices to character codes.
124182
125183
If *subset_size* is not set, then there will only be one subset per font
126184
filename.
185+
glyph_maps : dict
186+
A dictionary of font files to glyph maps. You probably will want to use the
187+
`.subset_to_unicode` method instead of this attribute.
127188
"""
128189

129190
def __init__(self, subset_size: int = 0):
@@ -134,7 +195,8 @@ def __init__(self, subset_size: int = 0):
134195
The maximum size that is supported for an embedded font. If provided, then
135196
characters will be grouped into these sized subsets.
136197
"""
137-
self.used: dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]] = {}
198+
self.used: dict[str, list[dict[CharacterCodeType, GlyphIndexType]]] = {}
199+
self.glyph_maps: dict[str, GlyphMap] = {}
138200
self.subset_size = subset_size
139201

140202
def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
@@ -157,33 +219,24 @@ def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
157219
whole). If *subset_size* is not specified, then the subset will always be 0
158220
and the character codes will be returned from the string unchanged.
159221
"""
160-
font_glyphs = []
161-
char_to_font = font._get_fontmap(s)
162-
for _c, _f in char_to_font.items():
163-
charcode = ord(_c)
164-
glyph_index = _f.get_char_index(charcode)
165-
if self.subset_size != 0:
166-
subset = charcode // self.subset_size
167-
subset_charcode = charcode % self.subset_size
168-
else:
169-
subset = 0
170-
subset_charcode = charcode
171-
self.used.setdefault((_f.fname, subset), {})[subset_charcode] = glyph_index
172-
font_glyphs.append((subset, subset_charcode))
173-
return font_glyphs
174-
175-
def track_glyph(
176-
self, font: FT2Font, charcode: CharacterCodeType,
177-
glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]:
222+
return [
223+
self.track_glyph(f, ord(c), f.get_char_index(ord(c)))
224+
for c, f in font._get_fontmap(s).items()
225+
]
226+
227+
def track_glyph(self, font: FT2Font, chars: str | CharacterCodeType,
228+
glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]:
178229
"""
179230
Record character(s) *chars* at glyph index *glyph* as using font *font*.
180231
181232
Parameters
182233
----------
183234
font : FT2Font
184235
A font that is being used for the provided string.
185-
charcode : CharacterCodeType
186-
The character code to record.
236+
chars : str or CharacterCodeType
237+
The character(s) to record. This may be a single character code, or multiple
238+
characters in a string, if the glyph maps to several characters. It will be
239+
normalized to a string internally.
187240
glyph : GlyphIndexType
188241
The corresponding glyph index to record.
189242
@@ -196,33 +249,64 @@ def track_glyph(
196249
The character code within the above subset. If *subset_size* was not
197250
specified on this instance, then this is just *charcode* unmodified.
198251
"""
199-
if self.subset_size != 0:
200-
subset = charcode // self.subset_size
201-
subset_charcode = charcode % self.subset_size
252+
if isinstance(chars, str):
253+
charcode = ord(chars[0])
254+
else:
255+
charcode = chars
256+
chars = chr(chars)
257+
258+
glyph_map = self.glyph_maps.setdefault(font.fname, GlyphMap())
259+
if result := glyph_map.get(chars, glyph):
260+
return result
261+
262+
subset_maps = self.used.setdefault(font.fname, [{}])
263+
use_next_charmap = (
264+
# Multi-character glyphs always go in the non-0 subset.
265+
len(chars) > 1 or
266+
# Default to preserving the character code as it was.
267+
self.subset_size != 0
268+
and (
269+
# But start filling a new subset if outside the first block; this
270+
# preserves ASCII (for Type 3) or the Basic Multilingual Plane (for
271+
# Type 42).
272+
charcode >= self.subset_size
273+
# Or, use a new subset if the character code is already mapped for the
274+
# first block. This means it's using an alternate glyph.
275+
or charcode in subset_maps[0]
276+
)
277+
)
278+
if use_next_charmap:
279+
if len(subset_maps) == 1 or len(subset_maps[-1]) == self.subset_size:
280+
subset_maps.append({})
281+
subset = len(subset_maps) - 1
282+
subset_charcode = len(subset_maps[-1])
202283
else:
203284
subset = 0
204285
subset_charcode = charcode
205-
self.used.setdefault((font.fname, subset), {})[subset_charcode] = glyph
286+
subset_maps[subset][subset_charcode] = glyph
287+
glyph_map.add(chars, glyph, subset, subset_charcode)
206288
return (subset, subset_charcode)
207289

208-
def subset_to_unicode(self, index: int,
209-
charcode: CharacterCodeType) -> CharacterCodeType:
290+
def subset_to_unicode(self, fontname: str, subset: int,
291+
subset_charcode: CharacterCodeType) -> str:
210292
"""
211293
Map a subset index and subset character code back to its Unicode character(s).
212294
213295
Parameters
214296
----------
215-
index : int
297+
fontname : str
298+
The name of the font, from the *used* dictionary key.
299+
subset : int
216300
The subset index within a font.
217-
charcode : CharacterCodeType
301+
subset_charcode : CharacterCodeType
218302
The character code within a subset to map back.
219303
220304
Returns
221305
-------
222-
CharacterCodeType
223-
The Unicode character code corresponding to the subsetted one.
306+
str
307+
The Unicode character(s) corresponding to the subsetted character code.
224308
"""
225-
return index * self.subset_size + charcode
309+
return self.glyph_maps[fontname].iget(subset, subset_charcode)[0]
226310

227311

228312
class RendererPDFPSBase(RendererBase):

lib/matplotlib/backends/backend_pdf.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -368,12 +368,6 @@ def pdfRepr(obj):
368368
"objects")
369369

370370

371-
_FONT_MAX_GLYPH = {
372-
3: 256,
373-
42: 65536,
374-
}
375-
376-
377371
class Reference:
378372
"""
379373
PDF reference object.
@@ -691,7 +685,7 @@ def __init__(self, filename, metadata=None):
691685
self._fontNames = {} # maps filenames to internal font names
692686
self._dviFontInfo = {} # maps pdf names to dvifonts
693687
self._character_tracker = _backend_pdf_ps.CharacterTracker(
694-
_FONT_MAX_GLYPH.get(mpl.rcParams['pdf.fonttype'], 0))
688+
_backend_pdf_ps._FONT_MAX_GLYPH.get(mpl.rcParams['pdf.fonttype'], 0))
695689

696690
self.alphaStates = {} # maps alpha values to graphics state objects
697691
self._alpha_state_seq = (Name(f'A{i}') for i in itertools.count(1))
@@ -948,9 +942,8 @@ def writeFonts(self):
948942
else:
949943
# a normal TrueType font
950944
_log.debug('Writing TrueType font.')
951-
charmap = self._character_tracker.used.get((filename, subset))
952-
if charmap:
953-
fonts[Fx] = self.embedTTF(filename, subset, charmap)
945+
charmap = self._character_tracker.used[filename][subset]
946+
fonts[Fx] = self.embedTTF(filename, subset, charmap)
954947
self.writeObject(self.fontObject, fonts)
955948

956949
def _write_afm_font(self, filename):
@@ -992,8 +985,12 @@ def _embedTeXFont(self, dvifont):
992985

993986
# Reduce the font to only the glyphs used in the document, get the encoding
994987
# for that subset, and compute various properties based on the encoding.
995-
charmap = self._character_tracker.used[(dvifont.fname, 0)]
996-
chars = frozenset(charmap.keys())
988+
charmap = self._character_tracker.used[dvifont.fname][0]
989+
chars = {
990+
# DVI type 1 fonts always map single glyph to single character.
991+
ord(self._character_tracker.subset_to_unicode(dvifont.fname, 0, ccode))
992+
for ccode in charmap
993+
}
997994
t1font = t1font.subset(chars, self._get_subset_prefix(charmap.values()))
998995
fontdict['BaseFont'] = Name(t1font.prop['FontName'])
999996
# createType1Descriptor writes the font data as a side effect
@@ -1144,14 +1141,16 @@ def generate_unicode_cmap(subset_index, charmap):
11441141
unicode_groups[-1][1] = ccode
11451142
last_ccode = ccode
11461143

1144+
def _to_unicode(ccode):
1145+
chars = self._character_tracker.subset_to_unicode(
1146+
filename, subset_index, ccode)
1147+
hexstr = chars.encode('utf-16be').hex()
1148+
return f'<{hexstr}>'
1149+
11471150
width = 2 if fonttype == 3 else 4
11481151
unicode_bfrange = []
11491152
for start, end in unicode_groups:
1150-
real_start = self._character_tracker.subset_to_unicode(subset_index,
1151-
start)
1152-
real_end = self._character_tracker.subset_to_unicode(subset_index, end)
1153-
real_values = ' '.join('<%s>' % chr(x).encode('utf-16be').hex()
1154-
for x in range(real_start, real_end+1))
1153+
real_values = ' '.join(_to_unicode(x) for x in range(start, end+1))
11551154
unicode_bfrange.append(
11561155
f'<{start:0{width}x}> <{end:0{width}x}> [{real_values}]')
11571156
unicode_cmap = (self._identityToUnicodeCMap %
@@ -2325,7 +2324,7 @@ def output_singlebyte_chunk(kerns_or_chars):
23252324
for item in _text_helpers.layout(s, font, kern_mode=Kerning.UNFITTED,
23262325
language=language):
23272326
subset, charcode = self.file._character_tracker.track_glyph(
2328-
item.ft_object, ord(item.char), item.glyph_index)
2327+
item.ft_object, item.char, item.glyph_index)
23292328
if (item.ft_object, subset) != prev_font:
23302329
if singlebyte_chunk:
23312330
output_singlebyte_chunk(singlebyte_chunk)

0 commit comments

Comments
 (0)