Skip to content

Commit 8d651a6

Browse files
committed
Merge branch 'simpler-track' into branch libraqm-vector
2 parents 062b130 + 662ac58 commit 8d651a6

File tree

3 files changed

+147
-61
lines changed

3 files changed

+147
-61
lines changed

lib/matplotlib/backends/_backend_pdf_ps.py

Lines changed: 114 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,57 @@ def font_as_file(font):
103103
return fh
104104

105105

106+
class GlyphMap:
    """
    A two-way glyph mapping.

    The forward glyph map is from (character string, glyph index)-pairs to
    (subset index, subset character code)-pairs.

    The inverse glyph map is from (subset index, subset character code)-pairs to
    (character string, glyph index)-pairs.
    """

    def __init__(self) -> None:
        # Both tables are filled together by `add`; the character side is always
        # stored as a string (possibly several characters for a single glyph).
        self._forward: dict[tuple[str, GlyphIndexType],
                            tuple[int, CharacterCodeType]] = {}
        self._inverse: dict[tuple[int, CharacterCodeType],
                            tuple[str, GlyphIndexType]] = {}

    def get(self, charcodes: str,
            glyph_index: GlyphIndexType) -> tuple[int, CharacterCodeType] | None:
        """
        Get the forward mapping from a (character string, glyph index)-pair.

        Parameters
        ----------
        charcodes : str
            The character string that was recorded for the glyph.
        glyph_index : GlyphIndexType
            The glyph index that was recorded with *charcodes*.

        Returns
        -------
        tuple[int, CharacterCodeType] or None
            The (subset index, subset character code)-pair, or *None* if the pair
            is not currently mapped.
        """
        return self._forward.get((charcodes, glyph_index))

    def iget(self, subset: int,
             subset_charcode: CharacterCodeType) -> tuple[str, GlyphIndexType]:
        """
        Get the inverse mapping from a (subset, subset charcode)-pair.

        Parameters
        ----------
        subset : int
            The subset index.
        subset_charcode : CharacterCodeType
            The character code within *subset*.

        Returns
        -------
        tuple[str, GlyphIndexType]
            The (character string, glyph index)-pair recorded for that slot.

        Raises
        ------
        KeyError
            If the (subset, subset charcode)-pair has not been added.
        """
        return self._inverse[(subset, subset_charcode)]

    def add(self, charcode: str, glyph_index: GlyphIndexType, subset: int,
            subset_charcode: CharacterCodeType) -> None:
        """
        Add a mapping to this instance.

        Parameters
        ----------
        charcode : str
            The character string to record.
        glyph_index : GlyphIndexType
            The corresponding glyph index to record.
        subset : int
            The subset in which the subset character code resides.
        subset_charcode : CharacterCodeType
            The subset character code within the above subset.
        """
        # Record both directions so that `get` and `iget` stay in step.
        self._forward[(charcode, glyph_index)] = (subset, subset_charcode)
        self._inverse[(subset, subset_charcode)] = (charcode, glyph_index)
155+
156+
106157
class CharacterTracker:
107158
"""
108159
Helper for font subsetting by the PDF and PS backends.
@@ -114,16 +165,20 @@ class CharacterTracker:
114165
----------
115166
subset_size : int
116167
The size at which characters are grouped into subsets.
117-
used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
168+
used : dict
118169
A dictionary of font files to character maps.
119170
120-
The key is a font filename and subset within that font.
171+
The key is a font filename.
121172
122-
The value is a dictionary mapping a character code to a glyph index. Note this
123-
mapping is the inverse of FreeType, which maps glyph indices to character codes.
173+
The value is a list of dictionaries, each mapping at most *subset_size*
174+
character codes to glyph indices. Note this mapping is the inverse of FreeType,
175+
which maps glyph indices to character codes.
124176
125177
If *subset_size* is not set, then there will only be one subset per font
126178
filename.
179+
glyph_maps : dict
180+
A dictionary of font files to glyph maps. You probably will want to use the
181+
`.subset_to_unicode` method instead of this attribute.
127182
"""
128183

129184
def __init__(self, subset_size: int = 0):
@@ -134,7 +189,8 @@ def __init__(self, subset_size: int = 0):
134189
The maximum size that is supported for an embedded font. If provided, then
135190
characters will be grouped into these sized subsets.
136191
"""
137-
self.used: dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]] = {}
192+
self.used: dict[str, list[dict[CharacterCodeType, GlyphIndexType]]] = {}
193+
self.glyph_maps: dict[str, GlyphMap] = {}
138194
self.subset_size = subset_size
139195

140196
def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
@@ -157,33 +213,24 @@ def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
157213
whole). If *subset_size* is not specified, then the subset will always be 0
158214
and the character codes will be returned from the string unchanged.
159215
"""
160-
font_glyphs = []
161-
char_to_font = font._get_fontmap(s)
162-
for _c, _f in char_to_font.items():
163-
charcode = ord(_c)
164-
glyph_index = _f.get_char_index(charcode)
165-
if self.subset_size != 0:
166-
subset = charcode // self.subset_size
167-
subset_charcode = charcode % self.subset_size
168-
else:
169-
subset = 0
170-
subset_charcode = charcode
171-
self.used.setdefault((_f.fname, subset), {})[subset_charcode] = glyph_index
172-
font_glyphs.append((subset, subset_charcode))
173-
return font_glyphs
174-
175-
def track_glyph(
176-
self, font: FT2Font, charcode: CharacterCodeType,
177-
glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]:
216+
return [
217+
self.track_glyph(f, ord(c), f.get_char_index(ord(c)))
218+
for c, f in font._get_fontmap(s).items()
219+
]
220+
221+
def track_glyph(self, font: FT2Font, chars: str | CharacterCodeType,
222+
glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]:
178223
"""
179224
Record character(s) *chars* at glyph index *glyph* as using font *font*.
180225
181226
Parameters
182227
----------
183228
font : FT2Font
184229
A font that is being used for the provided string.
185-
charcode : CharacterCodeType
186-
The character code to record.
230+
chars : str or CharacterCodeType
231+
The character(s) to record. This may be a single character code, or multiple
232+
characters in a string, if the glyph maps to several characters. It will be
233+
normalized to a string internally.
187234
glyph : GlyphIndexType
188235
The corresponding glyph index to record.
189236
@@ -196,33 +243,64 @@ def track_glyph(
196243
The character code within the above subset. If *subset_size* was not
197244
specified on this instance, then this is just *charcode* unmodified.
198245
"""
199-
if self.subset_size != 0:
200-
subset = charcode // self.subset_size
201-
subset_charcode = charcode % self.subset_size
246+
if isinstance(chars, str):
247+
charcode = ord(chars[0])
248+
else:
249+
charcode = chars
250+
chars = chr(chars)
251+
252+
glyph_map = self.glyph_maps.setdefault(font.fname, GlyphMap())
253+
if result := glyph_map.get(chars, glyph):
254+
return result
255+
256+
subset_maps = self.used.setdefault(font.fname, [{}])
257+
use_next_charmap = (
258+
# Multi-character glyphs always go in the non-0 subset.
259+
len(chars) > 1 or
260+
# Default to preserving the character code as it was.
261+
self.subset_size != 0
262+
and (
263+
# But start filling a new subset if outside the first block; this
264+
# preserves ASCII (for Type 3) or the Basic Multilingual Plane (for
265+
# Type 42).
266+
charcode >= self.subset_size
267+
# Or, use a new subset if the character code is already mapped for the
268+
# first block. This means it's using an alternate glyph.
269+
or charcode in subset_maps[0]
270+
)
271+
)
272+
if use_next_charmap:
273+
if len(subset_maps) == 1 or len(subset_maps[-1]) == self.subset_size:
274+
subset_maps.append({})
275+
subset = len(subset_maps) - 1
276+
subset_charcode = len(subset_maps[-1])
202277
else:
203278
subset = 0
204279
subset_charcode = charcode
205-
self.used.setdefault((font.fname, subset), {})[subset_charcode] = glyph
280+
subset_maps[subset][subset_charcode] = glyph
281+
glyph_map.add(chars, glyph, subset, subset_charcode)
206282
return (subset, subset_charcode)
207283

208-
def subset_to_unicode(self, index: int,
209-
charcode: CharacterCodeType) -> CharacterCodeType:
284+
def subset_to_unicode(self, fontname: str, subset: int,
285+
subset_charcode: CharacterCodeType) -> str:
210286
"""
211287
Map a subset index and subset character code back to the Unicode character(s).
212288
213289
Parameters
214290
----------
215-
index : int
291+
fontname : str
292+
The name of the font, from the *used* dictionary key.
293+
subset : int
216294
The subset index within a font.
217-
charcode : CharacterCodeType
295+
subset_charcode : CharacterCodeType
218296
The character code within a subset to map back.
219297
220298
Returns
221299
-------
222-
CharacterCodeType
223-
The Unicode character code corresponding to the subsetted one.
300+
str
301+
The Unicode character(s) corresponding to the subsetted character code.
224302
"""
225-
return index * self.subset_size + charcode
303+
return self.glyph_maps[fontname].iget(subset, subset_charcode)[0]
226304

227305

228306
class RendererPDFPSBase(RendererBase):

lib/matplotlib/backends/backend_pdf.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -948,9 +948,11 @@ def writeFonts(self):
948948
else:
949949
# a normal TrueType font
950950
_log.debug('Writing TrueType font.')
951-
charmap = self._character_tracker.used.get((filename, subset))
952-
if charmap:
953-
fonts[Fx] = self.embedTTF(filename, subset, charmap)
951+
charmaps = self._character_tracker.used.get(filename, [])
952+
if charmaps:
953+
charmap = charmaps[subset]
954+
if charmap:
955+
fonts[Fx] = self.embedTTF(filename, subset, charmap)
954956
self.writeObject(self.fontObject, fonts)
955957

956958
def _write_afm_font(self, filename):
@@ -992,8 +994,12 @@ def _embedTeXFont(self, dvifont):
992994

993995
# Reduce the font to only the glyphs used in the document, get the encoding
994996
# for that subset, and compute various properties based on the encoding.
995-
charmap = self._character_tracker.used[(dvifont.fname, 0)]
996-
chars = frozenset(charmap.keys())
997+
charmap = self._character_tracker.used[dvifont.fname][0]
998+
chars = {
999+
# DVI fonts always map a single glyph to a single character.
1000+
ord(self._character_tracker.subset_to_unicode(dvifont.fname, 0, ccode))
1001+
for ccode in charmap
1002+
}
9971003
t1font = t1font.subset(chars, self._get_subset_prefix(charmap.values()))
9981004
fontdict['BaseFont'] = Name(t1font.prop['FontName'])
9991005
# createType1Descriptor writes the font data as a side effect
@@ -1144,14 +1150,16 @@ def generate_unicode_cmap(subset_index, charmap):
11441150
unicode_groups[-1][1] = ccode
11451151
last_ccode = ccode
11461152

1153+
def _to_unicode(ccode):
1154+
chars = self._character_tracker.subset_to_unicode(
1155+
filename, subset_index, ccode)
1156+
hexstr = chars.encode('utf-16be').hex()
1157+
return f'<{hexstr}>'
1158+
11471159
width = 2 if fonttype == 3 else 4
11481160
unicode_bfrange = []
11491161
for start, end in unicode_groups:
1150-
real_start = self._character_tracker.subset_to_unicode(subset_index,
1151-
start)
1152-
real_end = self._character_tracker.subset_to_unicode(subset_index, end)
1153-
real_values = ' '.join('<%s>' % chr(x).encode('utf-16be').hex()
1154-
for x in range(real_start, real_end+1))
1162+
real_values = ' '.join(_to_unicode(x) for x in range(start, end+1))
11551163
unicode_bfrange.append(
11561164
f'<{start:0{width}x}> <{end:0{width}x}> [{real_values}]')
11571165
unicode_cmap = (self._identityToUnicodeCMap %
@@ -2330,7 +2338,7 @@ def output_singlebyte_chunk(kerns_or_chars):
23302338
kern_mode=Kerning.UNFITTED,
23312339
language=language):
23322340
subset, charcode = self.file._character_tracker.track_glyph(
2333-
item.ft_object, ord(item.char), item.glyph_index)
2341+
item.ft_object, item.char, item.glyph_index)
23342342
if (item.ft_object, subset) != prev_font:
23352343
if singlebyte_chunk:
23362344
output_singlebyte_chunk(singlebyte_chunk)

lib/matplotlib/backends/backend_ps.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,24 +1070,24 @@ def print_figure_impl(fh):
10701070
Ndict = len(_psDefs)
10711071
print("%%BeginProlog", file=fh)
10721072
if not mpl.rcParams['ps.useafm']:
1073-
Ndict += len(ps_renderer._character_tracker.used)
1073+
Ndict += sum(map(len, ps_renderer._character_tracker.used.values()))
10741074
print("/mpldict %d dict def" % Ndict, file=fh)
10751075
print("mpldict begin", file=fh)
10761076
print("\n".join(_psDefs), file=fh)
10771077
if not mpl.rcParams['ps.useafm']:
1078-
for (font, subset_index), charmap in \
1079-
ps_renderer._character_tracker.used.items():
1080-
if not charmap:
1081-
continue
1082-
fonttype = mpl.rcParams['ps.fonttype']
1083-
# Can't use more than 255 chars from a single Type 3 font.
1084-
if len(charmap) > 255:
1085-
fonttype = 42
1086-
fh.flush()
1087-
if fonttype == 3:
1088-
fh.write(_font_to_ps_type3(font, charmap.values()))
1089-
else: # Type 42 only.
1090-
_font_to_ps_type42(font, charmap.values(), fh)
1078+
for font, subsets in ps_renderer._character_tracker.used.items():
1079+
for charmap in subsets:
1080+
if not charmap:
1081+
continue
1082+
fonttype = mpl.rcParams['ps.fonttype']
1083+
# Can't use more than 255 chars from a single Type 3 font.
1084+
if len(charmap) > 255:
1085+
fonttype = 42
1086+
fh.flush()
1087+
if fonttype == 3:
1088+
fh.write(_font_to_ps_type3(font, charmap.values()))
1089+
else: # Type 42 only.
1090+
_font_to_ps_type42(font, charmap.values(), fh)
10911091
print("end", file=fh)
10921092
print("%%EndProlog", file=fh)
10931093

0 commit comments

Comments
 (0)