Skip to content

Commit dbd689f

Browse files
committed
pdf/ps: Track full character map in CharacterTracker
By tracking both character codes and glyph indices, we can handle producing multiple font subsets if needed by a file format.
1 parent 8b22e7a commit dbd689f

File tree

3 files changed

+118
-41
lines changed

3 files changed

+118
-41
lines changed

lib/matplotlib/backends/_backend_pdf_ps.py

Lines changed: 93 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919

2020
if typing.TYPE_CHECKING:
21-
from .ft2font import FT2Font, GlyphIndexType
21+
from .ft2font import CharacterCodeType, FT2Font, GlyphIndexType
2222
from fontTools.ttLib import TTFont
2323

2424

@@ -107,23 +107,102 @@ class CharacterTracker:
107107
"""
108108
Helper for font subsetting by the PDF and PS backends.
109109
110-
Maintains a mapping of font paths to the set of glyphs that are being used from that
111-
font.
112-
"""
110+
Maintains a mapping of font paths to the set of characters and glyphs that are being
111+
used from that font.
113112
114-
def __init__(self) -> None:
115-
self.used: dict[str, set[GlyphIndexType]] = {}
113+
Attributes
114+
----------
115+
subset_size : int
116+
The size at which characters are grouped into subsets.
117+
used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
118+
A dictionary of font files to character maps. The key is a font filename and
119+
subset within that font. The value is a dictionary mapping a character code to a
120+
glyph index. If *subset_size* is not set, then there will only be one subset per
121+
font filename.
122+
"""
116123

117-
def track(self, font: FT2Font, s: str) -> None:
118-
"""Record that string *s* is being typeset using font *font*."""
124+
def __init__(self, subset_size: int = 0):
125+
"""
126+
Parameters
127+
----------
128+
subset_size : int, optional
129+
The maximum size that is supported for an embedded font. If provided, then
130+
characters will be grouped into subsets of this size.
131+
"""
132+
self.used: dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]] = {}
133+
self.subset_size = subset_size
134+
135+
def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
136+
"""
137+
Record that string *s* is being typeset using font *font*.
138+
139+
Parameters
140+
----------
141+
font : FT2Font
142+
A font that is being used for the provided string.
143+
s : str
144+
The string that should be marked as tracked by the provided font.
145+
146+
Returns
147+
-------
148+
list[tuple[int, CharacterCodeType]]
149+
A list of subset and character code pairs corresponding to the input string.
150+
If a *subset_size* is specified on this instance, then the character code
151+
will correspond with the given subset (and not necessarily the string as a
152+
whole). If *subset_size* is not specified, then the subset will always be 0
153+
and the character codes will be returned from the string unchanged.
154+
"""
155+
font_glyphs = []
119156
char_to_font = font._get_fontmap(s)
120157
for _c, _f in char_to_font.items():
121-
glyph_index = _f.get_char_index(ord(_c))
122-
self.used.setdefault(_f.fname, set()).add(glyph_index)
123-
124-
def track_glyph(self, font: FT2Font, glyph: GlyphIndexType) -> None:
125-
"""Record that glyph index *glyph* is being typeset using font *font*."""
126-
self.used.setdefault(font.fname, set()).add(glyph)
158+
charcode = ord(_c)
159+
glyph_index = _f.get_char_index(charcode)
160+
if self.subset_size != 0:
161+
subset = charcode // self.subset_size
162+
subset_charcode = charcode % self.subset_size
163+
else:
164+
subset = 0
165+
subset_charcode = charcode
166+
self.used.setdefault((_f.fname, subset), {})[subset_charcode] = glyph_index
167+
font_glyphs.append((subset, subset_charcode))
168+
return font_glyphs
169+
170+
def track_glyph(
171+
self, font: FT2Font, glyph: GlyphIndexType,
172+
charcode: CharacterCodeType | None = None) -> tuple[int, CharacterCodeType]:
173+
"""
174+
Record that glyph index *glyph*, for character code *charcode*, is being typeset using font *font*.
175+
176+
Parameters
177+
----------
178+
font : FT2Font
179+
A font that is being used for the provided glyph.
180+
glyph : GlyphIndexType
181+
The corresponding glyph index to record.
182+
charcode : CharacterCodeType, optional
183+
The character code to record. If not given, assume it's the same as the
184+
glyph index.
185+
186+
Returns
187+
-------
188+
subset : int
189+
The subset in which the returned character code resides. If *subset_size*
190+
was not specified on this instance, then this is always 0.
191+
subset_charcode : CharacterCodeType
192+
The character code within the above subset. If *subset_size* was not
193+
specified on this instance, then this is just *charcode* unmodified.
194+
"""
195+
if charcode is None:
196+
# Assume we don't care, so use a correspondingly unique value.
197+
charcode = typing.cast('CharacterCodeType', glyph)
198+
if self.subset_size != 0:
199+
subset = charcode // self.subset_size
200+
subset_charcode = charcode % self.subset_size
201+
else:
202+
subset = 0
203+
subset_charcode = charcode
204+
self.used.setdefault((font.fname, subset), {})[subset_charcode] = glyph
205+
return (subset, subset_charcode)
127206

128207

129208
class RendererPDFPSBase(RendererBase):

lib/matplotlib/backends/backend_pdf.py

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -960,9 +960,9 @@ def writeFonts(self):
960960
else:
961961
# a normal TrueType font
962962
_log.debug('Writing TrueType font.')
963-
glyphs = self._character_tracker.used.get(filename)
964-
if glyphs:
965-
fonts[Fx] = self.embedTTF(filename, glyphs)
963+
charmap = self._character_tracker.used.get((filename, 0))
964+
if charmap:
965+
fonts[Fx] = self.embedTTF(filename, charmap)
966966
self.writeObject(self.fontObject, fonts)
967967

968968
def _write_afm_font(self, filename):
@@ -1004,8 +1004,8 @@ def _embedTeXFont(self, dvifont):
10041004

10051005
# Reduce the font to only the glyphs used in the document, get the encoding
10061006
# for that subset, and compute various properties based on the encoding.
1007-
chars = frozenset(self._character_tracker.used[dvifont.fname])
1008-
t1font = t1font.subset(chars, self._get_subset_prefix(chars))
1007+
glyphs = frozenset(self._character_tracker.used[(dvifont.fname, 0)].values())
1008+
t1font = t1font.subset(glyphs, self._get_subset_prefix(glyphs))
10091009
fontdict['BaseFont'] = Name(t1font.prop['FontName'])
10101010
# createType1Descriptor writes the font data as a side effect
10111011
fontdict['FontDescriptor'] = self.createType1Descriptor(t1font)
@@ -1136,7 +1136,7 @@ def _get_xobject_glyph_name(self, filename, glyph_name):
11361136
end
11371137
end"""
11381138

1139-
def embedTTF(self, filename, glyphs):
1139+
def embedTTF(self, filename, charmap):
11401140
"""Embed the TTF font from the named file into the document."""
11411141
font = get_font(filename)
11421142
fonttype = mpl.rcParams['pdf.fonttype']
@@ -1152,7 +1152,7 @@ def cvt(length, upe=font.units_per_EM, nearest=True):
11521152
else:
11531153
return math.ceil(value)
11541154

1155-
def embedTTFType3(font, glyphs, descriptor):
1155+
def embedTTFType3(font, charmap, descriptor):
11561156
"""The Type 3-specific part of embedding a Truetype font"""
11571157
widthsObject = self.reserveObject('font widths')
11581158
fontdescObject = self.reserveObject('font descriptor')
@@ -1199,10 +1199,8 @@ def get_char_width(charcode):
11991199
# that we need from this font.
12001200
differences = []
12011201
multi_byte_chars = set()
1202-
charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
1203-
for gind in glyphs:
1202+
for ccode, gind in charmap.items():
12041203
glyph_name = font.get_glyph_name(gind)
1205-
ccode = charmap.get(gind)
12061204
if ccode is not None and ccode <= 255:
12071205
differences.append((ccode, glyph_name))
12081206
else:
@@ -1217,7 +1215,7 @@ def get_char_width(charcode):
12171215
last_c = c
12181216

12191217
# Make the charprocs array.
1220-
rawcharprocs = _get_pdf_charprocs(filename, glyphs)
1218+
rawcharprocs = _get_pdf_charprocs(filename, charmap.values())
12211219
charprocs = {}
12221220
for charname in sorted(rawcharprocs):
12231221
stream = rawcharprocs[charname]
@@ -1254,7 +1252,7 @@ def get_char_width(charcode):
12541252

12551253
return fontdictObject
12561254

1257-
def embedTTFType42(font, glyphs, descriptor):
1255+
def embedTTFType42(font, charmap, descriptor):
12581256
"""The Type 42-specific part of embedding a Truetype font"""
12591257
fontdescObject = self.reserveObject('font descriptor')
12601258
cidFontDictObject = self.reserveObject('CID font dictionary')
@@ -1264,8 +1262,9 @@ def embedTTFType42(font, glyphs, descriptor):
12641262
wObject = self.reserveObject('Type 0 widths')
12651263
toUnicodeMapObject = self.reserveObject('ToUnicode map')
12661264

1267-
_log.debug("SUBSET %s characters: %s", filename, glyphs)
1268-
with _backend_pdf_ps.get_glyphs_subset(filename, glyphs) as subset:
1265+
_log.debug("SUBSET %s characters: %s", filename, charmap)
1266+
with _backend_pdf_ps.get_glyphs_subset(filename,
1267+
charmap.values()) as subset:
12691268
fontdata = _backend_pdf_ps.font_as_file(subset)
12701269
_log.debug(
12711270
"SUBSET %s %d -> %d", filename,
@@ -1313,11 +1312,9 @@ def embedTTFType42(font, glyphs, descriptor):
13131312
cid_to_gid_map = ['\0'] * 65536
13141313
widths = []
13151314
max_ccode = 0
1316-
charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
1317-
for gind in glyphs:
1315+
for ccode, gind in charmap.items():
13181316
glyph = font.load_glyph(gind,
13191317
flags=LoadFlags.NO_SCALE | LoadFlags.NO_HINTING)
1320-
ccode = charmap[gind]
13211318
widths.append((ccode, cvt(glyph.horiAdvance)))
13221319
if ccode < 65536:
13231320
cid_to_gid_map[ccode] = chr(gind)
@@ -1356,8 +1353,8 @@ def embedTTFType42(font, glyphs, descriptor):
13561353

13571354
# Add XObjects for unsupported chars
13581355
glyph_indices = [
1359-
glyph_index for glyph_index in glyphs
1360-
if not _font_supports_glyph(fonttype, charmap[glyph_index])
1356+
glyph_index for ccode, glyph_index in charmap.items()
1357+
if not _font_supports_glyph(fonttype, ccode)
13611358
]
13621359

13631360
bbox = [cvt(x, nearest=False) for x in full_font.bbox]
@@ -1443,9 +1440,9 @@ def embedTTFType42(font, glyphs, descriptor):
14431440
}
14441441

14451442
if fonttype == 3:
1446-
return embedTTFType3(font, glyphs, descriptor)
1443+
return embedTTFType3(font, charmap, descriptor)
14471444
elif fonttype == 42:
1448-
return embedTTFType42(font, glyphs, descriptor)
1445+
return embedTTFType42(font, charmap, descriptor)
14491446

14501447
def alphaState(self, alpha):
14511448
"""Return name of an ExtGState that sets alpha to the given value."""
@@ -2210,7 +2207,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
22102207

22112208
self.file.output(Op.begin_text)
22122209
for font, fontsize, ccode, glyph_index, ox, oy in glyphs:
2213-
self.file._character_tracker.track_glyph(font, glyph_index)
2210+
self.file._character_tracker.track_glyph(font, glyph_index, ccode)
22142211
fontname = font.fname
22152212
if not _font_supports_glyph(fonttype, ccode):
22162213
# Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in

lib/matplotlib/backends/backend_ps.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,18 +1069,19 @@ def print_figure_impl(fh):
10691069
print("mpldict begin", file=fh)
10701070
print("\n".join(_psDefs), file=fh)
10711071
if not mpl.rcParams['ps.useafm']:
1072-
for font_path, glyphs in ps_renderer._character_tracker.used.items():
1073-
if not glyphs:
1072+
for (font, subset_index), charmap in \
1073+
ps_renderer._character_tracker.used.items():
1074+
if not charmap:
10741075
continue
10751076
fonttype = mpl.rcParams['ps.fonttype']
10761077
# Can't use more than 255 chars from a single Type 3 font.
1077-
if len(glyphs) > 255:
1078+
if len(charmap) > 255:
10781079
fonttype = 42
10791080
fh.flush()
10801081
if fonttype == 3:
1081-
fh.write(_font_to_ps_type3(font_path, glyphs))
1082+
fh.write(_font_to_ps_type3(font, charmap.values()))
10821083
else: # Type 42 only.
1083-
_font_to_ps_type42(font_path, glyphs, fh)
1084+
_font_to_ps_type42(font, charmap.values(), fh)
10841085
print("end", file=fh)
10851086
print("%%EndProlog", file=fh)
10861087

0 commit comments

Comments
 (0)