Skip to content

Commit 04d28e9

Browse files
committed
Parse PsfontsMap entries on demand.
See the previous commit for a description of pdftex.map. The vast majority of entries (tens of thousands) in pdftex.map actually end up being unused, and parsing them is just wasted work. This patch takes advantage of the fact that we can quickly recover the TeX font name from a pdftex.map entry (it is just the first word), so we can very quickly build a mapping of TeX font names to unparsed pdftex.map entries, and then parse only the few entries that we actually need, on demand. This speeds up e.g.
```
python -c 'from pylab import *; rcParams["text.usetex"] = True; plot(); savefig("/tmp/test.pdf")'
```
by ~700ms (~20%) on the matplotlib macos.
1 parent 2d5883f commit 04d28e9

File tree

2 files changed

+63
-64
lines changed

2 files changed

+63
-64
lines changed

lib/matplotlib/dviread.py

Lines changed: 60 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -838,24 +838,30 @@ class PsfontsMap:
838838
{'slant': 0.16700000000000001}
839839
>>> entry.filename
840840
"""
841-
__slots__ = ('_font', '_filename')
841+
__slots__ = ('_filename', '_unparsed', '_parsed')
842842

843843
# Create a filename -> PsfontsMap cache, so that calling
844844
# `PsfontsMap(filename)` with the same filename a second time immediately
845845
# returns the same object.
846846
@lru_cache()
847847
def __new__(cls, filename):
848848
self = object.__new__(cls)
849-
self._font = {}
850849
self._filename = os.fsdecode(filename)
850+
# Some TeX distributions have enormous pdftex.map files which would
851+
# take hundreds of milliseconds to parse, but it is easy enough to just
852+
# store the unparsed lines (keyed by the first word, which is the
853+
# texname) and parse them on-demand.
851854
with open(filename, 'rb') as file:
852-
self._parse(file)
855+
self._unparsed = {line.split(b' ', 1)[0]: line for line in file}
856+
self._parsed = {}
853857
return self
854858

855859
def __getitem__(self, texname):
856860
assert isinstance(texname, bytes)
861+
if texname in self._unparsed:
862+
self._parse_and_cache_line(self._unparsed.pop(texname))
857863
try:
858-
result = self._font[texname]
864+
return self._parsed[texname]
859865
except KeyError:
860866
fmt = ('A PostScript file for the font whose TeX name is "{0}" '
861867
'could not be found in the file "{1}". The dviread module '
@@ -864,21 +870,14 @@ def __getitem__(self, texname):
864870
'This problem can often be solved by installing '
865871
'a suitable PostScript font package in your (TeX) '
866872
'package manager.')
867-
msg = fmt.format(texname.decode('ascii'), self._filename)
868-
msg = textwrap.fill(msg, break_on_hyphens=False,
869-
break_long_words=False)
870-
_log.info(msg)
873+
_log.info(textwrap.fill(
874+
fmt.format(texname.decode('ascii'), self._filename),
875+
break_on_hyphens=False, break_long_words=False))
871876
raise
872-
fn, enc = result.filename, result.encoding
873-
if fn is not None and not fn.startswith(b'/'):
874-
fn = find_tex_file(fn)
875-
if enc is not None and not enc.startswith(b'/'):
876-
enc = find_tex_file(result.encoding)
877-
return result._replace(filename=fn, encoding=enc)
878-
879-
def _parse(self, file):
877+
878+
def _parse_and_cache_line(self, line):
880879
"""
881-
Parse the font mapping file.
880+
Parse a line in the font mapping file.
882881
883882
The format is (partially) documented at
884883
http://mirrors.ctan.org/systems/doc/pdftex/manual/pdftex-a.pdf
@@ -904,50 +903,50 @@ def _parse(self, file):
904903
# entries are probably mistakes but they have occurred.
905904
# http://tex.stackexchange.com/questions/10826/
906905

907-
word_re = re.compile(br'"([^"]*)(?:"|$)|(\S+)')
908-
for line in file:
909-
if not line or line.startswith((b" ", b"%", b"*", b";", b"#")):
910-
continue
911-
tfmname = basename = special = encodingfile = fontfile = None
912-
matches = word_re.finditer(line)
913-
for match in matches:
914-
quoted, unquoted = match.groups()
915-
if unquoted:
916-
if unquoted.startswith(b"<<"): # font
917-
fontfile = unquoted[2:]
918-
elif unquoted.startswith(b"<["): # encoding
919-
encodingfile = unquoted[2:]
920-
elif unquoted.startswith(b"<"): # font or encoding
921-
if unquoted == b"<":
922-
word = next(filter(None, next(matches).groups()))
923-
if unquoted.endswith(b".enc"):
924-
encodingfile = word
925-
else:
926-
fontfile = word
927-
else:
928-
if unquoted.endswith(b".enc"):
929-
encodingfile = unquoted[1:]
930-
else:
931-
fontfile = unquoted[1:]
932-
elif tfmname is None:
933-
tfmname = unquoted
934-
elif basename is None:
935-
basename = unquoted
936-
elif quoted:
937-
special = quoted
938-
if basename is None:
939-
basename = tfmname
940-
effects = {}
941-
if special:
942-
words = reversed(special.split())
943-
for word in words:
944-
if word == b"SlantFont":
945-
effects["slant"] = float(next(words))
946-
elif word == b"ExtendFont":
947-
effects["extend"] = float(next(words))
948-
self._font[tfmname] = PsFont(
949-
texname=tfmname, psname=basename, effects=effects,
950-
encoding=encodingfile, filename=fontfile)
906+
if not line or line.startswith((b" ", b"%", b"*", b";", b"#")):
907+
return
908+
tfmname = basename = special = encodingfile = fontfile = None
909+
matches = re.finditer(br'"([^"]*)(?:"|$)|(\S+)', line)
910+
for match in matches:
911+
quoted, unquoted = match.groups()
912+
if unquoted:
913+
if unquoted.startswith(b"<<"): # font
914+
fontfile = unquoted[2:]
915+
elif unquoted.startswith(b"<["): # encoding
916+
encodingfile = unquoted[2:]
917+
elif unquoted.startswith(b"<"): # font or encoding
918+
word = (
919+
# <foo => foo
920+
unquoted[1:]
921+
# < by itself => read the next word
922+
or next(filter(None, next(matches).groups())))
923+
if word.endswith(b".enc"):
924+
encodingfile = word
925+
else:
926+
fontfile = word
927+
elif tfmname is None:
928+
tfmname = unquoted
929+
elif basename is None:
930+
basename = unquoted
931+
elif quoted:
932+
special = quoted
933+
if basename is None:
934+
basename = tfmname
935+
effects = {}
936+
if special:
937+
words = reversed(special.split())
938+
for word in words:
939+
if word == b"SlantFont":
940+
effects["slant"] = float(next(words))
941+
elif word == b"ExtendFont":
942+
effects["extend"] = float(next(words))
943+
if encodingfile is not None and not encodingfile.startswith(b"/"):
944+
encodingfile = find_tex_file(encodingfile)
945+
if fontfile is not None and not fontfile.startswith(b"/"):
946+
fontfile = find_tex_file(fontfile)
947+
self._parsed[tfmname] = PsFont(
948+
texname=tfmname, psname=basename, effects=effects,
949+
encoding=encodingfile, filename=fontfile)
951950

952951

953952
@_api.deprecated("3.3")
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
% used by test_dviread.py
22
TeXfont1 PSfont1 <font1.pfb <font1.enc
33
TeXfont2 PSfont2 <font2.enc <font2.pfa
4-
TeXfont3 PSfont3 "1.23 UnknownEffect" <[enc3.foo <font3.pfa
4+
TeXfont3 PSfont3 "1.23 UnknownEffect" <[enc3.foo < font3.pfa
55
TeXfont4 PSfont4 "-0.1 SlantFont 2.2 ExtendFont" <font4.enc <font4.pfa
66
TeXfont5 PSfont5 <encoding1.enc <encoding2.enc <font5.pfb
77
TeXfont6 PSfont6
8-
TeXfont7 PSfont7 <font7.enc
9-
TeXfont8 PSfont8 <font8.pfb
8+
TeXfont7 PSfont7 < font7.enc
9+
TeXfont8 PSfont8 <<font8.pfb
1010
TeXfont9 </absolute/font9.pfb

0 commit comments

Comments
 (0)