@@ -828,24 +828,30 @@ class PsfontsMap:
828828 {'slant': 0.16700000000000001}
829829 >>> entry.filename
830830 """
831- __slots__ = ('_font ' , '_filename ' )
831+ __slots__ = ('_filename ' , '_unparsed' , '_parsed ' )
832832
833833 # Create a filename -> PsfontsMap cache, so that calling
834834 # `PsfontsMap(filename)` with the same filename a second time immediately
835835 # returns the same object.
@lru_cache()
def __new__(cls, filename):
    """
    Create (or fetch from the cache) the map object for *filename*.

    The file is only indexed here, not parsed: each raw line is stored in
    ``self._unparsed`` under its first whitespace-delimited word, and
    ``self._parsed`` starts out empty.

    Parameters
    ----------
    filename : str or path-like
        Path of the font map file; it is opened in binary mode.
    """
    self = object.__new__(cls)
    self._filename = os.fsdecode(filename)
    # Some TeX distributions have enormous pdftex.map files which would
    # take hundreds of milliseconds to parse, but it is easy enough to just
    # store the unparsed lines (keyed by the first word, which is the
    # texname) and parse them on-demand.
    unparsed = {}
    with open(filename, 'rb') as file:
        for line in file:
            # Split on any whitespace rather than on a single space, so
            # that tab-separated entries and entries consisting of only
            # a texname (followed directly by the newline) get the bare
            # texname as key.
            fields = line.split(None, 1)
            # Blank lines have no first word; lines starting with a space
            # are skipped by the line parser anyway, so do not let them
            # shadow a real entry.
            if not fields or line.startswith(b' '):
                continue
            # As before, a later line with the same texname replaces an
            # earlier one.
            unparsed[fields[0]] = line
    self._unparsed = unparsed
    self._parsed = {}
    return self
844848
845849 def __getitem__ (self , texname ):
846850 assert isinstance (texname , bytes )
851+ if texname in self ._unparsed :
852+ self ._parse_and_cache_line (self ._unparsed .pop (texname ))
847853 try :
848- result = self ._font [texname ]
854+ return self ._parsed [texname ]
849855 except KeyError :
850856 fmt = ('A PostScript file for the font whose TeX name is "{0}" '
851857 'could not be found in the file "{1}". The dviread module '
@@ -854,100 +860,83 @@ def __getitem__(self, texname):
854860 'This problem can often be solved by installing '
855861 'a suitable PostScript font package in your (TeX) '
856862 'package manager.' )
857- msg = fmt .format (texname .decode ('ascii' ), self ._filename )
858- msg = textwrap .fill (msg , break_on_hyphens = False ,
859- break_long_words = False )
860- _log .info (msg )
863+ _log .info (textwrap .fill (
864+ fmt .format (texname .decode ('ascii' ), self ._filename ),
865+ break_on_hyphens = False , break_long_words = False ))
861866 raise
862- fn , enc = result .filename , result .encoding
863- if fn is not None and not fn .startswith (b'/' ):
864- fn = find_tex_file (fn )
865- if enc is not None and not enc .startswith (b'/' ):
866- enc = find_tex_file (result .encoding )
867- return result ._replace (filename = fn , encoding = enc )
868-
869- def _parse (self , file ):
870- """
871- Parse the font mapping file.
872-
873- The format is, AFAIK: texname fontname [effects and filenames]
874- Effects are PostScript snippets like ".177 SlantFont",
875- filenames begin with one or two less-than signs. A filename
876- ending in enc is an encoding file, other filenames are font
877- files. This can be overridden with a left bracket: <[foobar
878- indicates an encoding file named foobar.
879867
880- There is some difference between <foo.pfb and <<bar.pfb in
881- subsetting, but I have no example of << in my TeX installation.
868+ def _parse_and_cache_line (self , line ):
869+ """
870+ Parse a line in the font mapping file.
871+
872+ The format is (partially) documented at
873+ http://mirrors.ctan.org/systems/doc/pdftex/manual/pdftex-a.pdf
874+ https://tug.org/texinfohtml/dvips.html#psfonts_002emap
875+ Each line can have the following fields:
876+
877+ - tfmname (first, only required field),
878+ - psname (defaults to tfmname, must come immediately after tfmname if
879+ present),
880+ - fontflags (integer, must come immediately after psname if present,
881+ ignored by us),
882+ - special (SlantFont and ExtendFont, only field that is double-quoted),
883+ - fontfile, encodingfile (optional, prefixed by <, <<, or <[; << always
884+ precedes a font, <[ always precedes an encoding, < can precede either
885+ but then an encoding file must have extension .enc; < and << also
886+ request different font subsetting behaviors but we ignore that; < can
887+ be separated from the filename by whitespace).
888+
889+ special, fontfile, and encodingfile can appear in any order.
882890 """
883891 # If the map file specifies multiple encodings for a font, we
884892 # follow pdfTeX in choosing the last one specified. Such
885893 # entries are probably mistakes but they have occurred.
886894 # http://tex.stackexchange.com/questions/10826/
887- # http://article.gmane.org/gmane.comp.tex.pdftex/4914
888-
889- empty_re = re .compile (br'%|\s*$' )
890- word_re = re .compile (
891- br'''(?x) (?:
892- "<\[ (?P<enc1> [^"]+ )" | # quoted encoding marked by [
893- "< (?P<enc2> [^"]+.enc)" | # quoted encoding, ends in .enc
894- "<<? (?P<file1> [^"]+ )" | # quoted font file name
895- " (?P<eff1> [^"]+ )" | # quoted effects or font name
896- <\[ (?P<enc3> \S+ ) | # encoding marked by [
897- < (?P<enc4> \S+ .enc) | # encoding, ends in .enc
898- <<? (?P<file2> \S+ ) | # font file name
899- (?P<eff2> \S+ ) # effects or font name
900- )''' )
901- effects_re = re .compile (
902- br'''(?x) (?P<slant> -?[0-9]*(?:\.[0-9]+)) \s* SlantFont
903- | (?P<extend>-?[0-9]*(?:\.[0-9]+)) \s* ExtendFont''' )
904-
905- lines = (line .strip ()
906- for line in file
907- if not empty_re .match (line ))
908- for line in lines :
909- effects , encoding , filename = b'' , None , None
910- words = word_re .finditer (line )
911-
912- # The named groups are mutually exclusive and are
913- # referenced below at an estimated order of probability of
914- # occurrence based on looking at my copy of pdftex.map.
915- # The font names are probably unquoted:
916- w = next (words )
917- texname = w .group ('eff2' ) or w .group ('eff1' )
918- w = next (words )
919- psname = w .group ('eff2' ) or w .group ('eff1' )
920-
921- for w in words :
922- # Any effects are almost always quoted:
923- eff = w .group ('eff1' ) or w .group ('eff2' )
924- if eff :
925- effects = eff
926- continue
927- # Encoding files usually have the .enc suffix
928- # and almost never need quoting:
929- enc = (w .group ('enc4' ) or w .group ('enc3' ) or
930- w .group ('enc2' ) or w .group ('enc1' ))
931- if enc :
932- if encoding is not None :
933- _log .debug ('Multiple encodings for %s = %s' ,
934- texname , psname )
935- encoding = enc
936- continue
937- # File names are probably unquoted:
938- filename = w .group ('file2' ) or w .group ('file1' )
939-
940- effects_dict = {}
941- for match in effects_re .finditer (effects ):
942- slant = match .group ('slant' )
943- if slant :
944- effects_dict ['slant' ] = float (slant )
945- else :
946- effects_dict ['extend' ] = float (match .group ('extend' ))
947895
948- self ._font [texname ] = PsFont (
949- texname = texname , psname = psname , effects = effects_dict ,
950- encoding = encoding , filename = filename )
896+ if not line or line .startswith ((b" " , b"%" , b"*" , b";" , b"#" )):
897+ return
898+ tfmname = basename = special = encodingfile = fontfile = None
899+ matches = re .finditer (br'"([^"]*)(?:"|$)|(\S+)' , line )
900+ for match in matches :
901+ quoted , unquoted = match .groups ()
902+ if unquoted :
903+ if unquoted .startswith (b"<<" ): # font
904+ fontfile = unquoted [2 :]
905+ elif unquoted .startswith (b"<[" ): # encoding
906+ encodingfile = unquoted [2 :]
907+ elif unquoted .startswith (b"<" ): # font or encoding
908+ word = (
909+ # <foo => foo
910+ unquoted [1 :]
911+ # < by itself => read the next word
912+ or next (filter (None , next (matches ).groups ())))
913+ if word .endswith (b".enc" ):
914+ encodingfile = word
915+ else :
916+ fontfile = word
917+ elif tfmname is None :
918+ tfmname = unquoted
919+ elif basename is None :
920+ basename = unquoted
921+ elif quoted :
922+ special = quoted
923+ if basename is None :
924+ basename = tfmname
925+ effects = {}
926+ if special :
927+ words = reversed (special .split ())
928+ for word in words :
929+ if word == b"SlantFont" :
930+ effects ["slant" ] = float (next (words ))
931+ elif word == b"ExtendFont" :
932+ effects ["extend" ] = float (next (words ))
933+ if encodingfile is not None and not encodingfile .startswith (b"/" ):
934+ encodingfile = find_tex_file (encodingfile )
935+ if fontfile is not None and not fontfile .startswith (b"/" ):
936+ fontfile = find_tex_file (fontfile )
937+ self ._parsed [tfmname ] = PsFont (
938+ texname = tfmname , psname = basename , effects = effects ,
939+ encoding = encodingfile , filename = fontfile )
951940
952941
953942# Note: this function should ultimately replace the Encoding class, which
0 commit comments