Skip to content

Commit 6b22967

Browse files
committed
[fontscan] tweak font resolution order to make it more principled
1 parent 221cb64 commit 6b22967

File tree

5 files changed

+246
-39
lines changed

5 files changed

+246
-39
lines changed

fontscan/fontmap.go

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -506,18 +506,17 @@ func (fm *FontMap) resolveForLang(candidates []int, lang LangID) *font.Face {
506506
// ResolveFace selects a font based on the current query (set by [FontMap.SetQuery] and [FontMap.SetScript]),
507507
// and supporting the given rune, applying CSS font selection rules.
508508
//
509-
// Fonts are tried in 3 steps :
509+
// Fonts are tried with the following steps :
510510
//
511511
// 1 - Only fonts matching exactly one of the [Query.Families] are considered; the list
512512
// is pruned to keep the best match with [Query.Aspect]
513513
// 2 - Fallback fonts are considered, that is fonts with similar families and fonts
514-
// supporting the current script; the list is also prunned according to [Query.Aspect]
515-
// 3 - All fonts matching the current script (set by [FontMap.SetScript]) are tried,
514+
// supporting the current script; the list is also pruned according to [Query.Aspect]
515+
// 3 - Fonts added manually by [AddFont] and [AddFace] will be searched,
516+
// in the order in which they were added.
517+
// 4 - All fonts matching the current script (set by [FontMap.SetScript]) are tried,
516518
// ignoring [Query.Aspect]
517519
//
518-
// The fonts added manually by [AddFont] and [AddFace]
519-
// will be searched in the order in which they were added.
520-
//
521520
// If no fonts match after these steps, an arbitrary face will be returned.
522521
// This face will be nil only if the underlying font database is empty,
523522
// or if the file system is broken; otherwise the returned [font.Face] is always valid.
@@ -546,8 +545,13 @@ func (fm *FontMap) ResolveFace(r rune) (face *font.Face) {
546545
return face
547546
}
548547

549-
// no need to check from user provided fonts, since the one supporting the given script
550-
// are already added in fallback fonts
548+
// try manually loaded faces even if the typeface doesn't match, looking for matching aspects
549+
// and rune coverage.
550+
// Note that, when [SetScript] has been called, this step is actually not needed,
551+
// since the fonts supporting the given script are already added in [withFallback] fonts
552+
if face := fm.resolveForRune(fm.candidates.manual, r); face != nil {
553+
return face
554+
}
551555

552556
fm.logger.Printf("No font matched for aspect %v, script %s, and rune %U (%c) -> searching by script coverage only", fm.query.Aspect, fm.script, r, r)
553557
scriptCandidates := fm.scriptMap[fm.script]

fontscan/fontmap_test.go

Lines changed: 87 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ import (
66
"io"
77
"log"
88
"os"
9+
"os/exec"
910
"path/filepath"
11+
"strings"
1012
"testing"
1113
"time"
1214

@@ -359,30 +361,109 @@ func TestFindSytemFont(t *testing.T) {
359361
tu.Assert(t, !ok) // user provided font are ignored
360362
}
361363

362-
func TestResolve_ScriptBengali(t *testing.T) {
364+
// the following tests use a "linux" font configuration
365+
func newSampleFontmap() *FontMap {
363366
fm := NewFontMap(log.New(io.Discard, "", 0))
364-
fm.appendFootprints(bengaliFontSet...)
367+
fm.appendFootprints(linuxSampleFontSet...)
368+
for _, fp := range linuxSampleFontSet {
369+
fm.cache(fp, &font.Face{Font: new(font.Font)}) // we need a new pointer for each file
370+
}
371+
return fm
372+
}
373+
374+
func TestDumpSystemFonts(t *testing.T) {
375+
t.Skip()
376+
fontset, err := SystemFonts(nil, os.TempDir())
377+
tu.AssertNoErr(t, err)
378+
379+
var trimmed fontSet
380+
for _, fp := range fontset {
381+
switch fp.Family {
382+
case "nimbussans", "lohitbengali", "lohitdevanagari", "lohitodia",
383+
"notoloopedthai", "notosanskhmer", "khmeros", "khmerossystem",
384+
"freeserif", "freesans", "freemono", "dejavu", "dejavusans":
385+
trimmed = append(trimmed, fp)
386+
}
387+
}
388+
code := fmt.Sprintf(`
389+
package fontscan
390+
import "github.com/go-text/typesetting/font"
391+
392+
// extracted from a linux system
393+
var linuxSampleFontSet =
394+
%#v`, trimmed)
395+
code = strings.ReplaceAll(code, "fontscan.", "")
396+
code = strings.ReplaceAll(code, "Footprint{", "\n{")
397+
code = strings.ReplaceAll(code, ", Index:0x0, Instance:0x0", "")
398+
code = strings.ReplaceAll(code, ", isUserProvided:false", "")
399+
code = strings.ReplaceAll(code, "Location:", "\nLocation:")
400+
code = strings.ReplaceAll(code, "Runes:", "\nRunes:")
401+
code = strings.ReplaceAll(code, "Langs:", "\nLangs:")
402+
code = strings.ReplaceAll(code, "Aspect:", "\nAspect:")
403+
404+
err = os.WriteFile("fontmap_sample_test.go", []byte(code), os.ModePerm)
405+
tu.AssertNoErr(t, err)
406+
407+
err = exec.Command("goimports", "-w", "fontmap_sample_test.go").Run()
408+
tu.AssertNoErr(t, err)
409+
}
410+
411+
func TestResolve_ScriptBengali(t *testing.T) {
412+
fm := newSampleFontmap()
365413

366414
// make sure the same font is selected for a given script, when possible
367415
text := []rune("হয় না।")
368416
fm.SetQuery(Query{Families: []string{"Nimbus Sans"}})
369417
runs := (&shaping.Segmenter{}).Split(shaping.Input{Text: text, RunEnd: len(text)}, fm)
370418
tu.Assert(t, len(runs) == 1)
371-
// only one font is loaded, so there is no Location.File collision
372419
family, _ := fm.FontMetadata(runs[0].Face.Font)
373420
tu.Assert(t, family == "lohitbengali")
374421
}
375422

376423
func TestResolve_ScriptThaana(t *testing.T) {
377-
fm := NewFontMap(log.New(io.Discard, "", 0))
378-
fm.appendFootprints(thaanaFontSet...)
424+
fm := newSampleFontmap()
379425

380426
// make sure the same font is selected for a given script, when possible
381427
text := []rune("އުފަންވަނީ، ދަރަޖަ")
382428
fm.SetQuery(Query{Families: []string{"Nimbus Sans"}})
383429
runs := (&shaping.Segmenter{}).Split(shaping.Input{Text: text, RunEnd: len(text)}, fm)
384430
tu.Assert(t, len(runs) == 1)
385-
// only one font is loaded, so there is no Location.File collision
386431
family, _ := fm.FontMetadata(runs[0].Face.Font)
387432
tu.Assert(t, family == "freeserif")
433+
tu.Assert(t, strings.HasSuffix(fm.FontLocation(runs[0].Face.Font).File, "FreeSerif.ttf"))
434+
}
435+
436+
func TestResolve_ScriptOriya(t *testing.T) {
437+
fm := newSampleFontmap()
438+
439+
text := []rune("ମୁଁ କାଚ ଖାଇପାରେ ଏବଂ ତାହା ମୋର କ୍ଷତି କରିନଥାଏ।")
440+
fm.SetQuery(Query{Families: []string{"Nimbus Sans"}})
441+
runs := (&shaping.Segmenter{}).Split(shaping.Input{Text: text, RunEnd: len(text)}, fm)
442+
tu.Assert(t, len(runs) == 1)
443+
family, _ := fm.FontMetadata(runs[0].Face.Font)
444+
tu.Assert(t, family == "lohitodia")
445+
}
446+
447+
func TestResolve_SciptArabic(t *testing.T) {
448+
fm := newSampleFontmap()
449+
450+
text := []rune("میں کانچ کھا سکتا ہوں اور مجھے تکلیف نہیں ہوتی ۔")
451+
fm.SetQuery(Query{Families: []string{"Nimbus Sans"}})
452+
runs := (&shaping.Segmenter{}).Split(shaping.Input{Text: text, RunEnd: len(text)}, fm)
453+
tu.Assert(t, len(runs) == 10)
454+
family0, _ := fm.FontMetadata(runs[0].Face.Font)
455+
family1, _ := fm.FontMetadata(runs[1].Face.Font)
456+
tu.Assert(t, family0 == "dejavusans")
457+
tu.Assert(t, family1 == "freeserif")
458+
}
459+
460+
func TestResolve_SciptKhmer(t *testing.T) {
461+
fm := newSampleFontmap()
462+
463+
text := []rune("ខ្ញុំអាចញុំកញ្ចក់បាន ដោយគ្មានបញ្ហារ")
464+
fm.SetQuery(Query{Families: []string{"Nimbus Sans"}})
465+
runs := (&shaping.Segmenter{}).Split(shaping.Input{Text: text, RunEnd: len(text)}, fm)
466+
tu.Assert(t, len(runs) == 1)
467+
family, _ := fm.FontMetadata(runs[0].Face.Font)
468+
tu.Assert(t, family == "khmeros")
388469
}

fontscan/footprint.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package fontscan
33
import (
44
"fmt"
55
"os"
6+
"path/filepath"
7+
"strings"
68

79
"github.com/go-text/typesetting/font"
810
ot "github.com/go-text/typesetting/font/opentype"
@@ -98,6 +100,22 @@ func newFootprintFromLoader(ld *ot.Loader, isUserProvided bool, buffer scanBuffe
98100
return out, buffer, nil
99101
}
100102

103+
// isTruetypeHint returns true for .ttf and .ttc font files, based on the file extension only
104+
func (fp *Footprint) isTruetypeHint() bool {
105+
switch strings.ToLower(filepath.Ext(fp.Location.File)) {
106+
case ".ttf", ".ttc":
107+
return true
108+
default:
109+
return false
110+
}
111+
}
112+
113+
// isMonoHint returns true if "mono" is included in the family name
114+
// this is not very precise but much more efficient than using [font.Font.IsMonospace]
115+
func (fp *Footprint) isMonoHint() bool {
116+
return strings.Contains(fp.Family, "mono")
117+
}
118+
101119
// loadFromDisk assumes the footprint location refers to the file system
102120
func (fp *Footprint) loadFromDisk() (*font.Face, error) {
103121
location := fp.Location

fontscan/langset.go

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,93 @@ func (ls *LangSet) deserializeFrom(data []byte) (int, error) {
116116
}
117117
return langSetSize, nil
118118
}
119+
120+
// scriptToLang maps a script to a language tag that is reasonably
121+
// representative of the script. This will usually be the
122+
// most widely spoken or used language written in that script:
123+
// for instance, the sample language for `Cyrillic`
124+
// is 'ru' (Russian), the sample language for `Arabic` is 'ar'.
125+
//
126+
// For some scripts, no sample language will be returned because there
127+
// is no language that is sufficiently representative. The best
128+
// example of this is `Han`, where various different
129+
// variants of written Chinese, Japanese, and Korean all use
130+
// significantly different sets of Han characters and forms
131+
// of shared characters. No sample language can be provided
132+
// for many historical scripts as well.
133+
//
134+
// inspired by pango/pango-language.c
135+
var scriptToLang = map[language.Script]LangID{
136+
language.Arabic: langAr,
137+
language.Armenian: langHy,
138+
language.Bengali: langBn,
139+
// Used primarily in Taiwan, but not part of the standard
140+
// zh-tw orthography
141+
language.Bopomofo: 0,
142+
language.Cherokee: langChr,
143+
language.Coptic: langCop,
144+
language.Cyrillic: langRu,
145+
// Deseret was used to write English
146+
language.Deseret: 0,
147+
language.Devanagari: langHi,
148+
language.Ethiopic: langAm,
149+
language.Georgian: langKa,
150+
language.Gothic: 0,
151+
language.Greek: langEl,
152+
language.Gujarati: langGu,
153+
language.Gurmukhi: langPa,
154+
language.Han: 0,
155+
language.Hangul: langKo,
156+
language.Hebrew: langHe,
157+
language.Hiragana: langJa,
158+
language.Kannada: langKn,
159+
language.Katakana: langJa,
160+
language.Khmer: langKm,
161+
language.Lao: langLo,
162+
language.Latin: langEn,
163+
language.Malayalam: langMl,
164+
language.Mongolian: langMn,
165+
language.Myanmar: langMy,
166+
// Ogham was used to write old Irish
167+
language.Ogham: 0,
168+
language.Old_Italic: 0,
169+
language.Oriya: langOr,
170+
language.Runic: 0,
171+
language.Sinhala: langSi,
172+
language.Syriac: langSyr,
173+
language.Tamil: langTa,
174+
language.Telugu: langTe,
175+
language.Thaana: langDv,
176+
language.Thai: langTh,
177+
language.Tibetan: langBo,
178+
language.Canadian_Aboriginal: langIu,
179+
language.Yi: 0,
180+
language.Tagalog: langTl,
181+
// Philippine languages/scripts
182+
language.Hanunoo: langHnn,
183+
language.Buhid: langBku,
184+
language.Tagbanwa: langTbw,
185+
186+
language.Braille: 0,
187+
language.Cypriot: 0,
188+
language.Limbu: 0,
189+
// Used for Somali (so) in the past
190+
language.Osmanya: 0,
191+
// The Shavian alphabet was designed for English
192+
language.Shavian: 0,
193+
language.Linear_B: 0,
194+
language.Tai_Le: 0,
195+
language.Ugaritic: langUga,
196+
197+
language.New_Tai_Lue: 0,
198+
language.Buginese: langBug,
199+
// The original script for Old Church Slavonic (chu), later
200+
// written with Cyrillic
201+
language.Glagolitic: 0,
202+
// Used for Berber (ber), but Arabic script is more common
203+
language.Tifinagh: 0,
204+
language.Syloti_Nagri: langSyl,
205+
language.Old_Persian: langPeo,
206+
207+
language.Nko: langNqo,
208+
}

0 commit comments

Comments
 (0)