Skip to content

Commit da59b91

Browse files
committed
git push origin master
Merge branch 'jkrumbiegel-jk/more-robust-font-matching'
2 parents 27b9d45 + 0e29c9d commit da59b91

File tree

1 file changed

+84
-59
lines changed

1 file changed

+84
-59
lines changed

src/findfonts.jl

Lines changed: 84 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -29,75 +29,80 @@ else
2929
end
3030
end
3131

32-
family_name(x::String) = replace(lowercase(x), ' ' => "") # normalize
33-
3432
function family_name(x::FTFont)
35-
family_name(x.family_name)
33+
lowercase(x.family_name)
3634
end
3735

3836
function style_name(x::FTFont)
3937
lowercase(x.style_name)
4038
end
4139

40+
const REGULAR_STYLES = ("regular", "normal", "medium", "standard", "roman", "book")
41+
42+
4243
"""
43-
Match a font using the user-specified search string, by increasing the score
44-
for each part that appears in the font family + style name, and decreasing it
45-
for each part that doesn't. The function also prefers shorter font names when
46-
encountering similar scores.
44+
Match a font using the user-specified search string. Each part of the search string
45+
is searched in the family name first which has to match once to include the font
46+
in the candidate list. For fonts with a family match the style
47+
name is matched next. For fonts with the same family and style name scores, regular
48+
fonts are preferred (any font whose style name contains "regular", "normal", "medium", "standard", "roman" or "book")
49+
and as a last tie-breaker, shorter overall font names are preferred.
4750
4851
4952
Example:
5053
5154
If we had only four fonts:
52-
- Helvetica
53-
- Helvetica Neue
55+
- Helvetica Italic
56+
- Helvetica Regular
57+
- Helvetica Neue Regular
5458
- Helvetica Neue Light
55-
- Times New Roman
5659
5760
Then this is how this function would match different search strings:
58-
- "helvetica" => Helvetica
59-
- "helv" => Helvetica
60-
- "HeLvEtIcA" => Helvetica
61-
- "helvetica neue" => Helvetica Neue
62-
- "tica eue" => Helvetica Neue
61+
- "helvetica" => Helvetica Regular
62+
- "helv" => Helvetica Regular
63+
- "HeLvEtIcA" => Helvetica Regular
64+
- "helvetica italic" => Helvetica Italic
65+
- "helve ita" => Helvetica Italic
66+
- "helvetica neue" => Helvetica Neue Regular
67+
- "tica eue" => Helvetica Neue Regular
6368
- "helvetica light" => Helvetica Neue Light
6469
- "light" => no match ("light" only matches a style name, and at least one part has to match the family name)
65-
- "helvetica bold" => Helvetica
66-
- "helvetica neue bold" => Helvetica Neue
67-
- "times" => Times New Roman
68-
- "times new roman" => Times New Roman
70+
- "helvetica bold" => Helvetica Regular
71+
- "helvetica neue bold" => Helvetica Neue Regular
72+
- "times" => no match
6973
- "arial" => no match
7074
"""
71-
function match_font(face::FTFont, searchparts)
75+
function match_font(face::FTFont, searchparts)::Tuple{Int, Int, Bool, Int}
76+
7277
fname = family_name(face)
7378
sname = style_name(face)
74-
# Regular should get selected / full match if we dont specificy any styling!
75-
full_name = if sname == "regular"
76-
"$fname"
77-
else
78-
"$fname $sname"
79+
is_regular_style = any(occursin(s, sname) for s in REGULAR_STYLES)
80+
81+
fontlength_penalty = -(length(fname) + length(sname))
82+
83+
84+
family_matches = any(occursin(part, fname) for part in searchparts)
85+
86+
# return early if family name doesn't have a match
87+
family_matches || return (0, 0, is_regular_style, fontlength_penalty)
88+
89+
family_score = sum(length(part) for part in searchparts if occursin(part, fname))
90+
91+
# now enhance the score with style information
92+
remaining_parts = filter(part -> !occursin(part, fname), searchparts)
93+
94+
if isempty(remaining_parts)
95+
return (family_score, 0, is_regular_style, fontlength_penalty)
7996
end
80-
full_name == "" && return 0
81-
# count letters of parts that occurred in the font name positively and those that didn't negatively.
82-
# we assume that the user knows at least parts of the name and doesn't misspell them
83-
# but they might not know the exact name, especially for long font names, or they
84-
# might simply not want to be forced to spell it out completely.
85-
# therefore we let each part we can find count towards a font, and each that
86-
# doesn't match against it, therefore rejecting fonts that mismatch more parts
87-
# than they match. this heuristic should be good enough to provide a hassle-free
88-
# font selection experience where most spellings that are expected to work, work.
89-
match_score = sum(map(part -> (2 * occursin(part, full_name) - 1) * length(part), searchparts))
90-
# give shorter font names that matched equally well a higher score after the decimal point.
91-
# this should usually pick the "standard" variant of a font as long as it
92-
# doesn't have a special identifier like "regular", "roman", "book", etc.
93-
# to be fair, with these fonts the old fontconfig method also often fails because
94-
# it's not clearly defined what the most normal version is for the user.
95-
# it's therefore better to just have them specify these parts of the name that
96-
# they think are important. this is especially important for attributes that
97-
# fall outside of the standard italic / bold distinction like "condensed",
98-
# "semibold", "oblique", etc.
99-
final_score = match_score + (1.0 / length(full_name))
100-
return final_score
97+
98+
# check if any parts match the style name, otherwise return early
99+
if !any(occursin(part, sname) for part in remaining_parts)
100+
return (family_score, 0, is_regular_style, fontlength_penalty)
101+
end
102+
103+
style_score = sum(length(part) for part in remaining_parts if occursin(part, sname))
104+
105+
(family_score, style_score, is_regular_style, fontlength_penalty)
101106
end
102107

103108
function try_load(fpath)
@@ -108,38 +113,58 @@ function try_load(fpath)
108113
end
109114
end
110115

116+
fontname(ft::FTFont) = "$(family_name(ft)) $(style_name(ft))"
117+
118+
111119
function findfont(
112120
searchstring::String;
113121
italic::Bool=false, # this is unused in the new implementation
114122
bold::Bool=false, # and this as well
115123
additional_fonts::String=""
116124
)
117125
font_folders = copy(fontpaths())
118-
# normalized_name = family_name(name)
126+
119127
isempty(additional_fonts) || pushfirst!(font_folders, additional_fonts)
128+
120129
# \W splits at all groups of non-word characters (like space, -, ., etc)
121130
searchparts = unique(split(lowercase(searchstring), r"\W+", keepempty=false))
122-
candidates = Pair{FTFont, Float64}[]
131+
132+
candidates = Pair{FTFont, Tuple{Int, Int}}[]
133+
134+
best_score_so_far = (0, 0, false, typemin(Int))
135+
best_font = nothing
136+
123137
for folder in font_folders
124138
for font in readdir(folder)
125139
fpath = joinpath(folder, font)
126140
face = try_load(fpath)
127141
face === nothing && continue
142+
128143
score = match_font(face, searchparts)
129-
# only take results with net positive character matches into account
130-
if floor(score) > 0
131-
push!(candidates, face => score)
144+
145+
# we can compare all four tuple elements of the score at once
146+
# in order of importance:
147+
148+
# 1. number of family match characters
149+
# 2. number of style match characters
150+
# 3. is font a "regular" style variant?
151+
# 4. the negative length of the font name, the shorter the better
152+
153+
family_match_score = score[1]
154+
if family_match_score > 0 && score > best_score_so_far
155+
# finalize previous best font to close the font file
156+
if !isnothing(best_font)
157+
finalize(best_font)
158+
end
159+
160+
# new candidate
161+
best_font = face
162+
best_score_so_far = score
132163
else
133-
# help gc a bit! Otherwise, this won't end well with the font keeping tons of open files
134164
finalize(face)
135165
end
136166
end
137167
end
138-
if !isempty(candidates)
139-
sort!(candidates; by=last)
140-
final_candidate = pop!(candidates)
141-
foreach(x-> finalize(x[1]), candidates)
142-
return final_candidate[1]
143-
end
144-
return nothing
168+
169+
best_font
145170
end

0 commit comments

Comments
 (0)