Skip to content

Commit 9b54e4b

Browse files
committed
PDFBOX-6103: add DFLT script support in GSUB system for OpenType fonts, by Fabrice Calafat
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1929959 13f79535-47bb-0310-9956-ffa450edef68
1 parent ee1a431 commit 9b54e4b

File tree

6 files changed

+266
-2
lines changed

6 files changed

+266
-2
lines changed

fontbox/src/main/appended-resources/META-INF/LICENSE

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Apache FontBox is based on contributions made to the original FontBox project:
3131
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3232
SUCH DAMAGE.
3333

34-
Lohit-Bengali, Lohit-Devanagari, Lohit-Gujarati fonts (https://pagure.io/lohit):
34+
Lohit fonts (https://pagure.io/lohit):
3535

3636
Copyright 2011-15 Lohit Fonts Project contributors
3737

@@ -130,4 +130,11 @@ Lohit-Bengali, Lohit-Devanagari, Lohit-Gujarati fonts (https://pagure.io/lohit):
130130
FoglihtenNo07 font Copyright 2011-2024 Grzegorz Luk
131131
https://www.glukfonts.pl/font.php?l=de&font=FoglihtenNo07
132132

133-
SIL Open Font License, see above
133+
SIL Open Font License, see above
134+
135+
Josefin Sans fonts (https://fonts.google.com/specimen/Josefin+Sans)
136+
137+
Copyright 2010 The Josefin Sans Project Authors (https://github.com/ThomasJockin/JosefinSansFont-master),
138+
with Reserved Font Name "Josefin Sans".
139+
140+
SIL Open Font License, see above

fontbox/src/main/java/org/apache/fontbox/ttf/gsub/GsubWorkerFactory.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ public GsubWorker getGsubWorker(CmapLookup cmapLookup, GsubData gsubData)
3838
{
3939
//TODO this needs to be redesigned / improved because if a font supports several languages,
4040
// it will choose one of them and maybe not the one expected.
41+
// See also PDFBOX-5700 and PDFBOX-5729
42+
// For example, NotoSans-Regular hits Devanagari first
43+
// See also GlyphSubstitutionDataExtractor.getSupportedLanguage() which decides the language?!
4144
LOG.debug("Language: {}", gsubData.getLanguage());
4245
switch (gsubData.getLanguage())
4346
{
@@ -49,6 +52,8 @@ public GsubWorker getGsubWorker(CmapLookup cmapLookup, GsubData gsubData)
4952
return new GsubWorkerForGujarati(cmapLookup, gsubData);
5053
case LATIN:
5154
return new GsubWorkerForLatin(gsubData);
55+
case DFLT:
56+
return new GsubWorkerForDflt(gsubData);
5257
default:
5358
return new DefaultGsubWorker();
5459
}
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.fontbox.ttf.gsub;
19+
20+
import java.util.ArrayList;
21+
import java.util.Arrays;
22+
import java.util.Collections;
23+
import java.util.List;
24+
25+
import org.apache.fontbox.ttf.model.GsubData;
26+
import org.apache.fontbox.ttf.model.ScriptFeature;
27+
import org.apache.logging.log4j.LogManager;
28+
import org.apache.logging.log4j.Logger;
29+
30+
/**
31+
* DFLT (Default) script-specific implementation of GSUB system.
32+
*
33+
* <p>According to the OpenType specification, a Script table with the script tag 'DFLT' (default)
34+
* is used in fonts to define features that are not script-specific. Applications should use the
35+
* DFLT script table when no script table exists for the specific script of the text being
36+
* processed, or when text lacks a defined script (containing only symbols or punctuation).</p>
37+
*
38+
* <p>This implementation applies common, script-neutral typographic features that work across
39+
* writing systems. The feature order follows standard OpenType recommendations for universal
40+
* glyph substitutions.</p>
41+
*
42+
* <p>Reference:
43+
* <a href="https://learn.microsoft.com/en-us/typography/opentype/spec/chapter2#scriptlist-table">
44+
* OpenType ScriptList Table Specification</a></p>
45+
*/
46+
public class GsubWorkerForDflt implements GsubWorker
47+
{
48+
private static final Logger LOG = LogManager.getLogger(GsubWorkerForDflt.class);
49+
50+
/**
51+
* Script-neutral features in recommended processing order.
52+
*
53+
* <ul>
54+
* <li>ccmp - Glyph Composition/Decomposition (must be first)</li>
55+
* <li>liga - Standard Ligatures</li>
56+
* <li>clig - Contextual Ligatures</li>
57+
* <li>calt - Contextual Alternates</li>
58+
* </ul>
59+
*
60+
* Note: This feature list focuses on common GSUB (substitution) features.
61+
* GPOS features like 'kern', 'mark', 'mkmk' are handled separately.
62+
*/
63+
private static final List<String> FEATURES_IN_ORDER = Arrays.asList("ccmp", "liga", "clig", "calt");
64+
65+
private final GsubData gsubData;
66+
67+
GsubWorkerForDflt(GsubData gsubData)
68+
{
69+
this.gsubData = gsubData;
70+
}
71+
72+
@Override
73+
public List<Integer> applyTransforms(List<Integer> originalGlyphIds)
74+
{
75+
List<Integer> intermediateGlyphsFromGsub = originalGlyphIds;
76+
77+
for (String feature : FEATURES_IN_ORDER)
78+
{
79+
if (!gsubData.isFeatureSupported(feature))
80+
{
81+
LOG.debug("the feature " + feature + " was not found");
82+
continue;
83+
}
84+
85+
LOG.debug("applying the feature " + feature);
86+
87+
ScriptFeature scriptFeature = gsubData.getFeature(feature);
88+
89+
intermediateGlyphsFromGsub = applyGsubFeature(scriptFeature,
90+
intermediateGlyphsFromGsub);
91+
}
92+
93+
return Collections.unmodifiableList(intermediateGlyphsFromGsub);
94+
}
95+
96+
private List<Integer> applyGsubFeature(ScriptFeature scriptFeature,
97+
List<Integer> originalGlyphs)
98+
{
99+
if (scriptFeature.getAllGlyphIdsForSubstitution().isEmpty())
100+
{
101+
LOG.debug("getAllGlyphIdsForSubstitution() for " + scriptFeature.getName() + " is empty");
102+
return originalGlyphs;
103+
}
104+
105+
GlyphArraySplitter glyphArraySplitter = new GlyphArraySplitterRegexImpl(
106+
scriptFeature.getAllGlyphIdsForSubstitution());
107+
108+
List<List<Integer>> tokens = glyphArraySplitter.split(originalGlyphs);
109+
List<Integer> gsubProcessedGlyphs = new ArrayList<>();
110+
111+
for (List<Integer> chunk : tokens)
112+
{
113+
if (scriptFeature.canReplaceGlyphs(chunk))
114+
{
115+
// gsub system kicks in, you get the glyphId directly
116+
List<Integer> replacementForGlyphs = scriptFeature.getReplacementForGlyphs(chunk);
117+
gsubProcessedGlyphs.addAll(replacementForGlyphs);
118+
}
119+
else
120+
{
121+
gsubProcessedGlyphs.addAll(chunk);
122+
}
123+
}
124+
125+
LOG.debug("originalGlyphs: " + originalGlyphs + ", gsubProcessedGlyphs: "
126+
+ gsubProcessedGlyphs);
127+
128+
return gsubProcessedGlyphs;
129+
}
130+
}

fontbox/src/main/java/org/apache/fontbox/ttf/model/Language.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public enum Language
3636
DEVANAGARI(new String[] { "dev2", "deva" }),
3737
GUJARATI(new String[] { "gjr2", "gujr" }),
3838
LATIN(new String[] { "latn" }),
39+
DFLT(new String[] { "DFLT" }),
3940

4041
/**
4142
* An entry explicitly denoting the absence of any concrete language. May be useful when no actual glyph
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.fontbox.ttf.gsub;
19+
20+
import org.apache.fontbox.ttf.CmapLookup;
21+
import org.apache.fontbox.ttf.TTFParser;
22+
import org.apache.fontbox.ttf.TrueTypeFont;
23+
import org.apache.pdfbox.io.RandomAccessReadBufferedFile;
24+
import static org.junit.jupiter.api.Assertions.assertEquals;
25+
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
26+
import static org.junit.jupiter.api.Assertions.assertThrows;
27+
import static org.junit.jupiter.api.Assertions.assertTrue;
28+
import org.junit.jupiter.api.BeforeAll;
29+
import org.junit.jupiter.api.Test;
30+
import org.junit.jupiter.params.ParameterizedTest;
31+
import org.junit.jupiter.params.provider.Arguments;
32+
import org.junit.jupiter.params.provider.MethodSource;
33+
34+
import java.io.IOException;
35+
import java.util.ArrayList;
36+
import java.util.Arrays;
37+
import java.util.Collections;
38+
import java.util.List;
39+
import java.util.stream.Stream;
40+
41+
/**
42+
* Integration test for {@link GsubWorkerForDflt}. Tests DFLT (default) script GSUB worker.
43+
*
44+
* <p>The DFLT script is used for script-neutral typographic features that work across
45+
* writing systems, particularly when text lacks a specific script (symbols, punctuation)
46+
* or when no script-specific table exists.</p>
47+
*
48+
* <p>JosefinSans-Italic.ttf (SIL Open Font License) uses DFLT script and has standard ligatures
49+
* (fi, fl) which are used for testing GSUB transformations. Words without ligature sequences
50+
* (like "font" or "code") pass through unchanged, while words containing "fi" or "fl" are
51+
* transformed to use ligature glyphs.</p>
52+
*
53+
*/
54+
class GsubWorkerForDfltTest
55+
{
56+
private static final String JOSEFIN_SANS_TTF = "src/test/resources/ttf/JosefinSans-Italic.ttf";
57+
58+
private static CmapLookup cmapLookup;
59+
private static GsubWorker gsubWorkerForDflt;
60+
61+
@BeforeAll
62+
static void init() throws IOException
63+
{
64+
try (TrueTypeFont ttf = new TTFParser().parse(new RandomAccessReadBufferedFile(JOSEFIN_SANS_TTF)))
65+
{
66+
cmapLookup = ttf.getUnicodeCmapLookup();
67+
gsubWorkerForDflt = new GsubWorkerFactory().getGsubWorker(cmapLookup, ttf.getGsubData());
68+
}
69+
}
70+
71+
@Test
72+
void testCorrectWorkerType()
73+
{
74+
assertInstanceOf(GsubWorkerForDflt.class, gsubWorkerForDflt);
75+
}
76+
77+
static Stream<Arguments> provideTransformTestCases()
78+
{
79+
return Stream.of(
80+
// No ligature - text passes through unchanged
81+
Arguments.of("code", Arrays.asList(229, 293, 235, 237), "no ligature sequences"),
82+
// Simple ligature
83+
Arguments.of("fi", Collections.singletonList(407), "fi -> ligature"),
84+
// Ligature within word
85+
Arguments.of("office", Arrays.asList(293, 257, 407, 229, 237), "ffi -> f + fi-ligature"),
86+
// Multi-f sequence
87+
Arguments.of("ffl", Arrays.asList(257, 408), "ffl -> f + fl-ligature")
88+
);
89+
}
90+
91+
@ParameterizedTest(name = "{0}: {2}")
92+
@MethodSource("provideTransformTestCases")
93+
void testApplyTransforms(String input, List<Integer> expectedGlyphs, String description)
94+
{
95+
List<Integer> result = gsubWorkerForDflt.applyTransforms(getGlyphIds(input));
96+
assertEquals(expectedGlyphs, result);
97+
}
98+
99+
@Test
100+
void testApplyTransforms_immutableResult()
101+
{
102+
List<Integer> result = gsubWorkerForDflt.applyTransforms(getGlyphIds("abc"));
103+
104+
assertThrows(UnsupportedOperationException.class, () -> result.add(999));
105+
assertThrows(UnsupportedOperationException.class, () -> result.remove(0));
106+
}
107+
108+
private static List<Integer> getGlyphIds(String word)
109+
{
110+
List<Integer> originalGlyphIds = new ArrayList<>();
111+
112+
for (char unicodeChar : word.toCharArray())
113+
{
114+
int glyphId = cmapLookup.getGlyphId(unicodeChar);
115+
assertTrue(glyphId > 0);
116+
originalGlyphIds.add(glyphId);
117+
}
118+
119+
return originalGlyphIds;
120+
}
121+
}
62.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)