Skip to content

Commit 2af1b9a

Browse files
LodrKumquat authored and yulian-gaponenko committed
Support word-break property
DEVSIX-4422
1 parent dc43a8e commit 2af1b9a

File tree

11 files changed

+398
-0
lines changed

11 files changed

+398
-0
lines changed

io/src/main/java/com/itextpdf/io/util/TextUtil.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,17 @@ public static boolean isWhitespaceOrNonPrintable(int code) {
290290
return Character.isWhitespace(code) || isNonPrintable(code);
291291
}
292292

293+
public static boolean isLetterOrDigit(Glyph glyph) {
294+
return Character.isLetterOrDigit(glyph.getUnicode());
295+
}
296+
297+
public static boolean isMark(Glyph glyph) {
298+
int unicode = glyph.getUnicode();
299+
return ((((1 << Character.NON_SPACING_MARK) |
300+
(1 << Character.COMBINING_SPACING_MARK) |
301+
(1 << Character.ENCLOSING_MARK)) >> Character.getType(unicode)) & 1) != 0;
302+
}
303+
293304
public static boolean charsetIsSupported(String charsetName) {
294305
try {
295306
return Charset.isSupported(charsetName);

io/src/test/java/com/itextpdf/io/util/TextUtilTest.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,37 @@ public void carriageReturnPrecededByTextFollowedByLineFeedTest() {
102102
new Glyph(0,0, 'a'), carriageReturn, lineFeed);
103103
}
104104

105+
@Test
106+
public void isLetterPositiveTest() {
107+
Glyph glyph = new Glyph(0, 0, 'a');
108+
Assert.assertTrue(TextUtil.isLetterOrDigit(glyph));
109+
}
110+
111+
@Test
112+
public void isDigitPositiveTest() {
113+
Glyph glyph = new Glyph(0, 0, '8');
114+
Assert.assertTrue(TextUtil.isLetterOrDigit(glyph));
115+
}
116+
117+
@Test
118+
public void isLetterOrDigitNegativeTest() {
119+
Glyph glyph = new Glyph(0, 0, '-');
120+
Assert.assertFalse(TextUtil.isLetterOrDigit(glyph));
121+
}
122+
123+
@Test
124+
public void isMarkPositiveTest() {
125+
// TAI THAM SIGN KHUEN TONE-3
126+
Glyph glyph = new Glyph(0, 0, 0x1A77);
127+
Assert.assertTrue(TextUtil.isMark(glyph));
128+
}
129+
130+
@Test
131+
public void isMarkNegativeTest() {
132+
Glyph glyph = new Glyph(0, 0, '-');
133+
Assert.assertFalse(TextUtil.isMark(glyph));
134+
}
135+
105136
private void helper(boolean expected, int currentCRPosition, Glyph...glyphs) {
106137
GlyphLine glyphLine = new GlyphLine(Arrays.asList(glyphs));
107138
Assert.assertTrue(expected == TextUtil.isCarriageReturnFollowedByLineFeed(glyphLine, currentCRPosition));

layout/src/main/java/com/itextpdf/layout/renderer/TextRenderer.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ This file is part of the iText (R) project.
8888
import com.itextpdf.layout.property.Underline;
8989
import com.itextpdf.layout.property.UnitValue;
9090
import com.itextpdf.layout.splitting.ISplitCharacters;
91+
import com.itextpdf.layout.splitting.BreakAllSplitCharacters;
9192
import com.itextpdf.layout.tagging.LayoutTaggingHelper;
9293

9394
import java.util.ArrayList;
@@ -1300,6 +1301,12 @@ boolean textContainsSpecialScriptGlyphs(boolean analyzeSpecialScriptsWordBreakPo
13001301
return false;
13011302
}
13021303

1304+
ISplitCharacters splitCharacters = this.<ISplitCharacters>getProperty(Property.SPLIT_CHARACTERS);
1305+
1306+
if (splitCharacters instanceof BreakAllSplitCharacters) {
1307+
specialScriptsWordBreakPoints = new ArrayList<>();
1308+
}
1309+
13031310
for (int i = text.start; i < text.end; i++) {
13041311
int unicode = text.get(i).getUnicode();
13051312
if (unicode > -1) {
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2020 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.layout.splitting;
24+
25+
import com.itextpdf.io.font.otf.Glyph;
26+
import com.itextpdf.io.font.otf.GlyphLine;
27+
import com.itextpdf.io.util.TextUtil;
28+
29+
/**
30+
* The implementation of {@link ISplitCharacters} that allows breaking within words.
31+
*/
32+
public class BreakAllSplitCharacters implements ISplitCharacters {
33+
34+
@Override
35+
public boolean isSplitCharacter(GlyphLine text, int glyphPos) {
36+
if (text.size() - 1 == glyphPos) {
37+
return true;
38+
}
39+
40+
Glyph glyphToCheck = text.get(glyphPos);
41+
if (!glyphToCheck.hasValidUnicode()) {
42+
return true;
43+
}
44+
int charCode = glyphToCheck.getUnicode();
45+
46+
Glyph nextGlyph = text.get(glyphPos + 1);
47+
if (!nextGlyph.hasValidUnicode()) {
48+
return true;
49+
}
50+
51+
boolean nextGlyphIsLetterOrDigit = TextUtil.isLetterOrDigit(nextGlyph);
52+
boolean nextGlyphIsMark = TextUtil.isMark(nextGlyph);
53+
54+
boolean currentGlyphIsDefaultSplitCharacter = charCode <= ' ' || charCode == '-' || charCode == '\u2010'
55+
// block of whitespaces
56+
|| (charCode >= 0x2002 && charCode <= 0x200b);
57+
58+
return (currentGlyphIsDefaultSplitCharacter || nextGlyphIsLetterOrDigit || nextGlyphIsMark)
59+
&& !TextUtil.isNonBreakingHyphen(glyphToCheck);
60+
}
61+
}

layout/src/main/java/com/itextpdf/layout/splitting/DefaultSplitCharacters.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,12 @@ public boolean isSplitCharacter(GlyphLine text, int glyphPos) {
5757
}
5858
int charCode = text.get(glyphPos).getUnicode();
5959
//Check if a hyphen precedes a digit to denote negative value
60+
// TODO: DEVSIX-4863 why is glyphPos == 0? negative value could be preceded by a whitespace!
6061
if ((glyphPos == 0) && (charCode == '-') && (text.size() - 1 > glyphPos) && (isADigitChar(text, glyphPos + 1))) {
6162
return false;
6263
}
6364
return (charCode <= ' ' || charCode == '-' || charCode == '\u2010'
65+
// block of whitespaces
6466
|| (charCode >= 0x2002 && charCode <= 0x200b)
6567
|| (charCode >= 0x2e80 && charCode < 0xd7a0)
6668
|| (charCode >= 0xf900 && charCode < 0xfb00)
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2020 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.layout.splitting;
24+
25+
import com.itextpdf.io.font.otf.GlyphLine;
26+
27+
/**
28+
* The implementation of {@link ISplitCharacters} that prevents breaking within words.
29+
*/
30+
public class KeepAllSplitCharacters implements ISplitCharacters {
31+
32+
@Override
33+
public boolean isSplitCharacter(GlyphLine text, int glyphPos) {
34+
if (!text.get(glyphPos).hasValidUnicode()) {
35+
return false;
36+
}
37+
int charCode = text.get(glyphPos).getUnicode();
38+
//Check if a hyphen proceeds a digit to denote negative value
39+
// TODO: DEVSIX-4863 why is glyphPos == 0? negative value could be preceded by a whitespace!
40+
if ((glyphPos == 0) && (charCode == '-') && (text.size() - 1 > glyphPos) && (isADigitChar(text, glyphPos + 1))) {
41+
return false;
42+
}
43+
44+
return charCode <= ' ' || charCode == '-' || charCode == '\u2010'
45+
// block of whitespaces
46+
|| (charCode >= 0x2002 && charCode <= 0x200b);
47+
}
48+
49+
private static boolean isADigitChar(GlyphLine text, int glyphPos) {
50+
return Character.isDigit(text.get(glyphPos).getChars()[0]);
51+
}
52+
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
This file is part of the iText (R) project.
3+
Copyright (c) 1998-2020 iText Group NV
4+
Authors: iText Software.
5+
6+
This program is offered under a commercial and under the AGPL license.
7+
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
8+
9+
AGPL licensing:
10+
This program is free software: you can redistribute it and/or modify
11+
it under the terms of the GNU Affero General Public License as published by
12+
the Free Software Foundation, either version 3 of the License, or
13+
(at your option) any later version.
14+
15+
This program is distributed in the hope that it will be useful,
16+
but WITHOUT ANY WARRANTY; without even the implied warranty of
17+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18+
GNU Affero General Public License for more details.
19+
20+
You should have received a copy of the GNU Affero General Public License
21+
along with this program. If not, see <https://www.gnu.org/licenses/>.
22+
*/
23+
package com.itextpdf.layout.splitting;
24+
25+
import com.itextpdf.io.font.otf.Glyph;
26+
import com.itextpdf.io.font.otf.GlyphLine;
27+
import com.itextpdf.test.ExtendedITextTest;
28+
import com.itextpdf.test.annotations.type.UnitTest;
29+
30+
import java.util.ArrayList;
31+
import java.util.List;
32+
import org.junit.Assert;
33+
import org.junit.Test;
34+
import org.junit.experimental.categories.Category;
35+
36+
@Category(UnitTest.class)
37+
public class BreakAllSplitCharactersTest extends ExtendedITextTest {
38+
39+
private static final char charWithFalse = '\u201b';
40+
41+
@Test
42+
public void lastCharTest() {
43+
Assert.assertFalse(isSplitCharacter(new int[]{charWithFalse, charWithFalse, charWithFalse}, 1));
44+
Assert.assertTrue(isSplitCharacter(new int[]{charWithFalse, charWithFalse, charWithFalse}, 2));
45+
}
46+
47+
@Test
48+
public void currentIsNotUnicodeTest() {
49+
Assert.assertTrue(isSplitCharacter(new int[]{charWithFalse, -1, charWithFalse}, 1));
50+
}
51+
52+
@Test
53+
public void nextIsNotUnicodeTest() {
54+
Assert.assertTrue(isSplitCharacter(new int[]{charWithFalse, charWithFalse, -1}, 1));
55+
}
56+
57+
@Test
58+
public void beforeSpaceTest() {
59+
Assert.assertTrue(isSplitCharacter(new int[]{'a', 'a', ' '}, 0));
60+
Assert.assertFalse(isSplitCharacter(new int[]{'a', 'a', ' '}, 1));
61+
Assert.assertTrue(isSplitCharacter(new int[]{'a', ' ', ' '}, 1));
62+
Assert.assertTrue(isSplitCharacter(new int[]{'a', '-', ' '}, 1));
63+
Assert.assertTrue(isSplitCharacter(new int[]{'a', '\u2010', ' '}, 1));
64+
Assert.assertTrue(isSplitCharacter(new int[]{'a', '\u2004', ' '}, 1));
65+
}
66+
67+
@Test
68+
public void beforeSymbolTest() {
69+
Assert.assertFalse(isSplitCharacter(new int[]{charWithFalse, charWithFalse}, 0));
70+
Assert.assertTrue(isSplitCharacter(new int[]{charWithFalse, 'a'}, 0));
71+
// non spacing mark
72+
Assert.assertTrue(isSplitCharacter(new int[]{charWithFalse, '\u0303'}, 0));
73+
// combining mark
74+
Assert.assertTrue(isSplitCharacter(new int[]{charWithFalse, '\u093e'}, 0));
75+
// enclosing mark
76+
Assert.assertTrue(isSplitCharacter(new int[]{charWithFalse, '\u0488'}, 0));
77+
}
78+
79+
private static boolean isSplitCharacter(int[] unicodes, int glyphPosition) {
80+
return new BreakAllSplitCharacters().isSplitCharacter(createGlyphLine(unicodes), glyphPosition);
81+
}
82+
83+
private static GlyphLine createGlyphLine(int[] unicodes) {
84+
List<Glyph> glyphs = new ArrayList<>();
85+
for (int unicode : unicodes) {
86+
glyphs.add(new Glyph(1, unicode));
87+
}
88+
return new GlyphLine(glyphs);
89+
}
90+
}

0 commit comments

Comments
 (0)