Skip to content

Commit fc5d1b2

Browse files
committed
Enable Khmer and Lao word wrapping
- handle multiple chars within a glyph on word wrapping
- check each char rather than a whole glyph when detecting script for word wrapping

DEVSIX-4106
1 parent 9f69f26 commit fc5d1b2

File tree

5 files changed

+222
-59
lines changed

5 files changed

+222
-59
lines changed

layout/src/main/java/com/itextpdf/layout/renderer/LineRenderer.java

Lines changed: 79 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ This file is part of the iText (R) project.
4444
package com.itextpdf.layout.renderer;
4545

4646
import com.itextpdf.io.LogMessageConstant;
47+
import com.itextpdf.io.font.otf.ActualTextIterator;
4748
import com.itextpdf.io.font.otf.Glyph;
4849
import com.itextpdf.io.font.otf.GlyphLine;
4950
import com.itextpdf.io.util.ArrayUtil;
@@ -448,7 +449,7 @@ && isTextRendererAndRequiresSpecialScriptPreLayoutProcessing(childRenderer)) {
448449
(childPos, specialScriptLayoutResults, wasParentsHeightClipped, floatsOverflowedToNextLine);
449450

450451
curWidth -= getCurWidthSpecialScriptsDecrement(childPos, lastFittingChildRendererData.childIndex,
451-
lastFittingChildRendererData.childLayoutResult, specialScriptLayoutResults);
452+
specialScriptLayoutResults);
452453

453454
childPos = lastFittingChildRendererData.childIndex;
454455
childResult = lastFittingChildRendererData.childLayoutResult;
@@ -1202,7 +1203,7 @@ private void updateChildrenParent() {
12021203
*
12031204
* @return total number of trimmed glyphs.
12041205
*/
1205-
private int trimFirst() {
1206+
int trimFirst() {
12061207
int totalNumberOfTrimmedGlyphs = 0;
12071208
for (IRenderer renderer : childRenderers) {
12081209
if (FloatingHelper.isRendererFloating(renderer)) {
@@ -1267,7 +1268,7 @@ static void updateSpecialScriptLayoutResults(Map<Integer, LayoutResult> specialS
12671268
}
12681269
}
12691270

1270-
static float getCurWidthSpecialScriptsDecrement(int childPos, int newChildPos, LayoutResult newLayoutResult,
1271+
static float getCurWidthSpecialScriptsDecrement(int childPos, int newChildPos,
12711272
Map<Integer, LayoutResult> specialScriptLayoutResults) {
12721273
float decrement = 0.0f;
12731274
// if childPos == newChildPos, curWidth doesn't include width of the current childRenderer yet, so no decrement is needed
@@ -1277,12 +1278,6 @@ static float getCurWidthSpecialScriptsDecrement(int childPos, int newChildPos, L
12771278
decrement += specialScriptLayoutResults.get(i).getOccupiedArea().getBBox().getWidth();
12781279
}
12791280
}
1280-
1281-
// when LayoutResult.NOTHING has artificially been created in getIndexOfRendererWithLastFullyFittingWord,
1282-
// its occupiedArea isn't 0.0x0.0 as it should be, so we need to subtract it here twice, because it'll be added later
1283-
if (newLayoutResult.getStatus() == LayoutResult.NOTHING) {
1284-
decrement += specialScriptLayoutResults.get(newChildPos).getOccupiedArea().getBBox().getWidth();
1285-
}
12861281
}
12871282

12881283
return decrement;
@@ -1315,6 +1310,7 @@ void specialScriptPreLayoutProcessing(int childPos) {
13151310
String sequentialTextContent = info.sequentialTextContent;
13161311
List<Integer> indicesOfFloating = info.indicesOfFloating;
13171312
List<Integer> possibleBreakPointsGlobal = TypographyUtils.getPossibleBreaks(sequentialTextContent);
1313+
13181314
distributePossibleBreakPointsOverSequentialTextRenderers(childPos, numberOfSequentialTextRenderers,
13191315
possibleBreakPointsGlobal, indicesOfFloating);
13201316
}
@@ -1345,26 +1341,35 @@ SpecialScriptsContainingTextRendererSequenceInfo getSpecialScriptsContainingText
13451341
void distributePossibleBreakPointsOverSequentialTextRenderers(
13461342
int childPos, int numberOfSequentialTextRenderers, List<Integer> possibleBreakPointsGlobal,
13471343
List<Integer> indicesOfFloating) {
1348-
int alreadyProcessedNumberOfGlyphs = 0;
1344+
int alreadyProcessedNumberOfCharsWithinGlyphLines = 0;
13491345
int indexToBeginWith = 0;
13501346
for (int i = 0; i < numberOfSequentialTextRenderers; i++) {
13511347
if (!indicesOfFloating.contains(i)) {
13521348
TextRenderer childTextRenderer = (TextRenderer) childRenderers.get(childPos + i);
1353-
int length = childTextRenderer.length();
1349+
List<Integer> amountOfCharsBetweenTextStartAndActualTextChunk = new ArrayList<>();
1350+
List<Integer> glyphLineBasedIndicesOfActualTextChunkEnds = new ArrayList<>();
1351+
1352+
fillActualTextChunkRelatedLists(childTextRenderer.getText(),
1353+
amountOfCharsBetweenTextStartAndActualTextChunk, glyphLineBasedIndicesOfActualTextChunkEnds);
1354+
13541355
List<Integer> possibleBreakPoints = new ArrayList<Integer>();
13551356
for (int j = indexToBeginWith; j < possibleBreakPointsGlobal.size(); j++) {
1356-
int shiftedBreakPoint = possibleBreakPointsGlobal.get(j) - alreadyProcessedNumberOfGlyphs;
1357-
if (shiftedBreakPoint > length) {
1357+
int shiftedBreakPoint = possibleBreakPointsGlobal.get(j)
1358+
- alreadyProcessedNumberOfCharsWithinGlyphLines;
1359+
int amountOfCharsBetweenTextStartAndTextEnd = amountOfCharsBetweenTextStartAndActualTextChunk
1360+
.get(amountOfCharsBetweenTextStartAndActualTextChunk.size() - 1);
1361+
if (shiftedBreakPoint > amountOfCharsBetweenTextStartAndTextEnd) {
13581362
indexToBeginWith = j;
1359-
alreadyProcessedNumberOfGlyphs += length;
1363+
alreadyProcessedNumberOfCharsWithinGlyphLines += amountOfCharsBetweenTextStartAndTextEnd;
13601364
break;
13611365
}
1362-
possibleBreakPoints.add(shiftedBreakPoint + childTextRenderer.text.start);
1366+
possibleBreakPoints.add(shiftedBreakPoint);
13631367
}
1364-
if (possibleBreakPoints.isEmpty()) {
1365-
possibleBreakPoints.add(-1);
1366-
}
1367-
childTextRenderer.setSpecialScriptsWordBreakPoints(possibleBreakPoints);
1368+
1369+
List<Integer> glyphLineBasedPossibleBreakPoints = convertPossibleBreakPointsToGlyphLineBased(
1370+
possibleBreakPoints, amountOfCharsBetweenTextStartAndActualTextChunk,
1371+
glyphLineBasedIndicesOfActualTextChunkEnds);
1372+
childTextRenderer.setSpecialScriptsWordBreakPoints(glyphLineBasedPossibleBreakPoints);
13681373
}
13691374
}
13701375
}
@@ -1402,9 +1407,9 @@ void distributePossibleBreakPointsOverSequentialTextRenderers(
14021407
if (fittingLengthWithTrailingRightSideSpaces > 0) {
14031408
List<Integer> breakPoints = textRenderer.getSpecialScriptsWordBreakPoints();
14041409
if (breakPoints != null && breakPoints.size() > 0 && breakPoints.get(0) != -1) {
1405-
int possibleBreakPointPosition =
1406-
textRenderer.findPossibleBreaksSplitPosition(
1407-
fittingLengthWithTrailingRightSideSpaces + textRenderer.text.start, false);
1410+
int possibleBreakPointPosition = TextRenderer.findPossibleBreaksSplitPosition(
1411+
textRenderer.getSpecialScriptsWordBreakPoints(),
1412+
fittingLengthWithTrailingRightSideSpaces + textRenderer.text.start, false);
14081413
if (possibleBreakPointPosition > -1) {
14091414
splitPosition = breakPoints.get(possibleBreakPointPosition) - amountOfTrailingRightSideSpaces;
14101415
needToSplitRendererContainingLastFullyFittingWord = splitPosition != textRenderer.text.end;
@@ -1451,7 +1456,8 @@ && isChildFloating(childRenderers.get(analyzedTextRendererIndex - 1))) {
14511456
}
14521457
}
14531458

1454-
updateFloatsOverflowedToNextLine(floatsOverflowedToNextLine, indicesOfFloats, indexOfRendererContainingLastFullyFittingWord);
1459+
updateFloatsOverflowedToNextLine(floatsOverflowedToNextLine, indicesOfFloats,
1460+
indexOfRendererContainingLastFullyFittingWord);
14551461

14561462
if (returnLayoutResult == null) {
14571463
returnLayoutResult = childPosLayoutResult;
@@ -1468,9 +1474,7 @@ && isChildFloating(childRenderers.get(analyzedTextRendererIndex - 1))) {
14681474
childRenderer.setSpecialScriptFirstNotFittingIndex(-1);
14691475
}
14701476
} else {
1471-
LayoutArea occupiedArea = specialScriptLayoutResults.get(indexOfRendererContainingLastFullyFittingWord)
1472-
.getOccupiedArea();
1473-
returnLayoutResult = new TextLayoutResult(LayoutResult.NOTHING, occupiedArea, null, childRenderer);
1477+
returnLayoutResult = new TextLayoutResult(LayoutResult.NOTHING, null, null, childRenderer);
14741478
}
14751479
}
14761480

@@ -1560,6 +1564,55 @@ private boolean isInlineBlockChild(IRenderer child) {
15601564
return child instanceof BlockRenderer || child instanceof TableRenderer;
15611565
}
15621566

1567+
// ActualTextChunk is either an ActualText or a single independent glyph
1568+
private static void fillActualTextChunkRelatedLists(
1569+
GlyphLine glyphLine, List<Integer> amountOfCharsBetweenTextStartAndActualTextChunk,
1570+
List<Integer> glyphLineBasedIndicesOfActualTextChunkEnds) {
1571+
ActualTextIterator actualTextIterator = new ActualTextIterator(glyphLine);
1572+
1573+
int amountOfCharsBetweenTextStartAndCurrentActualTextStartOrGlyph = 0;
1574+
while (actualTextIterator.hasNext()) {
1575+
GlyphLine.GlyphLinePart part = actualTextIterator.next();
1576+
int amountOfCharsWithinCurrentActualTextOrGlyph = 0;
1577+
if (part.actualText != null) {
1578+
amountOfCharsWithinCurrentActualTextOrGlyph = part.actualText.length();
1579+
int nextAmountOfChars = amountOfCharsWithinCurrentActualTextOrGlyph
1580+
+ amountOfCharsBetweenTextStartAndCurrentActualTextStartOrGlyph;
1581+
amountOfCharsBetweenTextStartAndActualTextChunk.add(nextAmountOfChars);
1582+
glyphLineBasedIndicesOfActualTextChunkEnds.add(part.end);
1583+
amountOfCharsBetweenTextStartAndCurrentActualTextStartOrGlyph = nextAmountOfChars;
1584+
} else {
1585+
for (int j = part.start; j < part.end; j++) {
1586+
char[] chars = glyphLine.get(j).getChars();
1587+
amountOfCharsWithinCurrentActualTextOrGlyph = chars != null ? chars.length : 0;
1588+
int nextAmountOfChars = amountOfCharsWithinCurrentActualTextOrGlyph
1589+
+ amountOfCharsBetweenTextStartAndCurrentActualTextStartOrGlyph;
1590+
amountOfCharsBetweenTextStartAndActualTextChunk.add(nextAmountOfChars);
1591+
glyphLineBasedIndicesOfActualTextChunkEnds.add(j + 1);
1592+
amountOfCharsBetweenTextStartAndCurrentActualTextStartOrGlyph = nextAmountOfChars;
1593+
}
1594+
}
1595+
}
1596+
}
1597+
1598+
private static List<Integer> convertPossibleBreakPointsToGlyphLineBased(
1599+
List<Integer> possibleBreakPoints, List<Integer> amountOfChars, List<Integer> indices) {
1600+
if (possibleBreakPoints.isEmpty()) {
1601+
possibleBreakPoints.add(-1);
1602+
return possibleBreakPoints;
1603+
} else {
1604+
List<Integer> glyphLineBased = new ArrayList<>();
1605+
1606+
for (int j : possibleBreakPoints) {
1607+
int found = TextRenderer.findPossibleBreaksSplitPosition(amountOfChars, j, true);
1608+
if (found >= 0) {
1609+
glyphLineBased.add(indices.get(found));
1610+
}
1611+
}
1612+
return glyphLineBased;
1613+
}
1614+
}
1615+
15631616
static class RendererGlyph {
15641617
public Glyph glyph;
15651618
public TextRenderer renderer;

layout/src/main/java/com/itextpdf/layout/renderer/TextRenderer.java

Lines changed: 63 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,8 @@ public LayoutResult layout(LayoutContext layoutContext) {
390390
}
391391

392392
boolean endOfWordBelongingToSpecialScripts = textContainsSpecialScriptGlyphs(true)
393-
&& findPossibleBreaksSplitPosition(ind + 1, true) >= 0;
393+
&& findPossibleBreaksSplitPosition(specialScriptsWordBreakPoints,
394+
ind + 1, true) >= 0;
394395
if (ind + 1 == text.end || splitCharacters.isSplitCharacter(text, ind) ||
395396
splitCharacters.isSplitCharacter(text, ind + 1) &&
396397
TextUtil.isSpaceOrWhitespace(text.get(ind + 1)) || endOfWordBelongingToSpecialScripts) {
@@ -891,6 +892,23 @@ public void trimFirst() {
891892
text.start++;
892893
}
893894
}
895+
896+
/* Between two sentences separated by one or more whitespaces,
897+
ICU allows a break right after the last whitespace.
898+
Therefore we need to carefully edit specialScriptsWordBreakPoints list after trimming:
899+
if a break is allowed to happen right before the first glyph of an already trimmed text,
900+
we need to remove this point from the list
901+
(or replace it with -1 thus marking that text contains special scripts,
902+
in case the removed break point was the only possible break point).
903+
*/
904+
if (textContainsSpecialScriptGlyphs(true)
905+
&& specialScriptsWordBreakPoints.get(0) == text.start) {
906+
if (specialScriptsWordBreakPoints.size() == 1) {
907+
specialScriptsWordBreakPoints.set(0, -1);
908+
} else {
909+
specialScriptsWordBreakPoints.remove(0);
910+
}
911+
}
894912
}
895913

896914
float trimLast() {
@@ -1128,7 +1146,7 @@ private boolean hasOtfFont() {
11281146
*
11291147
* Mind that the behavior of this method depends on the analyzeSpecialScriptsWordBreakPointsOnly parameter:
11301148
* - pass {@code false} if you need to analyze the {@link TextRenderer#text} by checking each of its glyphs
1131-
* AND to fill {@link TextRenderer#specialScriptsWordBreakPoints} list,
1149+
* AND to fill {@link TextRenderer#specialScriptsWordBreakPoints} list afterwards,
11321150
* i.e. when analyzing a sequence of TextRenderers prior to layouting;
11331151
* - pass {@code true} if you want to check if text contains glyphs belonging to special scripts,
11341152
* according to the already filled {@link TextRenderer#specialScriptsWordBreakPoints} list.
@@ -1143,18 +1161,33 @@ boolean textContainsSpecialScriptGlyphs(boolean analyzeSpecialScriptsWordBreakPo
11431161
if (specialScriptsWordBreakPoints != null) {
11441162
return !specialScriptsWordBreakPoints.isEmpty();
11451163
}
1146-
if (!analyzeSpecialScriptsWordBreakPointsOnly) {
1147-
for (int i = text.start; i < text.end; i++) {
1148-
int unicode = text.get(i).getUnicode();
1149-
if (unicode > -1) {
1150-
Character.UnicodeScript glyphScript = Character.UnicodeScript.of(unicode);
1151-
if (Character.UnicodeScript.THAI.equals(glyphScript)) {
1152-
return true;
1164+
1165+
if (analyzeSpecialScriptsWordBreakPointsOnly) {
1166+
return false;
1167+
}
1168+
1169+
for (int i = text.start; i < text.end; i++) {
1170+
int unicode = text.get(i).getUnicode();
1171+
if (unicode > -1) {
1172+
if (codePointIsOfSpecialScript(unicode)) {
1173+
return true;
1174+
}
1175+
} else {
1176+
char[] chars = text.get(i).getChars();
1177+
if (chars != null) {
1178+
for (char ch : chars) {
1179+
if (codePointIsOfSpecialScript(ch)) {
1180+
return true;
1181+
}
11531182
}
11541183
}
11551184
}
1156-
specialScriptsWordBreakPoints = new ArrayList<>();
11571185
}
1186+
// if we've reached this point, it means we've analyzed the entire TextRenderer#text
1187+
// and haven't found special scripts, therefore we define specialScriptsWordBreakPoints
1188+
// as an empty list to mark that it has already been analyzed
1189+
specialScriptsWordBreakPoints = new ArrayList<>();
1190+
11581191
return false;
11591192
}
11601193

@@ -1269,7 +1302,8 @@ protected TextRenderer[] split(int initialOverflowTextPos) {
12691302
overflow.add(-1);
12701303
overflowRenderer.setSpecialScriptsWordBreakPoints(overflow);
12711304
} else {
1272-
int splitIndex = findPossibleBreaksSplitPosition(initialOverflowTextPos, false);
1305+
int splitIndex = findPossibleBreaksSplitPosition(specialScriptsWordBreakPoints, initialOverflowTextPos,
1306+
false);
12731307

12741308
if (splitIndex > -1) {
12751309
splitRenderer.setSpecialScriptsWordBreakPoints(specialScriptsWordBreakPoints
@@ -1540,26 +1574,24 @@ private void saveWordBreakIfNotYetSaved(Glyph wordBreak) {
15401574
savedWordBreakAtLineEnding = new GlyphLine(Collections.<Glyph>singletonList(wordBreak));
15411575
}
15421576
}
1543-
// if amongPresentOnly is true, returns the index of specialScriptsWordBreakPoints's element
1544-
// or -1 if element wasn't found.
1545-
// if amongPresentOnly is false, returns the index of specialScriptsWordBreakPoints's element
1577+
// if amongPresentOnly is true,
1578+
// returns the index of list's element which equals textStartBasedInitialOverflowTextPos
1579+
// or -1 if textStartBasedInitialOverflowTextPos wasn't found in the list.
1580+
// if amongPresentOnly is false, returns the index of list's element
15461581
// that is not greater than textStartBasedInitialOverflowTextPos
1547-
// if there's no such element in specialScriptsWordBreakPoints, -1 is returned
1548-
int findPossibleBreaksSplitPosition(int textStartBasedInitialOverflowTextPos, boolean amongPresentOnly) {
1582+
// if there's no such element in the list, -1 is returned
1583+
static int findPossibleBreaksSplitPosition(List<Integer> list, int textStartBasedInitialOverflowTextPos,
1584+
boolean amongPresentOnly) {
15491585
int low = 0;
1550-
int high = specialScriptsWordBreakPoints.size() - 1;
1586+
int high = list.size() - 1;
15511587

15521588
while (low <= high) {
15531589
int middle = (low + high) >>> 1;
1554-
if (specialScriptsWordBreakPoints.get(middle)
1555-
.compareTo(textStartBasedInitialOverflowTextPos) < 0) {
1590+
if (list.get(middle).compareTo(textStartBasedInitialOverflowTextPos) < 0) {
15561591
low = middle + 1;
1557-
}
1558-
else if (specialScriptsWordBreakPoints.get(middle)
1559-
.compareTo(textStartBasedInitialOverflowTextPos) > 0) {
1592+
} else if (list.get(middle).compareTo(textStartBasedInitialOverflowTextPos) > 0) {
15601593
high = middle - 1;
1561-
}
1562-
else {
1594+
} else {
15631595
return middle;
15641596
}
15651597
}
@@ -1569,6 +1601,13 @@ else if (specialScriptsWordBreakPoints.get(middle)
15691601
return -1;
15701602
}
15711603

1604+
private boolean codePointIsOfSpecialScript(int codePoint) {
1605+
Character.UnicodeScript glyphScript = Character.UnicodeScript.of(codePoint);
1606+
return Character.UnicodeScript.THAI == glyphScript
1607+
|| Character.UnicodeScript.KHMER == glyphScript
1608+
|| Character.UnicodeScript.LAO == glyphScript;
1609+
}
1610+
15721611
private static class ReversedCharsIterator implements Iterator<GlyphLine.GlyphLinePart> {
15731612
private List<Integer> outStart;
15741613
private List<Integer> outEnd;

0 commit comments

Comments
 (0)