Skip to content

Commit b1281a4

Browse files
committed
Refactor implementation of building hyphenation trees to avoid StackOverflowErrors
The recursive implementation is now rewritten in a sequential way This solves failures (sometimes flaky ones that depend on JVM config) of the tests involving hyphenating strings in some of the languages DEVSIX-2036 DEVSIX-3183
1 parent f5e58ab commit b1281a4

File tree

3 files changed

+118
-49
lines changed

3 files changed

+118
-49
lines changed

layout/src/main/java/com/itextpdf/layout/hyphenation/TernaryTree.java

Lines changed: 118 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
* limitations under the License.
1616
*/
1717

18+
/*
19+
* PLEASE NOTE that implementation of "insert" function was refactored to consume less stack memory
20+
*/
21+
1822
package com.itextpdf.layout.hyphenation;
1923

2024
import java.io.Serializable;
@@ -164,7 +168,7 @@ public void insert(String key, char val) {
164168
char[] strkey = new char[len--];
165169
key.getChars(0, len, strkey, 0);
166170
strkey[len] = 0;
167-
root = insert(root, strkey, 0, val);
171+
root = insert(new TreeInsertionParams(root, strkey, 0, val));
168172
}
169173

170174
/**
@@ -178,13 +182,17 @@ public void insert(char[] key, int start, char val) {
178182
if (freenode + len > eq.length) {
179183
redimNodeArrays(eq.length + BLOCK_SIZE);
180184
}
181-
root = insert(root, key, start, val);
185+
root = insert(new TreeInsertionParams(root, key, start, val));
182186
}
183187

184-
/**
185-
* The actual insertion function, recursive version.
186-
*/
187-
private char insert(char p, char[] key, int start, char val) {
188+
// PLEASE NOTE that this function is a result of refactoring "insert" method which
189+
// is a modification of the original work
190+
// Returns null if insertion is not needed and the id of the new node if insertion was performed
191+
private Character insertNewBranchIfNeeded(TreeInsertionParams params) {
192+
char p = params.p;
193+
char[] key = params.key;
194+
int start = params.start;
195+
char val = params.val;
188196
int len = strlen(key, start);
189197
if (p == 0) {
190198
// this means there is no branch, this node will start a new branch.
@@ -204,54 +212,104 @@ private char insert(char p, char[] key, int start, char val) {
204212
lo[p] = 0;
205213
}
206214
return p;
215+
} else {
216+
return null;
207217
}
218+
}
208219

209-
if (sc[p] == 0xFFFF) {
210-
// branch is compressed: need to decompress
211-
// this will generate garbage in the external key array
212-
// but we can do some garbage collection later
213-
char pp = freenode++;
214-
lo[pp] = lo[p]; // previous pointer to key
215-
eq[pp] = eq[p]; // previous pointer to data
216-
lo[p] = 0;
217-
if (len > 0) {
218-
sc[p] = kv.get(lo[pp]);
219-
eq[p] = pp;
220-
lo[pp]++;
221-
if (kv.get(lo[pp]) == 0) {
222-
// key completly decompressed leaving garbage in key array
223-
lo[pp] = 0;
224-
sc[pp] = 0;
225-
hi[pp] = 0;
220+
// PLEASE NOTE that this function is a result of refactoring "insert" method which
221+
// is a modification of the original work
222+
private char insertIntoExistingBranch(TreeInsertionParams params) {
223+
char initialP = params.p;
224+
TreeInsertionParams paramsToInsertNext = params;
225+
while (paramsToInsertNext != null) {
226+
char p = paramsToInsertNext.p;
227+
// We are inserting into an existing branch hence the id must be non-zero
228+
assert p != 0;
229+
char[] key = paramsToInsertNext.key;
230+
int start = paramsToInsertNext.start;
231+
char val = paramsToInsertNext.val;
232+
int len = strlen(key, start);
233+
paramsToInsertNext = null;
234+
235+
if (sc[p] == 0xFFFF) {
236+
// branch is compressed: need to decompress
237+
// this will generate garbage in the external key array
238+
// but we can do some garbage collection later
239+
char pp = freenode++;
240+
lo[pp] = lo[p]; // previous pointer to key
241+
eq[pp] = eq[p]; // previous pointer to data
242+
lo[p] = 0;
243+
if (len > 0) {
244+
sc[p] = kv.get(lo[pp]);
245+
eq[p] = pp;
246+
lo[pp]++;
247+
if (kv.get(lo[pp]) == 0) {
248+
// key completly decompressed leaving garbage in key array
249+
lo[pp] = 0;
250+
sc[pp] = 0;
251+
hi[pp] = 0;
252+
} else {
253+
// we only got first char of key, rest is still there
254+
sc[pp] = 0xFFFF;
255+
}
226256
} else {
227-
// we only got first char of key, rest is still there
257+
// In this case we can save a node by swapping the new node
258+
// with the compressed node
228259
sc[pp] = 0xFFFF;
260+
hi[p] = pp;
261+
sc[p] = 0;
262+
eq[p] = val;
263+
length++;
264+
break;
229265
}
230-
} else {
231-
// In this case we can save a node by swapping the new node
232-
// with the compressed node
233-
sc[pp] = 0xFFFF;
234-
hi[p] = pp;
235-
sc[p] = 0;
236-
eq[p] = val;
237-
length++;
238-
return p;
239266
}
240-
}
241-
char s = key[start];
242-
if (s < sc[p]) {
243-
lo[p] = insert(lo[p], key, start, val);
244-
} else if (s == sc[p]) {
245-
if (s != 0) {
246-
eq[p] = insert(eq[p], key, start + 1, val);
267+
char s = key[start];
268+
if (s < sc[p]) {
269+
TreeInsertionParams branchParams = new TreeInsertionParams(lo[p], key, start, val);
270+
Character insertNew = insertNewBranchIfNeeded(branchParams);
271+
if (insertNew == null) {
272+
paramsToInsertNext = branchParams;
273+
} else {
274+
lo[p] = insertNew;
275+
}
276+
} else if (s == sc[p]) {
277+
if (s != 0) {
278+
TreeInsertionParams branchParams = new TreeInsertionParams(eq[p], key, start + 1, val);
279+
Character insertNew = insertNewBranchIfNeeded(branchParams);
280+
if (insertNew == null) {
281+
paramsToInsertNext = branchParams;
282+
} else {
283+
eq[p] = insertNew;
284+
}
285+
} else {
286+
// key already in tree, overwrite data
287+
eq[p] = val;
288+
}
247289
} else {
248-
// key already in tree, overwrite data
249-
eq[p] = val;
290+
TreeInsertionParams branchParams = new TreeInsertionParams(hi[p], key, start, val);
291+
Character insertNew = insertNewBranchIfNeeded(branchParams);
292+
if (insertNew == null) {
293+
paramsToInsertNext = branchParams;
294+
} else {
295+
hi[p] = insertNew;
296+
}
250297
}
298+
}
299+
return initialP;
300+
}
301+
302+
/**
303+
* The actual insertion function, recursive version.
304+
* PLEASE NOTE that the implementation has been adapted to consume less stack memory
305+
*/
306+
private char insert(TreeInsertionParams params) {
307+
Character newBranch = insertNewBranchIfNeeded(params);
308+
if (newBranch == null) {
309+
return insertIntoExistingBranch(params);
251310
} else {
252-
hi[p] = insert(hi[p], key, start, val);
311+
return (char)newBranch;
253312
}
254-
return p;
255313
}
256314

257315
/**
@@ -514,5 +572,21 @@ private void compact(CharVector kx, TernaryTree map, char p) {
514572
public Enumeration keys() {
515573
return new TernaryTreeIterator(this);
516574
}
575+
576+
// PLEASE NOTE that this is a helper class that was added as a result of the file modification
577+
// and is not a part of the original file
578+
private static class TreeInsertionParams {
579+
char p;
580+
char[] key;
581+
int start;
582+
char val;
583+
584+
public TreeInsertionParams(char p, char[] key, int start, char val) {
585+
this.p = p;
586+
this.key = key;
587+
this.start = start;
588+
this.val = val;
589+
}
590+
}
517591
}
518592

layout/src/test/java/com/itextpdf/layout/HyphenateResultTest.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,22 +47,19 @@ This file is part of the iText (R) project.
4747
import com.itextpdf.test.ExtendedITextTest;
4848
import com.itextpdf.test.annotations.type.IntegrationTest;
4949
import org.junit.Assert;
50-
import org.junit.Ignore;
5150
import org.junit.Test;
5251
import org.junit.experimental.categories.Category;
5352

5453
@Category(IntegrationTest.class)
5554
public class HyphenateResultTest extends ExtendedITextTest {
5655

5756
@Test
58-
@Ignore("DEVSIX-2036")
5957
public void ukraineHyphenTest() {
6058
//здравствуйте
6159
testHyphenateResult("uk", "\u0437\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439", new int[]{5});
6260
}
6361

6462
@Test
65-
@Ignore("DEVSIX-2036")
6663
public void ukraineNoneHyphenTest() {
6764
//день
6865
testHyphenateResult("uk", "\u0434\u0435\u043D\u044C", null);

layout/src/test/java/com/itextpdf/layout/HyphenateTest.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ This file is part of the iText (R) project.
4848
import com.itextpdf.test.ExtendedITextTest;
4949
import com.itextpdf.test.annotations.type.IntegrationTest;
5050
import org.junit.Assert;
51-
import org.junit.Ignore;
5251
import org.junit.Test;
5352
import org.junit.experimental.categories.Category;
5453

@@ -135,7 +134,6 @@ public class HyphenateTest extends ExtendedITextTest {
135134
private List<String> errors = new ArrayList<>();
136135

137136
@Test
138-
@Ignore("DEVSIX-2036")
139137
public void runTest() {
140138
for (TestParams param : params) {
141139
tryHyphenate(param.lang, param.testWorld, param.shouldPass);

0 commit comments

Comments
 (0)