Skip to content

Commit e081e17

Browse files
SnipxiText-CI
authored andcommitted
Refactor implementation of building hyphenation trees to avoid StackOverflowErrors
The recursive implementation is now rewritten in a sequential way This solves failures (sometimes flaky ones that depend on JVM config) of the tests involving hyphenating strings in some of the languages DEVSIX-2036 DEVSIX-3183 Autoported commit. Original commit hash: [b1281a42c]
1 parent 167e1d8 commit e081e17

File tree

4 files changed

+133
-51
lines changed

4 files changed

+133
-51
lines changed

itext.tests/itext.layout.tests/itext/layout/HyphenateResultTest.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,12 @@ source product.
4747
namespace iText.Layout {
4848
public class HyphenateResultTest : ExtendedITextTest {
4949
[NUnit.Framework.Test]
50-
[NUnit.Framework.Ignore("DEVSIX-2036")]
5150
public virtual void UkraineHyphenTest() {
5251
//здравствуйте
5352
TestHyphenateResult("uk", "\u0437\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439", new int[] { 5 });
5453
}
5554

5655
[NUnit.Framework.Test]
57-
[NUnit.Framework.Ignore("DEVSIX-2036")]
5856
public virtual void UkraineNoneHyphenTest() {
5957
//день
6058
TestHyphenateResult("uk", "\u0434\u0435\u043D\u044C", null);

itext.tests/itext.layout.tests/itext/layout/HyphenateTest.cs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ public class HyphenateTest : ExtendedITextTest {
113113
//సుస్వాగతం
114114
//здравствуй
115115
[NUnit.Framework.Test]
116-
[NUnit.Framework.Ignore("DEVSIX-2036")]
117116
public virtual void RunTest() {
118117
foreach (HyphenateTest.TestParams param in @params) {
119118
TryHyphenate(param.lang, param.testWorld, param.shouldPass);

itext/itext.layout/itext/layout/hyphenation/TernaryTree.cs

Lines changed: 132 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
* See the License for the specific language governing permissions and
1515
* limitations under the License.
1616
*/
17+
/*
18+
* PLEASE NOTE that implementation of "insert" function was refactored to consume less stack memory
19+
*/
1720
using System;
1821
using System.Collections;
1922

@@ -153,7 +156,7 @@ public virtual void Insert(String key, char val) {
153156
char[] strkey = new char[len--];
154157
key.JGetChars(0, len, strkey, 0);
155158
strkey[len] = (char)0;
156-
root = Insert(root, strkey, 0, val);
159+
root = Insert(new TernaryTree.TreeInsertionParams(root, strkey, 0, val));
157160
}
158161

159162
/// <summary>Insert key.</summary>
@@ -165,11 +168,17 @@ public virtual void Insert(char[] key, int start, char val) {
165168
if (freenode + len > eq.Length) {
166169
RedimNodeArrays(eq.Length + BLOCK_SIZE);
167170
}
168-
root = Insert(root, key, start, val);
171+
root = Insert(new TernaryTree.TreeInsertionParams(root, key, start, val));
169172
}
170173

171-
/// <summary>The actual insertion function, recursive version.</summary>
172-
private char Insert(char p, char[] key, int start, char val) {
174+
// PLEASE NOTE that this function is a result of refactoring "insert" method which
175+
// is a modification of the original work
176+
// Returns null if insertion is not needed and the id of the new node if insertion was performed
177+
private char? InsertNewBranchIfNeeded(TernaryTree.TreeInsertionParams @params) {
178+
char p = @params.p;
179+
char[] key = @params.key;
180+
int start = @params.start;
181+
char val = @params.val;
173182
int len = Strlen(key, start);
174183
if (p == 0) {
175184
// this means there is no branch, this node will start a new branch.
@@ -193,61 +202,118 @@ private char Insert(char p, char[] key, int start, char val) {
193202
}
194203
return p;
195204
}
196-
if (sc[p] == 0xFFFF) {
197-
// branch is compressed: need to decompress
198-
// this will generate garbage in the external key array
199-
// but we can do some garbage collection later
200-
char pp = freenode++;
201-
lo[pp] = lo[p];
202-
// previous pointer to key
203-
eq[pp] = eq[p];
204-
// previous pointer to data
205-
lo[p] = (char)0;
206-
if (len > 0) {
207-
sc[p] = kv.Get(lo[pp]);
208-
eq[p] = pp;
209-
lo[pp]++;
210-
if (kv.Get(lo[pp]) == 0) {
211-
// key completly decompressed leaving garbage in key array
212-
lo[pp] = (char)0;
213-
sc[pp] = (char)0;
214-
hi[pp] = (char)0;
205+
else {
206+
return null;
207+
}
208+
}
209+
210+
// PLEASE NOTE that this function is a result of refactoring "insert" method which
211+
// is a modification of the original work
212+
private char InsertIntoExistingBranch(TernaryTree.TreeInsertionParams @params) {
213+
char initialP = @params.p;
214+
TernaryTree.TreeInsertionParams paramsToInsertNext = @params;
215+
while (paramsToInsertNext != null) {
216+
char p = paramsToInsertNext.p;
217+
// We are inserting into an existing branch hence the id must be non-zero
218+
System.Diagnostics.Debug.Assert(p != 0);
219+
char[] key = paramsToInsertNext.key;
220+
int start = paramsToInsertNext.start;
221+
char val = paramsToInsertNext.val;
222+
int len = Strlen(key, start);
223+
paramsToInsertNext = null;
224+
if (sc[p] == 0xFFFF) {
225+
// branch is compressed: need to decompress
226+
// this will generate garbage in the external key array
227+
// but we can do some garbage collection later
228+
char pp = freenode++;
229+
lo[pp] = lo[p];
230+
// previous pointer to key
231+
eq[pp] = eq[p];
232+
// previous pointer to data
233+
lo[p] = (char)0;
234+
if (len > 0) {
235+
sc[p] = kv.Get(lo[pp]);
236+
eq[p] = pp;
237+
lo[pp]++;
238+
if (kv.Get(lo[pp]) == 0) {
239+
// key completly decompressed leaving garbage in key array
240+
lo[pp] = (char)0;
241+
sc[pp] = (char)0;
242+
hi[pp] = (char)0;
243+
}
244+
else {
245+
// we only got first char of key, rest is still there
246+
sc[pp] = (char)0xFFFF;
247+
}
215248
}
216249
else {
217-
// we only got first char of key, rest is still there
250+
// In this case we can save a node by swapping the new node
251+
// with the compressed node
218252
sc[pp] = (char)0xFFFF;
253+
hi[p] = pp;
254+
sc[p] = (char)0;
255+
eq[p] = val;
256+
length++;
257+
break;
219258
}
220259
}
221-
else {
222-
// In this case we can save a node by swapping the new node
223-
// with the compressed node
224-
sc[pp] = (char)0xFFFF;
225-
hi[p] = pp;
226-
sc[p] = (char)0;
227-
eq[p] = val;
228-
length++;
229-
return p;
230-
}
231-
}
232-
char s = key[start];
233-
if (s < sc[p]) {
234-
lo[p] = Insert(lo[p], key, start, val);
235-
}
236-
else {
237-
if (s == sc[p]) {
238-
if (s != 0) {
239-
eq[p] = Insert(eq[p], key, start + 1, val);
260+
char s = key[start];
261+
if (s < sc[p]) {
262+
TernaryTree.TreeInsertionParams branchParams = new TernaryTree.TreeInsertionParams(lo[p], key, start, val);
263+
char? insertNew = InsertNewBranchIfNeeded(branchParams);
264+
if (insertNew == null) {
265+
paramsToInsertNext = branchParams;
240266
}
241267
else {
242-
// key already in tree, overwrite data
243-
eq[p] = val;
268+
lo[p] = (char)insertNew;
244269
}
245270
}
246271
else {
247-
hi[p] = Insert(hi[p], key, start, val);
272+
if (s == sc[p]) {
273+
if (s != 0) {
274+
TernaryTree.TreeInsertionParams branchParams = new TernaryTree.TreeInsertionParams(eq[p], key, start + 1,
275+
val);
276+
char? insertNew = InsertNewBranchIfNeeded(branchParams);
277+
if (insertNew == null) {
278+
paramsToInsertNext = branchParams;
279+
}
280+
else {
281+
eq[p] = (char)insertNew;
282+
}
283+
}
284+
else {
285+
// key already in tree, overwrite data
286+
eq[p] = val;
287+
}
288+
}
289+
else {
290+
TernaryTree.TreeInsertionParams branchParams = new TernaryTree.TreeInsertionParams(hi[p], key, start, val);
291+
char? insertNew = InsertNewBranchIfNeeded(branchParams);
292+
if (insertNew == null) {
293+
paramsToInsertNext = branchParams;
294+
}
295+
else {
296+
hi[p] = (char)insertNew;
297+
}
298+
}
248299
}
249300
}
250-
return p;
301+
return initialP;
302+
}
303+
304+
/// <summary>The actual insertion function, recursive version.</summary>
305+
/// <remarks>
306+
/// The actual insertion function, recursive version.
307+
/// PLEASE NOTE that the implementation has been adapted to consume less stack memory
308+
/// </remarks>
309+
private char Insert(TernaryTree.TreeInsertionParams @params) {
310+
char? newBranch = InsertNewBranchIfNeeded(@params);
311+
if (newBranch == null) {
312+
return InsertIntoExistingBranch(@params);
313+
}
314+
else {
315+
return (char)newBranch;
316+
}
251317
}
252318

253319
/// <summary>Compares 2 null terminated char arrays</summary>
@@ -496,5 +562,24 @@ private void Compact(CharVector kx, iText.Layout.Hyphenation.TernaryTree map, ch
496562
public virtual IEnumerator Keys() {
497563
return new TernaryTreeIterator(this);
498564
}
565+
566+
private class TreeInsertionParams {
567+
internal char p;
568+
569+
internal char[] key;
570+
571+
internal int start;
572+
573+
internal char val;
574+
575+
public TreeInsertionParams(char p, char[] key, int start, char val) {
576+
// PLEASE NOTE that this is a helper class that was added as a result of the file modification
577+
// and is not a part of the original file
578+
this.p = p;
579+
this.key = key;
580+
this.start = start;
581+
this.val = val;
582+
}
583+
}
499584
}
500585
}

port-hash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
f5e58abfe5b1b017a9326dc516d1c001d4434fa3
1+
b1281a42c0cbeba0db22049a125ab6d52aa22504

0 commit comments

Comments
 (0)