Skip to content

Commit 5c5bb61

Browse files
committed
Fixed #14
1 parent d9b1e63 commit 5c5bb61

File tree

6 files changed

+150
-66
lines changed

6 files changed

+150
-66
lines changed

release-notes/CREDITS

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@ Here are individuals who have contributed to development of this project.
44
Tatu Saloranta, [email protected]: author
55

66
7-
87
* Reported [Issue#6]: broken handling of '\r' (on Windows)
98
(0.7.2)
109

1110
Nathan Williams (nathanlws@github)
12-
1311
* Contributed #10: Add Iterator-based (pull-style) sorter as an alternative
1412
(0.9.0)
13+
14+
Mathias Herberts (hbs@github)
15+
* Reported #14: Long lines are corrupted when read by `RawTextLineReader._readNextSlow()`
16+
(1.0.1)

release-notes/VERSION

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
Project: java-merge-sort
22
License: Apache License 2.0
3-
Version: 1.0.0
4-
Release date: 04-Jun-2014
3+
Version: 1.0.1
4+
Release date: 14-Feb-2017
55

6-
The first official non-beta release. No changes since 0.9.1.
6+
#14: Long lines are corrupted when read by `RawTextLineReader._readNextSlow()`
7+
(reported by Mathias H, hbs@github)
78

89
------------------------------------------------------------------------
910
=== History: ===
1011
------------------------------------------------------------------------
1112

13+
1.0.0 (04-Jun-2014)
14+
15+
The first official non-beta release. No changes since 0.9.1.
16+
1217
0.9.1 (12-Oct-2013)
1318

1419
#11: Change more constructors to be public

src/main/java/com/fasterxml/sort/std/RawTextLineReader.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public class RawTextLineReader
1919
protected final InputStream _in;
2020

2121
protected boolean _closed = false;
22-
22+
2323
protected byte[] _inputBuffer = new byte[16000];
2424
protected int _inputPtr = 0;
2525
protected int _inputEnd = 0;
@@ -126,6 +126,7 @@ protected final byte[] _readNextSlow(int start) throws IOException
126126
break main_loop;
127127
}
128128
}
129+
bytes.write(_inputBuffer, 0, _inputEnd);
129130
}
130131
return bytes.toByteArray();
131132
}
@@ -167,7 +168,7 @@ protected boolean _skipLF() throws IOException
167168
/* Helper classes
168169
/**********************************************************************
169170
*/
170-
171+
171172
public static class Factory
172173
extends DataReaderFactory<byte[]>
173174
{
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,64 @@
11
package com.fasterxml.sort;
22

3+
import java.nio.charset.Charset;
4+
35
import junit.framework.TestCase;
46

57
public abstract class SortTestBase extends TestCase
68
{
9+
protected static final Charset CHARSET = Charset.forName("UTF-8");
10+
11+
protected static class StringGenerator extends DataReader<byte[]> {
12+
private final int generateCount;
13+
private final StringBuilder sb;
14+
private int count;
15+
16+
public StringGenerator(int generateCount, int stringLength) {
17+
this.generateCount = generateCount;
18+
this.sb = new StringBuilder(stringLength);
19+
for(int i = 0; i < stringLength; ++i) {
20+
sb.append('a');
21+
}
22+
}
23+
24+
@Override
25+
public byte[] readNext() {
26+
if (count >= generateCount) {
27+
return null;
28+
}
29+
int saveLen = sb.length();
30+
sb.append(count++);
31+
String s = sb.toString();
32+
sb.setLength(saveLen);
33+
return s.getBytes(CHARSET);
34+
}
35+
36+
@Override
37+
public int estimateSizeInBytes(byte[] item) {
38+
return item.length;
39+
}
40+
41+
@Override
42+
public void close() {
43+
// None
44+
}
45+
}
46+
47+
protected static class CountingWriter<T> extends DataWriter<T> {
48+
private int count = 0;
49+
50+
public int getCount() {
51+
return count;
52+
}
53+
54+
@Override
55+
public void writeEntry(T item) {
56+
++count;
57+
}
758

59+
@Override
60+
public void close() {
61+
// None
62+
}
63+
}
864
}
Lines changed: 3 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,17 @@
11
package com.fasterxml.sort;
22

3+
import java.io.IOException;
4+
35
import com.fasterxml.sort.std.ByteArrayComparator;
46
import com.fasterxml.sort.std.RawTextLineReader;
57
import com.fasterxml.sort.std.RawTextLineWriter;
68

7-
import java.io.IOException;
8-
import java.nio.charset.Charset;
9-
109
public class TestLargeSort extends SortTestBase
1110
{
12-
private static final Charset CHARSET =Charset.forName("UTF-8");
1311
private static final int STRING_LENGTH = 256;
1412
private static final int SORT_MEM_BYTES = 1024 * 1024; // 1MB
1513
private static final int STRING_COUNT = 10 * (SORT_MEM_BYTES / STRING_LENGTH);
1614

17-
private static class StringGenerator extends DataReader<byte[]> {
18-
private final int generateCount;
19-
private final StringBuilder sb;
20-
private int count;
21-
22-
private StringGenerator(int generateCount, int stringLength) {
23-
this.generateCount = generateCount;
24-
this.sb = new StringBuilder(stringLength);
25-
for(int i = 0; i < stringLength; ++i) {
26-
sb.append('a');
27-
}
28-
}
29-
30-
@Override
31-
public byte[] readNext() {
32-
if(count >= generateCount) {
33-
return null;
34-
}
35-
int saveLen = sb.length();
36-
sb.append(count++);
37-
String s = sb.toString();
38-
sb.setLength(saveLen);
39-
return s.getBytes(CHARSET);
40-
}
41-
42-
@Override
43-
public int estimateSizeInBytes(byte[] item) {
44-
return item.length;
45-
}
46-
47-
@Override
48-
public void close() {
49-
// None
50-
}
51-
}
52-
53-
private static class CountingWriter<T> extends DataWriter<T> {
54-
private int count = 0;
55-
56-
public int getCount() {
57-
return count;
58-
}
59-
60-
@Override
61-
public void writeEntry(T item) {
62-
++count;
63-
}
64-
65-
@Override
66-
public void close() {
67-
// None
68-
}
69-
}
70-
71-
7215
public void testLargeSort() throws IOException {
7316
Sorter<byte[]> sorter = new Sorter<byte[]>(
7417
new SortConfig().withMaxMemoryUsage(SORT_MEM_BYTES),
@@ -79,5 +22,6 @@ public void testLargeSort() throws IOException {
7922
CountingWriter<byte[]> counter = new CountingWriter<byte[]>();
8023
sorter.sort(new StringGenerator(STRING_COUNT, STRING_LENGTH), counter);
8124
assertEquals("sorted count", STRING_COUNT, counter.getCount());
25+
sorter.close();
8226
}
8327
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package com.fasterxml.sort;
2+
3+
import java.io.ByteArrayInputStream;
4+
import java.util.ArrayList;
5+
import java.util.List;
6+
7+
import com.fasterxml.sort.std.ByteArrayComparator;
8+
import com.fasterxml.sort.std.RawTextLineReader;
9+
import com.fasterxml.sort.std.RawTextLineWriter;
10+
11+
// for issue [#14], problem with lines longer than 32k
12+
public class TestLongLineSort extends SortTestBase
13+
{
14+
protected static class CollectingWriter<T> extends DataWriter<T> {
15+
private final List<T> _contents = new ArrayList<T>();
16+
17+
public List<T> contents() {
18+
return _contents;
19+
}
20+
21+
@Override
22+
public void writeEntry(T item) {
23+
_contents.add(item);
24+
}
25+
26+
@Override
27+
public void close() {
28+
// None
29+
}
30+
}
31+
32+
public void testLongLine() throws Exception
33+
{
34+
String line1 = _generate("cxxx", 33000);
35+
String line2 = _generate("abab", 33003);
36+
String line3 = _generate("byyy", 32900);
37+
byte[] input = String.format("%s\n%s\n%s\n", line1, line2, line3)
38+
.getBytes(CHARSET);
39+
40+
Sorter<byte[]> sorter = new Sorter<byte[]>(
41+
new SortConfig(),
42+
RawTextLineReader.factory(),
43+
RawTextLineWriter.factory(),
44+
new ByteArrayComparator()
45+
);
46+
CollectingWriter<byte[]> collator = new CollectingWriter<byte[]>();
47+
48+
sorter.sort(new RawTextLineReader(new ByteArrayInputStream(input)),
49+
collator);
50+
sorter.close();
51+
List<byte[]> results = collator.contents();
52+
assertEquals(3, results.size());
53+
_verify(line2, results, 0);
54+
_verify(line3, results, 1);
55+
_verify(line1, results, 2);
56+
}
57+
58+
private void _verify(String input, List<byte[]> results, int index)
59+
{
60+
byte[] output = results.get(index);
61+
String outputStr = new String(output, CHARSET);
62+
// first assert lengths are equal
63+
assertEquals(input.length(), outputStr.length());
64+
// and then content
65+
assertEquals(outputStr, input);
66+
}
67+
68+
private String _generate(String part, int len) {
69+
StringBuilder sb = new StringBuilder(len + part.length());
70+
do {
71+
sb.append(part);
72+
} while (sb.length() < len);
73+
return sb.toString();
74+
}
75+
76+
}

0 commit comments

Comments
 (0)