Skip to content

Commit 1465e1c

Browse files
committed
Fix to handle any combination of interspersed CR, LF, CRLF
1 parent ee1827a commit 1465e1c

File tree

4 files changed

+105
-64
lines changed

4 files changed

+105
-64
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/util/LineBreaker.java

Lines changed: 22 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -76,33 +76,30 @@ private void reset(Reader reader) throws IOException {
7676
int c;
7777
while ((c = reader.read()) != -1) {
7878
++length;
79-
switch (c) {
80-
case '\r':
81-
c = reader.read();
82-
if (c == -1) {
83-
newOffsets.add(length);
84-
break;
85-
} else {
79+
80+
redo_c:
81+
while (true) {
82+
switch (c) {
83+
case '\r':
84+
c = reader.read();
85+
if (c == -1) {
86+
newOffsets.add(length);
87+
break redo_c;
88+
}
8689
++length;
87-
switch (c) {
88-
case '\n':
89-
newOffsets.add(length);
90-
break;
91-
case '\r':
92-
newOffsets.add(length - 1);
93-
newOffsets.add(length);
94-
break;
95-
default:
96-
newOffsets.add(length - 1);
97-
break;
90+
if (c == '\n') {
91+
newOffsets.add(length);
92+
break redo_c;
9893
}
99-
}
100-
break;
101-
case '\n':
102-
newOffsets.add(length);
103-
break;
104-
default:
105-
break;
94+
newOffsets.add(length - 1);
95+
continue redo_c;
96+
case '\n':
97+
newOffsets.add(length);
98+
break redo_c;
99+
default:
100+
// pass
101+
}
102+
break;
106103
}
107104
}
108105

opengrok-indexer/src/main/java/org/opengrok/indexer/util/SourceSplitter.java

Lines changed: 28 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -163,45 +163,36 @@ private void reset(Reader reader) throws IOException {
163163
int c;
164164
while ((c = reader.read()) != -1) {
165165
++length;
166-
bld.append((char) c);
167-
switch (c) {
168-
case '\r':
169-
c = reader.read();
170-
if (c == -1) {
171-
slist.add(bld.toString());
172-
bld.setLength(0);
173-
break;
174-
} else {
166+
167+
redo_c:
168+
while (true) {
169+
bld.append((char) c);
170+
switch (c) {
171+
case '\r':
172+
c = reader.read();
173+
if (c == -1) {
174+
slist.add(bld.toString());
175+
bld.setLength(0);
176+
break redo_c;
177+
}
175178
++length;
176-
switch (c) {
177-
case '\n':
178-
bld.append((char) c);
179-
slist.add(bld.toString());
180-
bld.setLength(0);
181-
break;
182-
case '\r':
183-
slist.add(bld.toString());
184-
bld.setLength(0);
185-
186-
bld.append((char) c);
187-
slist.add(bld.toString());
188-
bld.setLength(0);
189-
break;
190-
default:
191-
slist.add(bld.toString());
192-
bld.setLength(0);
193-
194-
bld.append((char) c);
195-
break;
179+
if (c == '\n') {
180+
bld.append((char) c);
181+
slist.add(bld.toString());
182+
bld.setLength(0);
183+
break redo_c;
196184
}
197-
}
198-
break;
199-
case '\n':
200-
slist.add(bld.toString());
201-
bld.setLength(0);
202-
break;
203-
default:
204-
break;
185+
slist.add(bld.toString());
186+
bld.setLength(0);
187+
continue redo_c;
188+
case '\n':
189+
slist.add(bld.toString());
190+
bld.setLength(0);
191+
break redo_c;
192+
default:
193+
// pass
194+
}
195+
break;
205196
}
206197
}
207198
if (bld.length() > 0) {

opengrok-indexer/src/test/java/org/opengrok/indexer/util/LineBreakerTest.java

Lines changed: 27 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,9 @@
2323

2424
package org.opengrok.indexer.util;
2525

26-
import java.io.IOException;
2726
import static org.junit.Assert.assertEquals;
27+
28+
import java.io.IOException;
2829
import org.junit.BeforeClass;
2930
import org.junit.Test;
3031
import org.opengrok.indexer.analysis.StreamSource;
@@ -91,4 +92,29 @@ public void shouldHandleDocsOfLongerLength() throws IOException {
9192
assertEquals("split find-index", 4, brkr.findLineIndex(20));
9293
assertEquals("split find-index", 4, brkr.findLineIndex(21));
9394
}
95+
96+
@Test
97+
public void shouldHandleInterspersedLineEndings() throws IOException {
98+
// 0 0
99+
// 0- -- -5 - -- - 1 - - - -5 -- - -2--
100+
// 0 1 2 3 4 5 6 7 8 9 0
101+
// 1
102+
final String INPUT = "a\rb\nc\r\nd\r\r\r\n\re\n\rf\r\nghij";
103+
StreamSource src = StreamSource.fromString(INPUT);
104+
105+
brkr.reset(src);
106+
assertEquals("split count", 11, brkr.count());
107+
assertEquals("split offset", 0, brkr.getOffset(0));
108+
assertEquals("split offset", 2, brkr.getOffset(1));
109+
assertEquals("split offset", 4, brkr.getOffset(2));
110+
assertEquals("split offset", 7, brkr.getOffset(3));
111+
assertEquals("split offset", 9, brkr.getOffset(4));
112+
assertEquals("split offset", 10, brkr.getOffset(5));
113+
assertEquals("split offset", 12, brkr.getOffset(6));
114+
assertEquals("split offset", 13, brkr.getOffset(7));
115+
assertEquals("split offset", 15, brkr.getOffset(8));
116+
assertEquals("split offset", 16, brkr.getOffset(9));
117+
assertEquals("split offset", 19, brkr.getOffset(10));
118+
assertEquals("split offset", 23, brkr.getOffset(11));
119+
}
94120
}

opengrok-indexer/src/test/java/org/opengrok/indexer/util/SourceSplitterTest.java

Lines changed: 28 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,9 @@
2323

2424
package org.opengrok.indexer.util;
2525

26-
import java.io.IOException;
2726
import static org.junit.Assert.assertEquals;
27+
28+
import java.io.IOException;
2829
import org.junit.Test;
2930
import org.opengrok.indexer.analysis.StreamSource;
3031

@@ -132,4 +133,30 @@ public void shouldHandleStreamedDocsOfLongerLength() throws IOException {
132133
assertEquals("split find-index of " + i, exp, li);
133134
}
134135
}
136+
137+
@Test
138+
public void shouldHandleInterspersedLineEndings() throws IOException {
139+
// 0 0
140+
// 0- -- -5 - -- - 1 - - - -5 -- - -2--
141+
// 0 1 2 3 4 5 6 7 8 9 0
142+
// 1
143+
final String INPUT = "a\rb\nc\r\nd\r\r\r\n\re\n\rf\r\nghij";
144+
StreamSource src = StreamSource.fromString(INPUT);
145+
146+
SourceSplitter splitter = new SourceSplitter();
147+
splitter.reset(src);
148+
assertEquals("split count", 11, splitter.count());
149+
assertEquals("split offset", 0, splitter.getOffset(0));
150+
assertEquals("split offset", 2, splitter.getOffset(1));
151+
assertEquals("split offset", 4, splitter.getOffset(2));
152+
assertEquals("split offset", 7, splitter.getOffset(3));
153+
assertEquals("split offset", 9, splitter.getOffset(4));
154+
assertEquals("split offset", 10, splitter.getOffset(5));
155+
assertEquals("split offset", 12, splitter.getOffset(6));
156+
assertEquals("split offset", 13, splitter.getOffset(7));
157+
assertEquals("split offset", 15, splitter.getOffset(8));
158+
assertEquals("split offset", 16, splitter.getOffset(9));
159+
assertEquals("split offset", 19, splitter.getOffset(10));
160+
assertEquals("split offset", 23, splitter.getOffset(11));
161+
}
135162
}

0 commit comments

Comments
 (0)