Skip to content

Commit ee1827a

Browse files
committed
Share some code between SourceSplitter and LineBreaker
Also, use "offset" consistently to align with Lucene.
1 parent b47b86e commit ee1827a

File tree

9 files changed

+299
-229
lines changed

9 files changed

+299
-229
lines changed

opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/CtagsReader.java

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
/*
2121
* Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
22-
* Portions Copyright (c) 2017-2018, Chris Fraire <[email protected]>.
22+
* Portions Copyright (c) 2017-2018, 2020, Chris Fraire <[email protected]>.
2323
*/
2424

2525
package org.opengrok.indexer.analysis;
@@ -395,9 +395,9 @@ private CpatIndex bestIndexOfTag(int lineno, String whole, String str) {
395395

396396
int woff = strictIndexOf(whole, str);
397397
if (woff < 0) {
398-
/**
398+
/*
399399
* When a splitter is available, search the entire line.
400-
* (N.b. use 0-offset vs ctags's 1-offset.)
400+
* (N.b. use 0-based indexing vs ctags's 1-based.)
401401
*/
402402
String cut = trySplitterCut(lineno - 1, 1);
403403
if (cut == null || !cut.startsWith(whole)) {
@@ -512,9 +512,9 @@ private CpatIndex bestIndexOfArg(int lineno, String whole, String arg) {
512512
return new CpatIndex(lineno, s, e);
513513
}
514514

515-
/**
515+
/*
516516
* When a splitter is available, search the next several lines.
517-
* (N.b. use 0-offset vs ctags's 1-offset.)
517+
* (N.b. use 0-based indexing vs ctags's 1-based.)
518518
*/
519519
String cut = trySplitterCut(lineno - 1, MAX_CUT_LINES);
520520
if (cut == null || !cut.startsWith(whole)) {
@@ -640,22 +640,22 @@ private PatResult strictMatch(String whole, String substr, Pattern pat) {
640640
}
641641

642642
/**
643-
* Finds the line with the longest content from {@code midx}.
643+
* Finds the line with the longest content from {@code cut}.
644644
* <p>
645645
* The {@link Definitions} tag model is based on a match within a line.
646646
* "signature" fields, however, can be condensed from multiple lines; and a
647647
* fuzzy match can therefore span multiple lines.
648648
*/
649649
private CpatIndex bestLineOfMatch(int lineno, PatResult pr, String cut) {
650-
// (N.b. use 0-offset vs ctags's 1-offset.)
651-
int lpos = splitter.getPosition(lineno - 1);
652-
int mpos = lpos + pr.start;
653-
int moff = splitter.findLineOffset(mpos);
654-
int zpos = lpos + pr.end - 1;
655-
int zoff = splitter.findLineOffset(zpos);
650+
// (N.b. use 0-based indexing vs ctags's 1-based.)
651+
int lineOff = splitter.getOffset(lineno - 1);
652+
int mOff = lineOff + pr.start;
653+
int mIndex = splitter.findLineIndex(mOff);
654+
int zOff = lineOff + pr.end - 1;
655+
int zIndex = splitter.findLineIndex(zOff);
656656

657657
int t = tabSize;
658-
int resoff = moff;
658+
int resIndex = mIndex;
659659
int contentLength = 0;
660660
/**
661661
* Initialize the following just to silence warnings but with values
@@ -664,31 +664,31 @@ private CpatIndex bestLineOfMatch(int lineno, PatResult pr, String cut) {
664664
String whole = "";
665665
int s = 0;
666666
int e = 1;
667-
/**
668-
* Iterate to determine the length of the portion of `midx' that
669-
* is contained within each line.
667+
/*
668+
* Iterate to determine the length of the portion of cut that is
669+
* contained within each line.
670670
*/
671-
for (int ioff = moff; ioff <= zoff; ++ioff) {
672-
String iwhole = splitter.getLine(ioff);
673-
int ioffpos = splitter.getPosition(ioff);
674-
int iendpos = ioffpos + iwhole.length();
675-
int i_s = pr.start + lpos < ioffpos ? ioffpos : pr.start + lpos;
676-
int i_e = pr.end + lpos > iendpos ? iendpos : pr.end + lpos;
677-
if (i_e - i_s > contentLength) {
678-
contentLength = i_e - i_s;
679-
resoff = ioff;
671+
for (int lIndex = mIndex; lIndex <= zIndex; ++lIndex) {
672+
String iwhole = splitter.getLine(lIndex);
673+
int lOff = splitter.getOffset(lIndex);
674+
int lOffZ = lOff + iwhole.length();
675+
int offStart = Math.max(pr.start + lineOff, lOff);
676+
int offEnd = Math.min(pr.end + lineOff, lOffZ);
677+
if (offEnd - offStart > contentLength) {
678+
contentLength = offEnd - offStart;
679+
resIndex = lIndex;
680680
whole = iwhole;
681681
// (The following are not yet adjusted for tabs.)
682-
s = i_s - ioffpos;
683-
e = i_e - ioffpos;
682+
s = offStart - lOff;
683+
e = offEnd - lOff;
684684
}
685685
}
686686

687687
if (s >= 0 && s < whole.length() && e >= 0 && e <= whole.length()) {
688688
s = ExpandTabsReader.translate(whole, s, t);
689689
e = ExpandTabsReader.translate(whole, e, t);
690-
// (N.b. use ctags's 1-offset.)
691-
return new CpatIndex(resoff + 1, s, e);
690+
// (N.b. use ctags's 1-based indexing.)
691+
return new CpatIndex(resIndex + 1, s, e);
692692
}
693693

694694
/**

opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/plain/DefinitionsTokenStream.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2018, Chris Fraire <[email protected]>.
21+
* Copyright (c) 2018, 2020, Chris Fraire <[email protected]>.
2222
*/
2323

2424
package org.opengrok.indexer.analysis.plain;
@@ -118,7 +118,7 @@ private void createTokens(Definitions defs, LineBreaker brk) {
118118

119119
if (lineno >= 0 && lineno < brk.count() && tag.symbol != null &&
120120
tag.text != null) {
121-
int lineoff = brk.getPosition(lineno);
121+
int lineoff = brk.getOffset(lineno);
122122
if (tag.lineStart >= 0) {
123123
PendingToken tok = new PendingToken(tag.symbol, lineoff +
124124
tag.lineStart, lineoff + tag.lineEnd);

opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/ContextFormatter.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2018, Chris Fraire <[email protected]>.
21+
* Copyright (c) 2018, 2020, Chris Fraire <[email protected]>.
2222
*/
2323

2424
package org.opengrok.indexer.search.context;
@@ -323,7 +323,7 @@ private void writeScope(int lineOffset, Appendable dest)
323323
throws IOException {
324324
Scopes.Scope scope = null;
325325
if (scopes != null) {
326-
// N.b. use ctags 1-offset vs 0-offset.
326+
// N.b. use ctags 1-based indexing vs 0-based.
327327
scope = scopes.getScope(lineOffset + 1);
328328
}
329329
if (scope != null && scope != scopes.getScope(-1)) {
@@ -340,7 +340,7 @@ private void writeScope(int lineOffset, Appendable dest)
340340
private void writeTag(int lineOffset, Appendable dest, List<String> marks)
341341
throws IOException {
342342
if (defs != null) {
343-
// N.b. use ctags 1-offset vs 0-offset.
343+
// N.b. use ctags 1-based indexing vs 0-based.
344344
List<Tag> linetags = defs.getTags(lineOffset + 1);
345345
if (linetags != null) {
346346
Tag pickedTag = findTagForMark(linetags, marks);

opengrok-indexer/src/main/java/org/opengrok/indexer/search/context/PassageConverter.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2018, Chris Fraire <[email protected]>.
21+
* Copyright (c) 2018, 2020, Chris Fraire <[email protected]>.
2222
*/
2323

2424
package org.opengrok.indexer.search.context;
@@ -75,11 +75,11 @@ public SortedMap<Integer, LineHighlight> convert(Passage[] passages,
7575
continue;
7676
}
7777

78-
int m = splitter.findLineOffset(start);
78+
int m = splitter.findLineIndex(start);
7979
if (m < 0) {
8080
continue;
8181
}
82-
int n = splitter.findLineOffset(end - 1);
82+
int n = splitter.findLineIndex(end - 1);
8383
if (n < 0) {
8484
continue;
8585
}
@@ -97,23 +97,23 @@ public SortedMap<Integer, LineHighlight> convert(Passage[] passages,
9797
// Create LineHighlight entries for passage matches.
9898
for (int i = 0; i < passage.getNumMatches(); ++i) {
9999
int mstart = passage.getMatchStarts()[i];
100-
int mm = splitter.findLineOffset(mstart);
100+
int mm = splitter.findLineIndex(mstart);
101101
int mend = passage.getMatchEnds()[i];
102-
int nn = splitter.findLineOffset(mend - 1);
102+
int nn = splitter.findLineIndex(mend - 1);
103103
if (mstart < mend && mm >= m && mm <= n && nn >= m && nn <= n) {
104104
if (mm == nn) {
105-
int lbeg = splitter.getPosition(mm);
105+
int lbeg = splitter.getOffset(mm);
106106
int lstart = mstart - lbeg;
107107
int lend = mend - lbeg;
108108
LineHighlight lhigh = res.get(mm);
109109
lhigh.addMarkup(PhraseHighlight.create(lstart, lend));
110110
} else {
111-
int lbeg = splitter.getPosition(mm);
111+
int lbeg = splitter.getOffset(mm);
112112
int loff = mstart - lbeg;
113113
LineHighlight lhigh = res.get(mm);
114114
lhigh.addMarkup(PhraseHighlight.createStarter(loff));
115115

116-
lbeg = splitter.getPosition(nn);
116+
lbeg = splitter.getOffset(nn);
117117
loff = mend - lbeg;
118118
lhigh = res.get(nn);
119119
lhigh.addMarkup(PhraseHighlight.createEnder(loff));

opengrok-indexer/src/main/java/org/opengrok/indexer/util/LineBreaker.java

Lines changed: 40 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,13 @@
1818
*/
1919

2020
/*
21-
* Copyright (c) 2018, Chris Fraire <[email protected]>.
21+
* Copyright (c) 2018, 2020, Chris Fraire <[email protected]>.
2222
*/
2323

2424
package org.opengrok.indexer.util;
2525

26-
import java.io.BufferedReader;
2726
import java.io.IOException;
28-
import java.io.InputStream;
2927
import java.io.Reader;
30-
import java.nio.charset.StandardCharsets;
3128
import java.util.ArrayList;
3229
import java.util.List;
3330
import org.opengrok.indexer.analysis.StreamSource;
@@ -40,6 +37,7 @@
4037
public class LineBreaker {
4138

4239
private int length;
40+
private int count;
4341
private int[] lineOffsets;
4442

4543
/**
@@ -65,29 +63,13 @@ public void reset(StreamSource src, ReaderWrapper wrapper)
6563
throw new IllegalArgumentException("`src' is null");
6664
}
6765

68-
length = 0;
69-
lineOffsets = null;
70-
71-
try (InputStream in = src.getStream();
72-
Reader rdr = IOUtils.createBOMStrippedReader(in,
73-
StandardCharsets.UTF_8.name())) {
74-
Reader intermediate = null;
75-
if (wrapper != null) {
76-
intermediate = wrapper.get(rdr);
77-
}
78-
79-
try (BufferedReader brdr = new BufferedReader(
80-
intermediate != null ? intermediate : rdr)) {
81-
reset(brdr);
82-
} finally {
83-
if (intermediate != null) {
84-
intermediate.close();
85-
}
86-
}
87-
}
66+
SplitterUtil.reset(this::reset, src, wrapper);
8867
}
8968

9069
private void reset(Reader reader) throws IOException {
70+
length = 0;
71+
lineOffsets = null;
72+
9173
List<Integer> newOffsets = new ArrayList<>();
9274
newOffsets.add(0);
9375

@@ -124,6 +106,12 @@ private void reset(Reader reader) throws IOException {
124106
}
125107
}
126108

109+
count = newOffsets.size();
110+
if (newOffsets.get(newOffsets.size() - 1) < length) {
111+
newOffsets.add(length);
112+
// Do not increment count.
113+
}
114+
127115
lineOffsets = new int[newOffsets.size()];
128116
for (int i = 0; i < lineOffsets.length; ++i) {
129117
lineOffsets[i] = newOffsets.get(i);
@@ -139,28 +127,44 @@ public int originalLength() {
139127
}
140128

141129
/**
142-
* Gets the number of broken lines.
143-
* @return value
130+
* Gets the number of split lines.
144131
*/
145132
public int count() {
146133
if (lineOffsets == null) {
147134
throw new IllegalStateException("reset() did not succeed");
148135
}
149-
return lineOffsets.length;
136+
return count;
150137
}
151138

152139
/**
153-
* Gets the starting document character position of the line at the
154-
* specified offset.
155-
* @param offset greater than or equal to zero and less than or equal to
140+
* Gets the starting document character offset of the line at the
141+
* specified index in the lines list.
142+
* @param index greater than or equal to zero and less than or equal to
156143
* {@link #count()}
157-
* @return line length, including the end-of-line token
158-
* @throws IllegalArgumentException if {@code offset} is out of bounds
144+
* @return line starting offset
145+
* @throws IllegalArgumentException if {@code index} is out of bounds
159146
*/
160-
public int getPosition(int offset) {
161-
if (offset < 0 || lineOffsets == null || offset >= lineOffsets.length) {
162-
throw new IllegalArgumentException("`offset' is out of bounds");
147+
public int getOffset(int index) {
148+
if (lineOffsets == null) {
149+
throw new IllegalStateException("reset() did not succeed");
150+
}
151+
if (index < 0 || index >= lineOffsets.length) {
152+
throw new IllegalArgumentException("index is out of bounds");
153+
}
154+
return lineOffsets[index];
155+
}
156+
157+
/**
158+
* Finds the line index for the specified document offset.
159+
* @param offset greater than or equal to zero and less than
160+
* {@link #originalLength()}.
161+
* @return -1 if {@code offset} is beyond the document bounds; otherwise,
162+
* a valid index
163+
*/
164+
public int findLineIndex(int offset) {
165+
if (lineOffsets == null) {
166+
throw new IllegalStateException("reset() did not succeed");
163167
}
164-
return lineOffsets[offset];
168+
return SplitterUtil.findLineIndex(length, lineOffsets, offset);
165169
}
166170
}

0 commit comments

Comments
 (0)