Skip to content

Commit 7018a40

Browse files
ahornace authored and Vladimir Kotal committed
Highlight changed parts in Lucene classes
1 parent 03bb0e4 commit 7018a40

File tree

2 files changed

+97
-29
lines changed

2 files changed

+97
-29
lines changed

suggester/src/main/java/org/opengrok/suggest/query/customized/CustomExactPhraseScorer.java

Lines changed: 38 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
* Modified Apache Lucene's ExactPhraseScorer (now {@link org.apache.lucene.search.ExactPhraseMatcher}) to support
3737
* remembering the positions where the match was found.
3838
*/
39-
final class CustomExactPhraseScorer extends Scorer implements PhraseScorer {
39+
final class CustomExactPhraseScorer extends Scorer implements PhraseScorer { // custom – special interface
4040

4141
private static class PostingsAndPosition {
4242
private final PostingsEnum postings;
@@ -49,29 +49,44 @@ public PostingsAndPosition(PostingsEnum postings, int offset) {
4949
}
5050
}
5151

52-
private Map<Integer, IntsHolder> map = new HashMap<>();
52+
// custom begins – only necessary attributes
53+
private Map<Integer, IntsHolder> documentToPositionsMap = new HashMap<>();
5354

5455
private int offset;
5556

5657
private final DocIdSetIterator conjunction;
5758
private final PostingsAndPosition[] postings;
58-
59-
public CustomExactPhraseScorer(Weight weight, CustomPhraseQuery.PostingsAndFreq[] postings, int offset) {
59+
// custom ends
60+
61+
// custom – constructor parameters
62+
/**
63+
* Creates custom exact phrase scorer which remembers the positions of the found matches.
64+
* @param weight query weight
65+
* @param postings postings of the terms
66+
* @param offset the offset that is added to the found match position
67+
*/
68+
public CustomExactPhraseScorer(
69+
final Weight weight,
70+
final CustomPhraseQuery.PostingsAndFreq[] postings,
71+
final int offset
72+
) {
6073
super(weight);
6174

62-
this.offset = offset;
75+
this.offset = offset; // custom
6376

6477
List<DocIdSetIterator> iterators = new ArrayList<>();
6578
List<PostingsAndPosition> postingsAndPositions = new ArrayList<>();
6679
for (CustomPhraseQuery.PostingsAndFreq posting : postings) {
6780
iterators.add(posting.postings);
6881
postingsAndPositions.add(new PostingsAndPosition(posting.postings, posting.position));
6982
}
83+
// custom begins – support for single term
7084
if (iterators.size() == 1) {
7185
conjunction = iterators.get(0);
7286
} else {
7387
conjunction = ConjunctionDISI.intersectIterators(iterators);
7488
}
89+
// custom ends
7590
assert TwoPhaseIterator.unwrap(conjunction) == null;
7691
this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
7792
}
@@ -81,12 +96,12 @@ public TwoPhaseIterator twoPhaseIterator() {
8196
return new TwoPhaseIterator(conjunction) {
8297
@Override
8398
public boolean matches() throws IOException {
84-
return phraseFreq() > 0;
99+
return phraseFreq() > 0; // custom – only necessary part left
85100
}
86101

87102
@Override
88103
public float matchCost() {
89-
return 0;
104+
return 0; // custom – default value
90105
}
91106
};
92107
}
@@ -98,7 +113,7 @@ public DocIdSetIterator iterator() {
98113

99114
@Override
100115
public String toString() {
101-
return "CustomExactPhraseScorer(" + weight + ")";
116+
return "CustomExactPhraseScorer(" + weight + ")"; // custom – renamed class
102117
}
103118

104119
@Override
@@ -108,7 +123,7 @@ public int docID() {
108123

109124
@Override
110125
public float score() {
111-
return 1;
126+
return 1; // custom – default value
112127
}
113128

114129
/** Advance the given pos enum to the first doc on or after {@code target}.
@@ -138,7 +153,7 @@ private int phraseFreq() throws IOException {
138153
int freq = 0;
139154
final PostingsAndPosition lead = postings[0];
140155

141-
BitIntsHolder positions = new BitIntsHolder();
156+
BitIntsHolder positions = null; // custom – store positions
142157

143158
advanceHead:
144159
while (true) {
@@ -162,7 +177,12 @@ private int phraseFreq() throws IOException {
162177
}
163178

164179
freq += 1;
180+
// custom begins – found a match
181+
if (positions == null) {
182+
positions = new BitIntsHolder();
183+
}
165184
positions.set(phrasePos + offset);
185+
// custom ends
166186

167187
if (lead.upTo == lead.freq) {
168188
break;
@@ -171,16 +191,21 @@ private int phraseFreq() throws IOException {
171191
lead.upTo += 1;
172192
}
173193

174-
if (!positions.isEmpty()) {
175-
map.put(docID(), positions);
194+
// custom begins – if some positions were found then store them
195+
if (positions != null) {
196+
documentToPositionsMap.put(docID(), positions);
176197
}
198+
// custom ends
177199

178200
return freq;
179201
}
180202

203+
// custom begins – special interface implementation
204+
/** {@inheritDoc} */
181205
@Override
182206
public IntsHolder getPositions(final int docId) {
183-
return map.get(docId);
207+
return documentToPositionsMap.get(docId);
184208
}
209+
// custom ends
185210

186211
}

suggester/src/main/java/org/opengrok/suggest/query/customized/CustomSloppyPhraseScorer.java

Lines changed: 59 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
* Modified Apache Lucene's SloppyPhraseScorer (now {@link org.apache.lucene.search.SloppyPhraseMatcher}) to remember
4141
* the positions of the matches.
4242
*/
43-
final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer {
43+
final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer { // custom – specific interface
4444

4545
private final DocIdSetIterator conjunction;
4646
private final PhrasePositions[] phrasePositions;
@@ -59,14 +59,29 @@ final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer {
5959
private PhrasePositions[][] rptGroups; // in each group are PPs that repeats each other (i.e. same term), sorted by (query) offset
6060
private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps
6161

62+
// custom begins
6263
private int offset;
6364

64-
private Map<Integer, IntsHolder> map = new HashMap<>();
65+
private Map<Integer, IntsHolder> documentsToPositionsMap = new HashMap<>();
66+
// custom ends
6567

66-
CustomSloppyPhraseScorer(Weight weight, CustomPhraseQuery.PostingsAndFreq[] postings, int slop, int offset) {
68+
// custom – constructor parameters
69+
/**
70+
* Creates custom sloppy phrase scorer which remembers the positions of the found matches.
71+
* @param weight query weight
72+
* @param postings postings of the terms
73+
* @param slop "word edit distance"
74+
* @param offset the offset that is added to the found match position
75+
*/
76+
CustomSloppyPhraseScorer(
77+
final Weight weight,
78+
final CustomPhraseQuery.PostingsAndFreq[] postings,
79+
final int slop,
80+
final int offset
81+
) {
6782
super(weight);
6883
this.slop = slop;
69-
this.offset = offset;
84+
this.offset = offset; // custom
7085
this.numPostings = postings==null ? 0 : postings.length;
7186
pq = new PhraseQueue(postings.length);
7287
DocIdSetIterator[] iterators = new DocIdSetIterator[postings.length];
@@ -75,11 +90,13 @@ final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer {
7590
iterators[i] = postings[i].postings;
7691
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
7792
}
93+
// custom begins – support for single term
7894
if (iterators.length == 1) {
7995
conjunction = iterators[0];
8096
} else {
8197
conjunction = ConjunctionDISI.intersectIterators(Arrays.asList(iterators));
8298
}
99+
// custom ends
83100
assert TwoPhaseIterator.unwrap(conjunction) == null;
84101
}
85102

@@ -102,6 +119,7 @@ final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer {
102119
* We may want to fix this in the future (currently not, for performance reasons).
103120
*/
104121
private float phraseFreq() throws IOException {
122+
// custom begins
105123
BitIntsHolder allPositions = new BitIntsHolder();
106124

107125
BitIntsHolder positions = new BitIntsHolder();
@@ -120,25 +138,28 @@ private float phraseFreq() throws IOException {
120138
matchCount++;
121139
}
122140
if (!positions.isEmpty()) {
123-
map.put(docID(), positions);
141+
documentsToPositionsMap.put(docID(), positions);
124142
}
125143
return matchCount;
126144
}
145+
// custom ends
127146

128147
if (!initPhrasePositions()) {
129148
return 0.0f;
130149
}
131150

151+
// custom begins
132152
for (PhrasePositions phrasePositions : this.pq) {
133153
allPositions.set(phrasePositions.position + phrasePositions.offset);
134154
}
155+
// custom ends
135156

136157
int numMatches = 0;
137158
PhrasePositions pp = pq.pop();
138159
int matchLength = end - pp.position;
139160
int next = pq.top().position;
140161

141-
int lastEnd = this.end;
162+
int lastEnd = this.end; // custom – remember last matched position
142163

143164
while (advancePP(pp)) {
144165

@@ -151,35 +172,51 @@ private float phraseFreq() throws IOException {
151172
if (pp.position > next) { // done minimizing current match-length
152173
if (matchLength <= slop) {
153174
numMatches++;
175+
// custom – match found, remember positions
154176
addPositions(positions, allPositions, lastEnd, matchLength);
155177
}
156178
pq.add(pp);
157179
pp = pq.pop();
158180
next = pq.top().position;
159181
matchLength = end - pp.position;
160182

161-
lastEnd = this.end;
183+
lastEnd = this.end; // custom – remember position of last match
162184

163185
} else {
164186
int matchLength2 = end - pp.position;
165187
if (matchLength2 < matchLength) {
166188
matchLength = matchLength2;
167189
}
168190

169-
lastEnd = this.end;
191+
lastEnd = this.end; // custom – remember position of last match
170192
}
171193
}
172194
if (matchLength <= slop) {
173195
numMatches++;
174-
addPositions(positions, allPositions, lastEnd, matchLength);
196+
addPositions(positions, allPositions, lastEnd, matchLength); // custom – match found, remember positions
175197
}
198+
// custom begins – if some positions were found then store them
176199
if (!positions.isEmpty()) {
177-
map.put(docID(), positions);
200+
documentsToPositionsMap.put(docID(), positions);
178201
}
202+
// custom ends
179203
return numMatches;
180204
}
181205

182-
private void addPositions(BitIntsHolder positions, IntsHolder allPositions, int lastEnd, int matchLength) {
206+
// custom begins
207+
/**
208+
* Stores all the possible positions.
209+
* @param positions where to store the positions
210+
* @param allPositions positions already taken by the terms
211+
* @param lastEnd match position
212+
* @param matchLength how many words of the "edit distance" were already used to find this match
213+
*/
214+
private void addPositions(
215+
final BitIntsHolder positions,
216+
final IntsHolder allPositions,
217+
final int lastEnd,
218+
final int matchLength
219+
) {
183220
int expectedPos = lastEnd + offset;
184221

185222
int range = this.slop - matchLength;
@@ -190,6 +227,7 @@ private void addPositions(BitIntsHolder positions, IntsHolder allPositions, int
190227
}
191228
}
192229
}
230+
// custom ends
193231

194232
/** advance a PhrasePosition and update 'end', return false if exhausted */
195233
private boolean advancePP(PhrasePositions pp) throws IOException {
@@ -439,7 +477,7 @@ private ArrayList<ArrayList<PhrasePositions>> gatherRptGroups(LinkedHashMap<Term
439477
// simpler - no multi-terms - can base on positions in first doc
440478
for (int i=0; i<rpp.length; i++) {
441479
PhrasePositions pp = rpp[i];
442-
if (pp.rptGroup >=0) {
480+
if (pp.rptGroup >=0) { // custom – add braces because of checkstyle
443481
continue; // already marked as a repetition
444482
}
445483
int tpPos = tpPos(pp);
@@ -582,11 +620,13 @@ public int docID() {
582620

583621
@Override
584622
public float score() {
585-
return 1;
623+
return 1; // custom – default value
586624
}
587625

588626
@Override
589-
public String toString() { return "CustomSloppyPhraseScorer(" + weight + ")"; }
627+
public String toString() {
628+
return "CustomSloppyPhraseScorer(" + weight + ")"; // custom – renamed class
629+
}
590630

591631
@Override
592632
public TwoPhaseIterator twoPhaseIterator() {
@@ -599,7 +639,7 @@ public boolean matches() throws IOException {
599639

600640
@Override
601641
public float matchCost() {
602-
return 0;
642+
return 0; // custom – default value
603643
}
604644

605645
@Override
@@ -614,8 +654,11 @@ public DocIdSetIterator iterator() {
614654
return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
615655
}
616656

657+
// custom begins – special interface implementation
658+
/** {@inheritDoc} */
617659
@Override
618660
public IntsHolder getPositions(int docId) {
619-
return map.get(docId);
661+
return documentsToPositionsMap.get(docId);
620662
}
663+
// custom ends
621664
}

0 commit comments

Comments
 (0)