40
40
* Modified Apache Lucene's SloppyPhraseScorer (now {@link org.apache.lucene.search.SloppyPhraseMatcher}) to remember
41
41
* the positions of the matches.
42
42
*/
43
- final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer {
43
+ final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer { // custom – specific interface
44
44
45
45
private final DocIdSetIterator conjunction ;
46
46
private final PhrasePositions [] phrasePositions ;
@@ -59,14 +59,29 @@ final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer {
59
59
private PhrasePositions [][] rptGroups ; // in each group are PPs that repeat each other (i.e. same term), sorted by (query) offset
60
60
private PhrasePositions [] rptStack ; // temporary stack for switching colliding repeating pps
61
61
62
+ // custom begins
62
63
private int offset ;
63
64
64
- private Map <Integer , IntsHolder > map = new HashMap <>();
65
+ private Map <Integer , IntsHolder > documentsToPositionsMap = new HashMap <>();
66
+ // custom ends
65
67
66
- CustomSloppyPhraseScorer (Weight weight , CustomPhraseQuery .PostingsAndFreq [] postings , int slop , int offset ) {
68
+ // custom – constructor parameters
69
+ /**
70
+ * Creates a custom sloppy phrase scorer that remembers the positions of the found matches.
71
+ * @param weight query weight
72
+ * @param postings postings of the terms
73
+ * @param slop maximum allowed "word edit distance" for a phrase match
74
+ * @param offset the offset that is added to the found match position
75
+ */
76
+ CustomSloppyPhraseScorer (
77
+ final Weight weight ,
78
+ final CustomPhraseQuery .PostingsAndFreq [] postings ,
79
+ final int slop ,
80
+ final int offset
81
+ ) {
67
82
super (weight );
68
83
this .slop = slop ;
69
- this .offset = offset ;
84
+ this .offset = offset ; // custom
70
85
this .numPostings = postings ==null ? 0 : postings .length ;
71
86
pq = new PhraseQueue (postings .length );
72
87
DocIdSetIterator [] iterators = new DocIdSetIterator [postings .length ];
@@ -75,11 +90,13 @@ final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer {
75
90
iterators [i ] = postings [i ].postings ;
76
91
phrasePositions [i ] = new PhrasePositions (postings [i ].postings , postings [i ].position , i , postings [i ].terms );
77
92
}
93
+ // custom begins – support for single term
78
94
if (iterators .length == 1 ) {
79
95
conjunction = iterators [0 ];
80
96
} else {
81
97
conjunction = ConjunctionDISI .intersectIterators (Arrays .asList (iterators ));
82
98
}
99
+ // custom ends
83
100
assert TwoPhaseIterator .unwrap (conjunction ) == null ;
84
101
}
85
102
@@ -102,6 +119,7 @@ final class CustomSloppyPhraseScorer extends Scorer implements PhraseScorer {
102
119
* We may want to fix this in the future (currently not, for performance reasons).
103
120
*/
104
121
private float phraseFreq () throws IOException {
122
+ // custom begins
105
123
BitIntsHolder allPositions = new BitIntsHolder ();
106
124
107
125
BitIntsHolder positions = new BitIntsHolder ();
@@ -120,25 +138,28 @@ private float phraseFreq() throws IOException {
120
138
matchCount ++;
121
139
}
122
140
if (!positions .isEmpty ()) {
123
- map .put (docID (), positions );
141
+ documentsToPositionsMap .put (docID (), positions );
124
142
}
125
143
return matchCount ;
126
144
}
145
+ // custom ends
127
146
128
147
if (!initPhrasePositions ()) {
129
148
return 0.0f ;
130
149
}
131
150
151
+ // custom begins
132
152
for (PhrasePositions phrasePositions : this .pq ) {
133
153
allPositions .set (phrasePositions .position + phrasePositions .offset );
134
154
}
155
+ // custom ends
135
156
136
157
int numMatches = 0 ;
137
158
PhrasePositions pp = pq .pop ();
138
159
int matchLength = end - pp .position ;
139
160
int next = pq .top ().position ;
140
161
141
- int lastEnd = this .end ;
162
+ int lastEnd = this .end ; // custom – remember last matched position
142
163
143
164
while (advancePP (pp )) {
144
165
@@ -151,35 +172,51 @@ private float phraseFreq() throws IOException {
151
172
if (pp .position > next ) { // done minimizing current match-length
152
173
if (matchLength <= slop ) {
153
174
numMatches ++;
175
+ // custom – match found, remember positions
154
176
addPositions (positions , allPositions , lastEnd , matchLength );
155
177
}
156
178
pq .add (pp );
157
179
pp = pq .pop ();
158
180
next = pq .top ().position ;
159
181
matchLength = end - pp .position ;
160
182
161
- lastEnd = this .end ;
183
+ lastEnd = this .end ; // custom – remember position of last match
162
184
163
185
} else {
164
186
int matchLength2 = end - pp .position ;
165
187
if (matchLength2 < matchLength ) {
166
188
matchLength = matchLength2 ;
167
189
}
168
190
169
- lastEnd = this .end ;
191
+ lastEnd = this .end ; // custom – remember position of last match
170
192
}
171
193
}
172
194
if (matchLength <= slop ) {
173
195
numMatches ++;
174
- addPositions (positions , allPositions , lastEnd , matchLength );
196
+ addPositions (positions , allPositions , lastEnd , matchLength ); // custom – match found, remember positions
175
197
}
198
+ // custom begins – if some positions were found then store them
176
199
if (!positions .isEmpty ()) {
177
- map .put (docID (), positions );
200
+ documentsToPositionsMap .put (docID (), positions );
178
201
}
202
+ // custom ends
179
203
return numMatches ;
180
204
}
181
205
182
- private void addPositions (BitIntsHolder positions , IntsHolder allPositions , int lastEnd , int matchLength ) {
206
+ // custom begins
207
+ /**
208
+ * Stores all the possible positions.
209
+ * @param positions where to store the positions
210
+ * @param allPositions positions already taken by the terms
211
+ * @param lastEnd match position
212
+ * @param matchLength how much of the allowed "edit distance" (slop) was already used to find this match
213
+ */
214
+ private void addPositions (
215
+ final BitIntsHolder positions ,
216
+ final IntsHolder allPositions ,
217
+ final int lastEnd ,
218
+ final int matchLength
219
+ ) {
183
220
int expectedPos = lastEnd + offset ;
184
221
185
222
int range = this .slop - matchLength ;
@@ -190,6 +227,7 @@ private void addPositions(BitIntsHolder positions, IntsHolder allPositions, int
190
227
}
191
228
}
192
229
}
230
+ // custom ends
193
231
194
232
/** advance a PhrasePosition and update 'end', return false if exhausted */
195
233
private boolean advancePP (PhrasePositions pp ) throws IOException {
@@ -439,7 +477,7 @@ private ArrayList<ArrayList<PhrasePositions>> gatherRptGroups(LinkedHashMap<Term
439
477
// simpler - no multi-terms - can base on positions in first doc
440
478
for (int i =0 ; i <rpp .length ; i ++) {
441
479
PhrasePositions pp = rpp [i ];
442
- if (pp .rptGroup >=0 ) {
480
+ if (pp .rptGroup >=0 ) { // custom – add braces because of checkstyle
443
481
continue ; // already marked as a repetition
444
482
}
445
483
int tpPos = tpPos (pp );
@@ -582,11 +620,13 @@ public int docID() {
582
620
583
621
@ Override
584
622
public float score () {
585
- return 1 ;
623
+ return 1 ; // custom – default value
586
624
}
587
625
588
626
@ Override
589
- public String toString () { return "CustomSloppyPhraseScorer(" + weight + ")" ; }
627
+ public String toString () {
628
+ return "CustomSloppyPhraseScorer(" + weight + ")" ; // custom – renamed class
629
+ }
590
630
591
631
@ Override
592
632
public TwoPhaseIterator twoPhaseIterator () {
@@ -599,7 +639,7 @@ public boolean matches() throws IOException {
599
639
600
640
@ Override
601
641
public float matchCost () {
602
- return 0 ;
642
+ return 0 ; // custom – default value
603
643
}
604
644
605
645
@ Override
@@ -614,8 +654,11 @@ public DocIdSetIterator iterator() {
614
654
return TwoPhaseIterator .asDocIdSetIterator (twoPhaseIterator ());
615
655
}
616
656
657
+ // custom begins – special interface implementation
658
+ /** {@inheritDoc} */
617
659
@ Override
618
660
public IntsHolder getPositions (int docId ) {
619
- return map .get (docId );
661
+ return documentsToPositionsMap .get (docId );
620
662
}
663
+ // custom ends
621
664
}
0 commit comments