Skip to content

Commit f96b2fc

Browse files
ahornace authored and
Vladimir Kotal committed
Properly normalize scores – improves multiple-project search
1 parent ec6b1ff commit f96b2fc

File tree

6 files changed

+31
-5
lines changed

6 files changed

+31
-5
lines changed

suggester/src/main/java/org/opengrok/suggest/Suggester.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ public final class Suggester implements Closeable {
9696
* @param projectsEnabled specifies if the OpenGrok projects are enabled
9797
* @param allowedFields fields for which the suggester should be enabled,
9898
* if {@code null} then enabled for all fields
99+
* @param timeThreshold time in milliseconds after which suggestion requests should time out
99100
*/
100101
public Suggester(
101102
final File suggesterDir,
@@ -585,7 +586,7 @@ public boolean isPartialResult() {
585586
}
586587

587588
/**
588-
* Model classes for holding project name and path to ist index directory.
589+
* Model class for holding a project name and the path to its index directory.
589590
*/
590591
public static class NamedIndexDir {
591592

suggester/src/main/java/org/opengrok/suggest/SuggesterSearcher.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,15 @@ class SuggesterSearcher extends IndexSearcher {
6868

6969
private boolean interrupted;
7070

71+
private final int numDocs;
72+
7173
/**
7274
* @param reader reader of the index for which to provide suggestions
7375
* @param resultSize size of the results
7476
*/
7577
SuggesterSearcher(final IndexReader reader, final int resultSize) {
7678
super(reader);
79+
numDocs = reader.numDocs();
7780
this.resultSize = resultSize;
7881
}
7982

@@ -180,7 +183,7 @@ private List<LookupResultItem> suggest(
180183

181184
int score;
182185
if (!needsDocumentIds) {
183-
score = termsEnum.docFreq();
186+
score = normalizeDocumentFrequency(termsEnum.docFreq(), numDocs);
184187
} else if (needPositionsAndFrequencies) {
185188
score = getPhraseScore(complexQueryData, leafReaderContext.docBase, postingsEnum);
186189
} else {
@@ -314,7 +317,7 @@ private int getDocumentFrequency(final IntsHolder documentIds, final int docBase
314317
weight++;
315318
}
316319
}
317-
return weight;
320+
return normalizeDocumentFrequency(weight, documentIds.numberOfElements());
318321
}
319322

320323
private boolean needPositionsAndFrequencies(final Query query) {
@@ -333,6 +336,10 @@ private boolean needPositionsAndFrequencies(final Query query) {
333336
return false;
334337
}
335338

339+
private static int normalizeDocumentFrequency(final int count, final int documents) {
340+
return (int) (((double) count / documents) * SuggesterUtils.NORMALIZED_DOCUMENT_FREQUENCY_MULTIPLIER);
341+
}
342+
336343
private static class ComplexQueryData {
337344

338345
private IntsHolder documentIds;

suggester/src/main/java/org/opengrok/suggest/SuggesterUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,12 @@
4848
*/
4949
public class SuggesterUtils {
5050

51+
public static final int NORMALIZED_DOCUMENT_FREQUENCY_MULTIPLIER = 1000;
52+
5153
private static final Logger logger = Logger.getLogger(SuggesterUtils.class.getName());
5254

5355
private static final long DEFAULT_TERM_WEIGHT = 0;
5456

55-
private static final int NORMALIZED_DOCUMENT_FREQUENCY_MULTIPLIER = 1000;
56-
5757
private SuggesterUtils() {
5858
}
5959

suggester/src/main/java/org/opengrok/suggest/query/data/BitIntsHolder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,10 @@ public boolean has(final int i) {
4242
return get(i);
4343
}
4444

45+
/** {@inheritDoc} */
46+
@Override
47+
public int numberOfElements() {
48+
return cardinality();
49+
}
50+
4551
}

suggester/src/main/java/org/opengrok/suggest/query/data/HashIntsHolder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,10 @@ public boolean has(final int i) {
3535
return contains(i);
3636
}
3737

38+
/** {@inheritDoc} */
39+
@Override
40+
public int numberOfElements() {
41+
return size();
42+
}
43+
3844
}

suggester/src/main/java/org/opengrok/suggest/query/data/IntsHolder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,10 @@ public interface IntsHolder {
3434
*/
3535
boolean has(int i);
3636

37+
/**
38+
* Returns the number of elements stored in this holder.
39+
* @return number of elements.
40+
*/
41+
int numberOfElements();
42+
3743
}

0 commit comments

Comments
 (0)