-
-
Notifications
You must be signed in to change notification settings - Fork 193
BE: Issue#1332 Sort based on prefix offsets #1421
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
germanosin
wants to merge
7
commits into
main
Choose a base branch
from
issues/1332-be
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 1 commit
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
4a0d729
BE: Issue#1332 Sort based on prefix offsets
germanosin 78794cc
Apply suggestions from code review
germanosin 69cef8d
Ngram filters
germanosin e332e74
Ngram sortings
germanosin 8df5bfd
BE: #1332 fixed schema name sorting
germanosin 30c544a
Merge branch 'main' into issues/1332-be
germanosin 197d34c
Merge branch 'main' into issues/1332-be
germanosin File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 90 additions & 0 deletions
90
api/src/main/java/io/kafbat/ui/service/index/lucene/IndexedTextField.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| package io.kafbat.ui.service.index.lucene; | ||
|
|
||
|
|
||
| import java.io.Reader; | ||
| import org.apache.lucene.analysis.TokenStream; | ||
| import org.apache.lucene.document.Field; | ||
| import org.apache.lucene.document.FieldType; | ||
| import org.apache.lucene.document.StoredValue; | ||
| import org.apache.lucene.index.IndexOptions; | ||
|
|
||
| public class IndexedTextField extends Field { | ||
|
|
||
| /** Indexed, tokenized, not stored. */ | ||
| public static final FieldType TYPE_NOT_STORED = new FieldType(); | ||
|
|
||
| /** Indexed, tokenized, stored. */ | ||
| public static final FieldType TYPE_STORED = new FieldType(); | ||
|
|
||
| static { | ||
| TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); | ||
| TYPE_NOT_STORED.setTokenized(true); | ||
| TYPE_NOT_STORED.freeze(); | ||
|
|
||
| TYPE_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); | ||
| TYPE_STORED.setTokenized(true); | ||
| TYPE_STORED.setStored(true); | ||
| TYPE_STORED.setStoreTermVectors(true); | ||
| TYPE_STORED.setStoreTermVectorOffsets(true); | ||
| TYPE_STORED.setStoreTermVectorPositions(true); | ||
| TYPE_STORED.freeze(); | ||
| } | ||
|
|
||
| private final StoredValue storedValue; | ||
|
|
||
| /** | ||
| * Creates a new un-stored TextField with Reader value. | ||
| * | ||
| * @param name field name | ||
| * @param reader reader value | ||
| * @throws IllegalArgumentException if the field name is null | ||
| * @throws NullPointerException if the reader is null | ||
| */ | ||
| public IndexedTextField(String name, Reader reader) { | ||
| super(name, reader, TYPE_NOT_STORED); | ||
| storedValue = null; | ||
| } | ||
|
|
||
| /** | ||
| * Creates a new TextField with String value. | ||
| * | ||
| * @param name field name | ||
| * @param value string value | ||
| * @param store Store.YES if the content should also be stored | ||
| * @throws IllegalArgumentException if the field name or value is null. | ||
| */ | ||
| public IndexedTextField(String name, String value, Store store) { | ||
| super(name, value, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED); | ||
| if (store == Store.YES) { | ||
| storedValue = new StoredValue(value); | ||
| } else { | ||
| storedValue = null; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Creates a new un-stored TextField with TokenStream value. | ||
| * | ||
| * @param name field name | ||
| * @param stream TokenStream value | ||
| * @throws IllegalArgumentException if the field name is null. | ||
| * @throws NullPointerException if the tokenStream is null | ||
| */ | ||
| public IndexedTextField(String name, TokenStream stream) { | ||
| super(name, stream, TYPE_NOT_STORED); | ||
| storedValue = null; | ||
| } | ||
|
|
||
| @Override | ||
| public void setStringValue(String value) { | ||
| super.setStringValue(value); | ||
| if (storedValue != null) { | ||
| storedValue.setStringValue(value); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public StoredValue storedValue() { | ||
| return storedValue; | ||
| } | ||
| } |
108 changes: 108 additions & 0 deletions
108
api/src/main/java/io/kafbat/ui/service/index/lucene/NameDistanceScoringFunction.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| package io.kafbat.ui.service.index.lucene; | ||
|
|
||
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
|
|
||
| public class NameDistanceScoringFunction extends DoubleValuesSource { | ||
| private final String fieldName; | ||
| private final List<String> prefixes; | ||
|
|
||
| public NameDistanceScoringFunction(String fieldName, List<String> prefixes) { | ||
| this.fieldName = fieldName; | ||
| this.prefixes = prefixes; | ||
| } | ||
|
|
||
| @Override | ||
| public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { | ||
|
|
||
| Terms terms = ctx.reader().terms(fieldName); | ||
| Map<Integer, Integer> positions = new HashMap<>(); | ||
|
|
||
| for (String prefix : prefixes) { | ||
| TermsEnum iterator = terms.iterator(); | ||
| TermsEnum.SeekStatus seekStatus = iterator.seekCeil(new BytesRef(prefix)); | ||
| if (!seekStatus.equals(TermsEnum.SeekStatus.END)) { | ||
|
|
||
| PostingsEnum postings = iterator.postings( | ||
| null, | ||
| PostingsEnum.OFFSETS | PostingsEnum.FREQS | PostingsEnum.POSITIONS | ||
| ); | ||
|
|
||
| while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { | ||
| int doc = postings.docID(); | ||
| int smallest = Integer.MAX_VALUE; | ||
|
|
||
| for (int i = 0; i < postings.freq(); i++) { | ||
| postings.nextPosition(); | ||
| smallest = Math.min(smallest, postings.startOffset()); | ||
| } | ||
| int finalSmall = smallest; | ||
| int s = positions.computeIfAbsent(doc, d -> finalSmall); | ||
| if (finalSmall < s) { | ||
| positions.put(doc, finalSmall); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| return new DoubleValues() { | ||
| int doc = -1; | ||
|
|
||
| @Override | ||
| public double doubleValue() { | ||
| Integer pos = positions.get(doc); | ||
| if (pos == null) { | ||
| return 1.0; | ||
| } | ||
| return 1.0 / (1.0 + pos); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean advanceExact(int target) { | ||
| doc = target; | ||
| return true; | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean needsScores() { | ||
| return false; | ||
| } | ||
|
|
||
| @Override | ||
| public DoubleValuesSource rewrite(IndexSearcher searcher) { | ||
| return this; | ||
| } | ||
|
|
||
| @Override | ||
| public int hashCode() { | ||
| return 0; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean equals(Object obj) { | ||
| return false; | ||
germanosin marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
germanosin marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| @Override | ||
| public String toString() { | ||
| return "NameDistanceScoringFunction"; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean isCacheable(LeafReaderContext ctx) { | ||
| return false; | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.