Skip to content

Commit 6da3269

Browse files
committed
Ngram config and toics optimizations
1 parent bb49399 commit 6da3269

File tree

5 files changed

+99
-22
lines changed

5 files changed

+99
-22
lines changed

api/src/main/java/io/kafbat/ui/config/ClustersProperties.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ public static class CacheProperties {
223223
@AllArgsConstructor
224224
public static class FtsProperties {
225225
boolean enabled = false;
226+
boolean topicsNgramEnabled = false;
226227
int topicsMinNGram = 3;
227228
int topicsMaxNGram = 5;
228229
int filterMinNGram = 1;
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package io.kafbat.ui.service.index;
2+
3+
import org.apache.lucene.analysis.Analyzer;
4+
import org.apache.lucene.analysis.LowerCaseFilter;
5+
import org.apache.lucene.analysis.TokenStream;
6+
import org.apache.lucene.analysis.Tokenizer;
7+
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
8+
import org.apache.lucene.analysis.standard.StandardTokenizer;
9+
10+
public class ShortWordAnalyzer extends Analyzer {
11+
12+
public ShortWordAnalyzer() {}
13+
14+
@Override
15+
protected TokenStreamComponents createComponents(String fieldName) {
16+
Tokenizer tokenizer = new StandardTokenizer();
17+
18+
TokenStream tokenStream = new WordDelimiterGraphFilter(
19+
tokenizer,
20+
WordDelimiterGraphFilter.GENERATE_WORD_PARTS
21+
| WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
22+
| WordDelimiterGraphFilter.PRESERVE_ORIGINAL
23+
| WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
24+
| WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE,
25+
26+
null
27+
);
28+
29+
tokenStream = new LowerCaseFilter(tokenStream);
30+
31+
return new TokenStreamComponents(tokenizer, tokenStream);
32+
}
33+
}

api/src/main/java/io/kafbat/ui/service/index/TopicsIndex.java

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import java.util.concurrent.locks.ReadWriteLock;
1010
import java.util.concurrent.locks.ReentrantReadWriteLock;
1111
import org.apache.lucene.analysis.Analyzer;
12+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
1213
import org.apache.lucene.document.Document;
1314
import org.apache.lucene.document.Field;
1415
import org.apache.lucene.document.IntPoint;
@@ -28,10 +29,12 @@
2829
import org.apache.lucene.search.ScoreDoc;
2930
import org.apache.lucene.search.Sort;
3031
import org.apache.lucene.search.SortField;
32+
import org.apache.lucene.search.SynonymQuery;
3133
import org.apache.lucene.search.TermQuery;
3234
import org.apache.lucene.search.TopDocs;
3335
import org.apache.lucene.store.ByteBuffersDirectory;
3436
import org.apache.lucene.store.Directory;
37+
import org.apache.lucene.util.QueryBuilder;
3538

3639
public class TopicsIndex implements AutoCloseable {
3740
public static final String FIELD_NAME_RAW = "name_raw";
@@ -48,13 +51,24 @@ public class TopicsIndex implements AutoCloseable {
4851
private final Analyzer analyzer;
4952
private final int maxSize;
5053
private final ReadWriteLock closeLock = new ReentrantReadWriteLock();
54+
private final boolean searchNgram;
5155

5256
public TopicsIndex(List<InternalTopic> topics) throws IOException {
53-
this(topics, 3, 5);
57+
this(topics, true);
5458
}
5559

56-
public TopicsIndex(List<InternalTopic> topics, int minNgram, int maxNgram) throws IOException {
57-
this.analyzer = new ShortWordNGramAnalyzer(minNgram, maxNgram);
60+
public TopicsIndex(List<InternalTopic> topics, boolean ngram) throws IOException {
61+
this(topics, ngram, 1, 5);
62+
}
63+
64+
public TopicsIndex(List<InternalTopic> topics, boolean ngram, int minNgram, int maxNgram) throws IOException {
65+
if (ngram) {
66+
this.analyzer = new ShortWordNGramAnalyzer(minNgram, maxNgram, false);
67+
} else {
68+
this.analyzer = new ShortWordAnalyzer();
69+
}
70+
71+
this.searchNgram = ngram;
5872
this.directory = build(topics);
5973
this.indexReader = DirectoryReader.open(directory);
6074
this.indexSearcher = new IndexSearcher(indexReader);
@@ -113,13 +127,34 @@ public List<String> find(String search, Boolean showInternal,
113127
String sortField, Integer count, float minScore) throws IOException {
114128
closeLock.readLock().lock();
115129
try {
116-
QueryParser queryParser = new QueryParser(FIELD_NAME, this.analyzer);
117-
queryParser.setDefaultOperator(QueryParser.Operator.AND);
118130
Query nameQuery;
119-
try {
120-
nameQuery = queryParser.parse(search);
121-
} catch (ParseException e) {
122-
throw new RuntimeException(e);
131+
if (this.searchNgram) {
132+
List<String> ngrams = NgramFilter.tokenizeStringSimple(this.analyzer, search);
133+
BooleanQuery.Builder builder = new BooleanQuery.Builder();
134+
for (String ng : ngrams) {
135+
builder.add(new TermQuery(new Term(FIELD_NAME, ng)), BooleanClause.Occur.MUST);
136+
}
137+
nameQuery = builder.build();
138+
} else {
139+
QueryParser queryParser = new QueryParser(FIELD_NAME, this.analyzer);
140+
queryParser.setDefaultOperator(QueryParser.Operator.AND);
141+
142+
try {
143+
nameQuery = queryParser.parse(search);
144+
145+
if (!search.contains(" ") && !search.contains("*")) {
146+
String wildcardSearch = search + "*";
147+
Query wildCardNameQuery = queryParser.parse(wildcardSearch);
148+
BooleanQuery.Builder withWildcard = new BooleanQuery.Builder();
149+
withWildcard.add(nameQuery, BooleanClause.Occur.SHOULD);
150+
withWildcard.add(wildCardNameQuery, BooleanClause.Occur.SHOULD);
151+
nameQuery = withWildcard.build();
152+
}
153+
154+
155+
} catch (Exception e) {
156+
throw new RuntimeException(e);
157+
}
123158
}
124159

125160
Query internalFilter = new TermQuery(new Term(FIELD_INTERNAL, "true"));

api/src/main/java/io/kafbat/ui/service/metrics/scrape/ScrapedClusterState.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ private static TopicsIndex buildTopicIndex(ClustersProperties clustersProperties
216216
try {
217217
return new TopicsIndex(topicStates.values().stream().map(
218218
topicState -> buildInternalTopic(topicState, clustersProperties)
219-
).toList(), fts.getTopicsMinNGram(), fts.getTopicsMaxNGram());
219+
).toList(), fts.isTopicsNgramEnabled(), fts.getTopicsMinNGram(), fts.getTopicsMaxNGram());
220220
} catch (Exception e) {
221221
log.error("Error creating topics index", e);
222222
}

api/src/test/java/io/kafbat/ui/service/index/TopicsIndexTest.java

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
package io.kafbat.ui.service.index;
22

3-
import static org.assertj.core.api.Assertions.assertThat;
4-
53
import io.kafbat.ui.model.InternalTopic;
64
import io.kafbat.ui.model.InternalTopicConfig;
75
import java.util.ArrayList;
6+
import java.util.HashMap;
87
import java.util.List;
98
import java.util.Map;
109
import java.util.stream.Stream;
@@ -28,7 +27,7 @@ void testFindTopicsByName() throws Exception {
2827
"audit.clients.state",
2928
"audit.clients.repartitioned.status",
3029
"reporting.payments.by.clientId",
31-
"reporting.payments.by.currencyid"
30+
"reporting.payments.by.currencyId"
3231
)
3332
.map(s -> InternalTopic.builder().name(s).partitions(Map.of()).build()).toList());
3433

@@ -46,17 +45,14 @@ void testFindTopicsByName() throws Exception {
4645
Map.entry("topic", testTopicsCount),
4746
Map.entry("8", 1),
4847
Map.entry("9", 0),
49-
Map.entry("tpic", testTopicsCount),
50-
Map.entry("dogs red", 1),
51-
Map.entry("tpic-1", 1),
52-
Map.entry("payments dlq", 1),
53-
Map.entry("paymnts dlq", 1),
48+
Map.entry("dog red", 1),
49+
Map.entry("topic-1", 1),
50+
Map.entry("payment dlq", 1),
5451
Map.entry("stats dlq", 0),
5552
Map.entry("stat", 3),
56-
Map.entry("chnges", 1),
57-
Map.entry("comands", 1),
58-
Map.entry("id", 1),
59-
Map.entry("config_retention:compact", 1)
53+
Map.entry("changes", 1),
54+
Map.entry("commands", 1),
55+
Map.entry("id", 2)
6056
);
6157

6258
SoftAssertions softly = new SoftAssertions();
@@ -68,6 +64,18 @@ void testFindTopicsByName() throws Exception {
6864
.isEqualTo(entry.getValue());
6965
}
7066
}
67+
68+
HashMap<String, Integer> indexExamples = new HashMap<>(examples);
69+
indexExamples.put("config_retention:compact", 1);
70+
71+
try (TopicsIndex index = new TopicsIndex(topics, false)) {
72+
for (Map.Entry<String, Integer> entry : indexExamples.entrySet()) {
73+
List<String> resultAll = index.find(entry.getKey(), null, topics.size());
74+
softly.assertThat(resultAll.size())
75+
.withFailMessage("Expected %d results for '%s', but got %s", entry.getValue(), entry.getKey(), resultAll)
76+
.isEqualTo(entry.getValue());
77+
}
78+
}
7179
softly.assertAll();
7280
}
7381

0 commit comments

Comments
 (0)