Added lucene

germanosin · germanosin · commit c27b32dc346b · 2025-08-13T10:19:52.000+03:00
diff --git a/api/build.gradle b/api/build.gradle
@@ -54,6 +54,10 @@ dependencies {
     antlr libs.antlr
     implementation libs.antlr.runtime
 
+    implementation libs.lucene
+    implementation libs.lucene.queryparser
+    implementation libs.lucene.analysis.common
+
     implementation libs.opendatadiscovery.oddrn
     implementation(libs.opendatadiscovery.client) {
         exclude group: 'org.springframework.boot', module: 'spring-boot-starter-webflux'
diff --git a/api/src/main/java/io/kafbat/ui/config/ClustersProperties.java b/api/src/main/java/io/kafbat/ui/config/ClustersProperties.java
@@ -41,6 +41,7 @@ public class ClustersProperties {
   MetricsStorage defaultMetricsStorage = new MetricsStorage();
 
   CacheProperties cache = new CacheProperties();
+  FtsProperties fts = new FtsProperties();
 
   @Data
   public static class Cluster {
@@ -217,6 +218,17 @@ public static class CacheProperties {
     Duration connectClusterCacheExpiry = Duration.ofHours(24);
   }
 
+  /**
+   * Settings held in the {@code fts} field of {@link ClustersProperties}.
+   * NOTE(review): "fts" presumably stands for full-text search (this change introduces Lucene) — confirm.
+   *
+   * <p>Keep the field order stable: {@code @AllArgsConstructor} derives its parameter order from it.
+   */
+  @Data
+  @NoArgsConstructor
+  @AllArgsConstructor
+  public static class FtsProperties {
+    // On by default.
+    boolean enabled = true;
+    // NOTE(review): presumably the n-gram bounds used for the topics index; the defaults equal the
+    // (3, 5) that TopicsIndex falls back to on its own — confirm where these are read.
+    int topicsMinNGram = 3;
+    int topicsMaxNGram = 5;
+    // NOTE(review): presumably the n-gram bounds used by NgramFilter-based filtering; NgramFilter
+    // currently hard-codes (1, 4) — confirm where these are read.
+    int filterMinNGram = 1;
+    int filterMaxNGram = 4;
+  }
+
   @PostConstruct
   public void validateAndSetDefaults() {
     if (clusters != null) {
diff --git a/api/src/main/java/io/kafbat/ui/model/Statistics.java b/api/src/main/java/io/kafbat/ui/model/Statistics.java
@@ -11,7 +11,7 @@
 
 @Value
 @Builder(toBuilder = true)
-public class Statistics {
+public class Statistics implements AutoCloseable {
   ServerStatusDTO status;
   Throwable lastKafkaException;
   String version;
@@ -46,4 +46,11 @@ public Stream<TopicDescription> topicDescriptions() {
   /**
    * Returns a copy of these statistics whose cluster state is the result of applying
    * {@code stateUpdate} to the current cluster state; all other fields are carried over.
    */
   public Statistics withClusterState(UnaryOperator<ScrapedClusterState> stateUpdate) {
     ScrapedClusterState updatedState = stateUpdate.apply(clusterState);
     return toBuilder()
         .clusterState(updatedState)
         .build();
   }
+
+  /**
+   * Closes the cluster state held by these statistics; does nothing when there is none.
+   */
+  @Override
+  public void close() throws Exception {
+    if (clusterState == null) {
+      return;
+    }
+    clusterState.close();
+  }
 }
diff --git a/api/src/main/java/io/kafbat/ui/service/index/ConsumerGroupFilter.java b/api/src/main/java/io/kafbat/ui/service/index/ConsumerGroupFilter.java
@@ -0,0 +1,19 @@
+package io.kafbat.ui.service.index;
+
+import io.kafbat.ui.model.InternalConsumerGroup;
+import java.util.List;
+import reactor.util.function.Tuple2;
+import reactor.util.function.Tuples;
+
+public class ConsumerGroupFilter extends NgramFilter<InternalConsumerGroup> {
+  private final List<Tuple2<String, InternalConsumerGroup>> groups;
+
+  public ConsumerGroupFilter(List<InternalConsumerGroup> groups) {
+    this.groups = groups.stream().map(g -> Tuples.of(g.getGroupId(), g)).toList();
+  }
+
+  @Override
+  protected List<Tuple2<String, InternalConsumerGroup>> getItems() {
+    return this.groups;
+  }
+}
diff --git a/api/src/main/java/io/kafbat/ui/service/index/NgramFilter.java b/api/src/main/java/io/kafbat/ui/service/index/NgramFilter.java
@@ -0,0 +1,93 @@
+package io.kafbat.ui.service.index;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import lombok.SneakyThrows;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import reactor.util.function.Tuple2;
+
+@Slf4j
+public abstract class NgramFilter<T> {
+  private final Analyzer analyzer = new ShortWordNGramAnalyzer(1, 4, false);
+
+  protected abstract List<Tuple2<String, T>> getItems();
+  private static Map<String, List<String>> cache = new ConcurrentHashMap<>();
+
+  public List<T> find(String search) {
+    try {
+      List<SearchResult<T>> result = new ArrayList<>();
+      List<String> queryTokens = tokenizeString(analyzer, search);
+      Map<String, Integer> queryFreq = termFreq(queryTokens);
+
+      for (Tuple2<String, T> item : getItems()) {
+        List<String> itemTokens = tokenizeString(analyzer, item.getT1());
+        HashSet<String> itemTokensSet = new HashSet<>(itemTokens);
+        if (itemTokensSet.containsAll(queryTokens)) {
+          double score = cosineSimilarity(queryFreq, itemTokens);
+          result.add(new SearchResult<T>(item.getT2(), score));
+//          result.add(new SearchResult<T>(item.getT2(), 1));
+        }
+      }
+      result.sort((o1, o2) -> Double.compare(o2.score, o1.score));
+      return result.stream().map(r -> r.item).toList();
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private record SearchResult<T>(T item, double score) { }
+
+
+  public static List<String> tokenizeString(Analyzer analyzer, String text) throws IOException {
+    return cache.computeIfAbsent(text, (t) -> tokenizeStringSimple(analyzer, text));
+  }
+
+  @SneakyThrows
+  public static List<String> tokenizeStringSimple(Analyzer analyzer, String text) {
+    List<String> tokens = new ArrayList<>();
+    try (TokenStream tokenStream = analyzer.tokenStream(null, text)) {
+      CharTermAttribute attr = tokenStream.addAttribute(CharTermAttribute.class);
+      tokenStream.reset();
+      while (tokenStream.incrementToken()) {
+        tokens.add(attr.toString());
+      }
+      tokenStream.end();
+    }
+    return tokens;
+  }
+
+  private static double cosineSimilarity(Map<String, Integer> queryFreq, List<String> itemTokens) {
+    // Build frequency maps
+    Map<String, Integer> terms = termFreq(itemTokens);
+
+    double dot = 0.0;
+    double mag1 = 0.0;
+    double mag2 = 0.0;
+
+    for (String term : terms.keySet()) {
+      int f1 = queryFreq.getOrDefault(term, 0);
+      int f2 = terms.getOrDefault(term, 0);
+      dot += f1 * f2;
+      mag1 += f1 * f1;
+      mag2 += f2 * f2;
+    }
+
+    return (mag1 == 0 || mag2 == 0) ? 0.0 : dot / (Math.sqrt(mag1) * Math.sqrt(mag2));
+  }
+
+  private static Map<String, Integer> termFreq(List<String> tokens) {
+    Map<String, Integer> freq = new HashMap<>();
+    for (String token : tokens) {
+      freq.put(token, freq.getOrDefault(token, 0) + 1);
+    }
+    return freq;
+  }
+}
diff --git a/api/src/main/java/io/kafbat/ui/service/index/ShortWordNGramAnalyzer.java b/api/src/main/java/io/kafbat/ui/service/index/ShortWordNGramAnalyzer.java
@@ -0,0 +1,48 @@
+package io.kafbat.ui.service.index;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
+import org.apache.lucene.analysis.ngram.NGramTokenFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+
+/**
+ * Analyzer that runs text through {@link StandardTokenizer}, {@link WordDelimiterGraphFilter},
+ * {@link LowerCaseFilter} and finally {@link NGramTokenFilter}, so every token ends up as
+ * lower-cased character n-grams of {@code minGram} to {@code maxGram} characters.
+ */
+public class ShortWordNGramAnalyzer extends Analyzer {
+  /** Word delimiter configuration; {@code SPLIT_ON_NUMERICS} is not part of it. */
+  private static final int WORD_DELIMITER_FLAGS =
+      WordDelimiterGraphFilter.GENERATE_WORD_PARTS
+          | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
+          | WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE;
+
+  private final int minGram;
+  private final int maxGram;
+  private final boolean preserveOriginal;
+
+  /**
+   * Creates an analyzer that passes {@code preserveOriginal = true} to the n-gram filter.
+   *
+   * @param minGram smallest n-gram length handed to the n-gram filter
+   * @param maxGram largest n-gram length handed to the n-gram filter
+   */
+  public ShortWordNGramAnalyzer(int minGram, int maxGram) {
+    this(minGram, maxGram, true);
+  }
+
+  /**
+   * @param minGram          smallest n-gram length handed to the n-gram filter
+   * @param maxGram          largest n-gram length handed to the n-gram filter
+   * @param preserveOriginal forwarded as-is to {@link NGramTokenFilter}
+   */
+  public ShortWordNGramAnalyzer(int minGram, int maxGram, boolean preserveOriginal) {
+    this.minGram = minGram;
+    this.maxGram = maxGram;
+    this.preserveOriginal = preserveOriginal;
+  }
+
+  @Override
+  protected TokenStreamComponents createComponents(String fieldName) {
+    Tokenizer source = new StandardTokenizer();
+
+    // No protected-words set is supplied to the word delimiter filter.
+    TokenStream wordParts = new WordDelimiterGraphFilter(source, WORD_DELIMITER_FLAGS, null);
+    TokenStream lowerCased = new LowerCaseFilter(wordParts);
+
+    // Character n-grams sized by the constructor arguments.
+    TokenStream nGrams = new NGramTokenFilter(lowerCased, minGram, maxGram, preserveOriginal);
+
+    return new TokenStreamComponents(source, nGrams);
+  }
+}
diff --git a/api/src/main/java/io/kafbat/ui/service/index/TopicsIndex.java b/api/src/main/java/io/kafbat/ui/service/index/TopicsIndex.java
@@ -0,0 +1,137 @@
+package io.kafbat.ui.service.index;
+
+import io.kafbat.ui.model.InternalTopic;
+import io.kafbat.ui.model.InternalTopicConfig;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+
+public class TopicsIndex implements AutoCloseable {
+  public static final String FIELD_NAME_RAW = "name_raw";
+  public static final String FIELD_NAME = "name";
+  public static final String FIELD_INTERNAL = "internal";
+  public static final String FIELD_PARTITIONS = "partitions";
+  public static final String FIELD_REPLICATION = "replication";
+  public static final String FIELD_SIZE = "size";
+  public static final String FIELD_CONFIG_PREFIX = "config";
+
+  private final Directory directory;
+  private final DirectoryReader indexReader;
+  private final IndexSearcher indexSearcher;
+  private final Analyzer analyzer;
+
+  public TopicsIndex(List<InternalTopic> topics) throws IOException {
+    this(topics, 3,5);
+  }
+
+  public TopicsIndex(List<InternalTopic> topics, int minNgram, int maxNgram) throws IOException {
+    this.analyzer = new ShortWordNGramAnalyzer(minNgram, maxNgram);
+    this.directory = build(topics);
+    this.indexReader = DirectoryReader.open(directory);
+    this.indexSearcher = new IndexSearcher(indexReader);
+  }
+
+  private Directory build(List<InternalTopic> topics) {
+    Directory directory = new ByteBuffersDirectory();
+    try(IndexWriter directoryWriter = new IndexWriter(directory, new IndexWriterConfig(this.analyzer))) {
+      for (InternalTopic topic : topics) {
+        Document doc = new Document();
+        doc.add(new StringField(FIELD_NAME_RAW, topic.getName(), Field.Store.YES));
+        doc.add(new TextField(FIELD_NAME, topic.getName(), Field.Store.NO));
+        doc.add(new IntPoint(FIELD_PARTITIONS, topic.getPartitionCount()));
+        doc.add(new IntPoint(FIELD_REPLICATION, topic.getReplicationFactor()));
+        doc.add(new LongPoint(FIELD_SIZE, topic.getSegmentSize()));
+        if (topic.getTopicConfigs() != null && !topic.getTopicConfigs().isEmpty()) {
+          for (InternalTopicConfig topicConfig : topic.getTopicConfigs()) {
+            doc.add(new StringField(FIELD_CONFIG_PREFIX+"_"+topicConfig.getName(), topicConfig.getValue(), Field.Store.NO));
+          }
+        }
+        doc.add(new StringField(FIELD_INTERNAL, String.valueOf(topic.isInternal()), Field.Store.NO));
+        directoryWriter.addDocument(doc);
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    return directory;
+  }
+
+  @Override
+  public void close() throws Exception {
+    if (indexReader != null) {
+      this.indexReader.close();
+    }
+    if (this.directory != null) {
+      this.directory.close();
+    }
+  }
+
+  public List<String> find(String search, Boolean showInternal, int count) throws IOException {
+    return find(search, showInternal, FIELD_NAME, count, 0.0f, 2);
+  }
+
+  public List<String> find(String search, Boolean showInternal, String sort, int count) throws IOException {
+    return find(search, showInternal, sort, count, 0.0f, 2);
+  }
+
+  public List<String> find(String search, Boolean showInternal, String sortField, int count, float minScore, int maxEdits) throws IOException {
+    QueryParser queryParser = new QueryParser(FIELD_NAME, this.analyzer);
+    queryParser.setDefaultOperator(QueryParser.Operator.AND);
+    Query nameQuery = null;
+    try {
+      nameQuery = queryParser.parse(search);
+    } catch (ParseException e) {
+      throw new RuntimeException(e);
+    }
+
+    Query internalFilter = new TermQuery(new Term(FIELD_INTERNAL, "true"));
+
+    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+    queryBuilder.add(nameQuery, BooleanClause.Occur.MUST);
+    if (showInternal == null || !showInternal) {
+      queryBuilder.add(internalFilter, BooleanClause.Occur.MUST_NOT);
+    }
+
+    List<SortField> sortFields = new ArrayList<>();
+    sortFields.add(SortField.FIELD_SCORE);
+    if (!sortField.equals(FIELD_NAME)) {
+      sortFields.add(new SortField(sortField, SortField.Type.INT, true));
+    }
+
+    Sort sort = new Sort(sortFields.toArray(new SortField[0]));
+
+    TopDocs result = this.indexSearcher.search(queryBuilder.build(), count);
+
+    List<String> topics = new ArrayList<>();
+    for (ScoreDoc scoreDoc : result.scoreDocs) {
+      if (scoreDoc.score > minScore) {
+        Document document = this.indexSearcher.storedFields().document(scoreDoc.doc);
+        topics.add(document.get(FIELD_NAME_RAW));
+      }
+    }
+    return topics;
+  }
+}
diff --git a/api/src/main/java/io/kafbat/ui/service/metrics/scrape/ScrapedClusterState.java b/api/src/main/java/io/kafbat/ui/service/metrics/scrape/ScrapedClusterState.java
@@ -8,7 +8,9 @@
 import io.kafbat.ui.model.InternalLogDirStats;
 import io.kafbat.ui.model.InternalPartitionsOffsets;
 import io.kafbat.ui.service.ReactiveAdminClient;
+import io.kafbat.ui.service.index.TopicsIndex;
 import jakarta.annotation.Nullable;
+import java.io.Closeable;
 import java.time.Instant;
 import java.util.HashMap;
 import java.util.List;
@@ -31,12 +33,20 @@
 @Builder(toBuilder = true)
 @RequiredArgsConstructor
 @Value
-public class ScrapedClusterState {
+public class ScrapedClusterState implements AutoCloseable {
 
   Instant scrapeFinishedAt;
   Map<Integer, NodeState> nodesStates;
   Map<String, TopicState> topicStates;
   Map<String, ConsumerGroupState> consumerGroupsStates;
+  TopicsIndex topicsIndex;
+
+  @Override
+  public void close() throws Exception {
+    if (this.topicsIndex != null) {
+      this.topicsIndex.close();
+    }
+  }
 
   public record NodeState(int id,
                           Node node,
diff --git a/api/src/test/java/io/kafbat/ui/service/index/ConsumerGroupsFilterTest.java b/api/src/test/java/io/kafbat/ui/service/index/ConsumerGroupsFilterTest.java
diff --git a/api/src/test/java/io/kafbat/ui/service/index/TopicsIndexTest.java b/api/src/test/java/io/kafbat/ui/service/index/TopicsIndexTest.java
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml