epugh · epugh · Dec 10, 2024 · Dec 6, 2024 · Dec 7, 2024 · Dec 8, 2024
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java
@@ -121,5 +121,6 @@ public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
           Map.entry(RealTimeGetComponent.COMPONENT_NAME, RealTimeGetComponent.class),
           Map.entry(ExpandComponent.COMPONENT_NAME, ExpandComponent.class),
           Map.entry(TermsComponent.COMPONENT_NAME, TermsComponent.class),
-          Map.entry(UBIComponent.COMPONENT_NAME, UBIComponent.class));
+          Map.entry(UBIComponent.COMPONENT_NAME, UBIComponent.class) // NOTE(review): really a standard component? It receives an expression via init args.
+      );
 }
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
@@ -135,6 +135,7 @@ public class SearchHandler extends RequestHandlerBase
   protected List<String> getDefaultComponents() {
     List<String> l = new ArrayList<String>(SearchComponent.STANDARD_COMPONENTS.keySet());
     moveToFirst(l, QueryComponent.COMPONENT_NAME);
+    l.remove(RealTimeGetComponent.COMPONENT_NAME); // NOTE(review): it breaks the cloud test, and it was never among the defaults before
     return l;
   }
 

diff --git a/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java b/solr/core/src/java/org/apache/solr/handler/component/UBIComponent.java
@@ -51,6 +51,8 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.handler.RequestHandlerBase.isInternalShardRequest;
+
 /**
  * User Behavior Insights (UBI) is an open standard for gathering query and event data from users
  * and storing it in a structured format. UBI can be used for in session personalization, implicit
@@ -231,33 +233,51 @@ public void process(ResponseBuilder rb) throws IOException {
     if (!params.getBool(COMPONENT_NAME, false)) {
       return;
     }
-
-    doStuff(rb);
+    if (!isInternalShardRequest(rb.req)) { // subordinate shard req shouldn't yield logs
+      doStuff(rb);
+    }
   }
 
   @Override
   public int distributedProcess(ResponseBuilder rb) throws IOException {
-
     SolrParams params = rb.req.getParams();
     if (!params.getBool(COMPONENT_NAME, false)) {
       return ResponseBuilder.STAGE_DONE;
     }
 
-    if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) {
-      return ResponseBuilder.STAGE_DONE;
+    if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) {
+      return ResponseBuilder.STAGE_GET_FIELDS;
     }
 
-    doStuff(rb);
+    if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
+      doDistribStuff(rb);
+      return ResponseBuilder.STAGE_DONE;
+    }
 
     return ResponseBuilder.STAGE_DONE;
   }
 
   public void doStuff(ResponseBuilder rb) throws IOException {
+    UBIQuery ubiQuery = getUbiQuery(rb);
+    if (ubiQuery == null) return;
+
+    ResultContext rc = (ResultContext) rb.rsp.getResponse();
+    DocList docs = rc.getDocList();
+    // DocList docs = rb.getResults().docList;
+
+    String docIds = extractDocIds(docs, rb.req.getSearcher());
+
+    ubiQuery.setDocIds(docIds);
 
-    // not sure why but sometimes we get it twoice...  how can a response have the
+    addUserBehaviorInsightsToResponse(ubiQuery, rb);
+    recordQuery(ubiQuery);
+  }
+
+  private static UBIQuery getUbiQuery(ResponseBuilder rb) {
+    // not sure why but sometimes we get it twice...  how can a response have
     // the same component run twice?
     if (rb.rsp.getValues().get("ubi") != null) {
-      return;
+      return null;
     }
     SolrParams params = rb.req.getParams();
 
@@ -270,9 +290,9 @@ public void doStuff(ResponseBuilder rb) throws IOException {
     ubiQuery.setApplication(params.get(APPLICATION));
     if (ubiQuery.getApplication() == null) {
       ubiQuery.setApplication(
-          rb.isDistrib
-              ? rb.req.getCloudDescriptor().getCollectionName()
-              : searcher.getCore().getName());
+              rb.isDistrib
+                      ? rb.req.getCloudDescriptor().getCollectionName()
+                      : searcher.getCore().getName());
     }
 
     String queryAttributes = params.get(QUERY_ATTRIBUTES);
@@ -292,12 +312,19 @@ public void doStuff(ResponseBuilder rb) throws IOException {
         }
       }
     }
+    return ubiQuery;
+  }
+
+  public void doDistribStuff(ResponseBuilder rb) throws IOException {
+
+    // not sure why but sometimes we get it twice...  how can a response have
+    // the same component run twice?
+    UBIQuery ubiQuery = getUbiQuery(rb);
+    if (ubiQuery == null) return;
 
-    ResultContext rc = (ResultContext) rb.rsp.getResponse();
-    DocList docs = rc.getDocList();
-    // DocList docs = rb.getResults().docList;
 
-    String docIds = extractDocIds(docs, searcher);
+    //String docIds = extractDocIds(docs, searcher);
+    String docIds =String.join(",", rb.resultIds.keySet().stream().map(Object::toString).toList());
     ubiQuery.setDocIds(docIds);
 
     addUserBehaviorInsightsToResponse(ubiQuery, rb);

diff --git a/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java b/solr/core/src/test/org/apache/solr/handler/component/UBIComponentDistrQueriesTest.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.component;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.commons.io.input.ReversedLinesFileReader;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.solr.client.solrj.io.SolrClientCache;
+import org.apache.solr.client.solrj.io.Tuple;
+import org.apache.solr.client.solrj.io.stream.*;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser;
+import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.cloud.AbstractDistribZkTestBase;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.cluster.api.SimpleMap;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.params.MapSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.embedded.JettySolrRunner;
+import org.apache.solr.handler.LoggingStream;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.time.Instant;
+import java.util.*;
+
+/**
+ * SolrCloud test for {@link UBIComponent}: a query sent with {@code ubi=true} to a two-shard
+ * collection must return a query id, and that id must become searchable in {@code ubi_queries}.
+ */
+@LuceneTestCase.SuppressCodecs({"Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45"})
+public class UBIComponentDistrQueriesTest extends SolrCloudTestCase {
+
+  private static final String COLLECTIONORALIAS = "collection1";
+  // NOTE(review): this collection name seems to be hardcoded by the component - confirm why.
+  private static final String UBI_QUERIES = "ubi_queries";
+  private static final int TIMEOUT = DEFAULT_TIMEOUT;
+
+  private static boolean useAlias;
+
+  /** Starts a 4-node cluster and creates the test and {@code ubi_queries} collections. */
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(4)
+        .addConfig(
+            "conf", TEST_PATH().resolve("configsets").resolve("ubi-enabled").resolve("conf"))
+        .configure();
+
+    useAlias = false; // alias mode is switched off for now; it used to be random().nextBoolean()
+    String collection = useAlias ? COLLECTIONORALIAS + "_collection" : COLLECTIONORALIAS;
+
+    CollectionAdminRequest.createCollection(collection, "conf", 2, 1)
+        .process(cluster.getSolrClient());
+    cluster.waitForActiveCollection(collection, 2, 2);
+    AbstractDistribZkTestBase.waitForRecoveriesToFinish(
+        collection, cluster.getZkStateReader(), false, true, TIMEOUT);
+    if (useAlias) {
+      CollectionAdminRequest.createAlias(COLLECTIONORALIAS, collection)
+          .process(cluster.getSolrClient());
+    }
+
+    CollectionAdminRequest.createCollection(UBI_QUERIES, "_default", 1, 1)
+        .process(cluster.getSolrClient());
+    cluster.waitForActiveCollection(UBI_QUERIES, 1, 1);
+    AbstractDistribZkTestBase.waitForRecoveriesToFinish(
+        UBI_QUERIES, cluster.getZkStateReader(), false, true, TIMEOUT);
+  }
+
+  /** Empties the test collection before every test. */
+  @Before
+  public void cleanIndex() throws Exception {
+    new UpdateRequest().deleteByQuery("*:*").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
+  }
+
+  /** Issues a UBI-enabled query and waits for its query id to show up in {@code ubi_queries}. */
+  @Test
+  public void testUBIQueryStream() throws Exception {
+    cluster.getSolrClient(COLLECTIONORALIAS).add(List.of(
+        new SolrInputDocument("id", "1", "subject", "aa"),
+        new SolrInputDocument("id", "2", "subject", "aa"),
+        new SolrInputDocument("id", "3", "subject", "aa")));
+    cluster.getSolrClient(COLLECTIONORALIAS).commit(true, true);
+    QueryResponse queryResponse = cluster.getSolrClient(COLLECTIONORALIAS).query(
+        new MapSolrParams(Map.of("q", "aa", "df", "subject", "rows", "2", "ubi", "true")));
+    String qid = (String) ((SimpleMap<?>) queryResponse.getResponse().get("ubi")).get("query_id");
+    assertTrue(qid.length() > 10);
+
+    // The record may not be searchable immediately, so poll for up to 10 s rather than sleep 10 s.
+    QueryResponse queryCheck;
+    long deadlineNanos = System.nanoTime() + 10_000_000_000L;
+    do {
+      Thread.sleep(250);
+      queryCheck =
+          cluster.getSolrClient(UBI_QUERIES).query(new MapSolrParams(Map.of("q", "id:" + qid)));
+    } while (queryCheck.getResults().getNumFound() == 0 && System.nanoTime() - deadlineNanos < 0);
+    // TODO also check that the matched doc ids were recorded; they are not visible in the record yet
+    assertEquals(1L, queryCheck.getResults().getNumFound());
+    assertEquals(qid, queryCheck.getResults().get(0).get("id"));
+  }
+}