Skip to content

Commit 645887f

Browse files
authored
add new params to warm lucene query only (#843)
add new params to warm lucene query only
1 parent a96a1a8 commit 645887f

File tree

7 files changed

+515
-14
lines changed

7 files changed

+515
-14
lines changed

build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ java {
2626
}
2727

2828
allprojects {
29-
version = '0.43.0'
29+
version = '0.44.0'
3030
group = 'com.yelp.nrtsearch'
3131
}
3232

src/main/java/com/yelp/nrtsearch/server/luceneserver/IndexState.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
import com.yelp.nrtsearch.server.backup.Archiver;
2121
import com.yelp.nrtsearch.server.config.LuceneServerConfiguration;
2222
import com.yelp.nrtsearch.server.config.ThreadPoolConfiguration;
23-
import com.yelp.nrtsearch.server.grpc.*;
23+
import com.yelp.nrtsearch.server.grpc.Field;
24+
import com.yelp.nrtsearch.server.grpc.FieldType;
25+
import com.yelp.nrtsearch.server.grpc.Mode;
26+
import com.yelp.nrtsearch.server.grpc.ReplicationServerClient;
2427
import com.yelp.nrtsearch.server.luceneserver.doc.DocLookup;
2528
import com.yelp.nrtsearch.server.luceneserver.field.ContextSuggestFieldDef;
2629
import com.yelp.nrtsearch.server.luceneserver.field.FieldDef;
@@ -37,7 +40,10 @@
3740
import java.io.IOException;
3841
import java.nio.file.Files;
3942
import java.nio.file.Path;
40-
import java.util.*;
43+
import java.util.List;
44+
import java.util.Map;
45+
import java.util.Optional;
46+
import java.util.Set;
4147
import java.util.concurrent.ExecutorService;
4248
import java.util.concurrent.ThreadPoolExecutor;
4349
import java.util.regex.Pattern;
@@ -257,7 +263,8 @@ public void initWarmer(Archiver archiver, String indexName) {
257263
archiver,
258264
configuration.getServiceName(),
259265
indexName,
260-
warmerConfig.getMaxWarmingQueries());
266+
warmerConfig.getMaxWarmingQueries(),
267+
warmerConfig.getWarmBasicQueryOnlyPerc());
261268
}
262269
}
263270

src/main/java/com/yelp/nrtsearch/server/luceneserver/warming/Warmer.java

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import java.util.ArrayList;
3737
import java.util.Collections;
3838
import java.util.List;
39+
import java.util.Random;
3940
import java.util.concurrent.SynchronousQueue;
4041
import java.util.concurrent.ThreadPoolExecutor;
4142
import java.util.concurrent.TimeUnit;
@@ -56,15 +57,28 @@ public class Warmer {
5657
private final ReservoirSampler reservoirSampler;
5758
private final String index;
5859
private final int maxWarmingQueries;
60+
private final int warmBasicQueryOnlyPerc;
61+
protected final ThreadLocal<Random> randomThreadLocal;
5962

6063
public Warmer(Archiver archiver, String service, String index, int maxWarmingQueries) {
64+
this(archiver, service, index, maxWarmingQueries, 0);
65+
}
66+
67+
public Warmer(
68+
Archiver archiver,
69+
String service,
70+
String index,
71+
int maxWarmingQueries,
72+
int warmBasicQueryOnlyPerc) {
6173
this.archiver = archiver;
6274
this.service = service;
6375
this.index = index;
6476
this.resource = index + WARMING_QUERIES_RESOURCE;
6577
this.warmingRequests = Collections.synchronizedList(new ArrayList<>(maxWarmingQueries));
6678
this.reservoirSampler = new ReservoirSampler(maxWarmingQueries);
6779
this.maxWarmingQueries = maxWarmingQueries;
80+
this.warmBasicQueryOnlyPerc = warmBasicQueryOnlyPerc;
81+
this.randomThreadLocal = ThreadLocal.withInitial(Random::new);
6882
}
6983

7084
public int getNumWarmingRequests() {
@@ -162,15 +176,25 @@ void warmFromS3(IndexState indexState, int parallelism, SearchHandler searchHand
162176
}
163177
Path downloadDir = archiver.download(service, resource);
164178
Path warmingRequestsDir = downloadDir.resolve(WARMING_QUERIES_DIR);
179+
long startMS = System.currentTimeMillis();
165180
try (BufferedReader reader =
166181
Files.newBufferedReader(warmingRequestsDir.resolve(WARMING_QUERIES_FILE))) {
167182
String line;
168-
int count = 0;
183+
int count = 0, basicCount = 0;
169184
while ((line = reader.readLine()) != null) {
170-
processLine(indexState, searchHandler, threadPoolExecutor, line);
185+
boolean isStripped = randomThreadLocal.get().nextInt(100) < warmBasicQueryOnlyPerc;
186+
processLine(indexState, searchHandler, threadPoolExecutor, line, isStripped);
171187
count++;
188+
if (isStripped) {
189+
basicCount++;
190+
}
172191
}
173-
logger.info("Warmed index: {} with {} warming queries", index, count);
192+
logger.info(
193+
"Warmed index: {} with {} full and {} basic warming queries in {} seconds.",
194+
index,
195+
count - basicCount,
196+
basicCount,
197+
(System.currentTimeMillis() - startMS) / 1000.0);
174198
} finally {
175199
if (threadPoolExecutor != null) {
176200
threadPoolExecutor.shutdown();
@@ -186,10 +210,14 @@ private void processLine(
186210
IndexState indexState,
187211
SearchHandler searchHandler,
188212
ThreadPoolExecutor threadPoolExecutor,
189-
String line)
213+
String line,
214+
boolean warmBasicQuery)
190215
throws InvalidProtocolBufferException, SearchHandler.SearchHandlerException {
191216
SearchRequest.Builder builder = SearchRequest.newBuilder();
192217
JsonFormat.parser().merge(line, builder);
218+
if (warmBasicQuery) {
219+
WarmingUtils.simplifySearchRequestForWarming(builder);
220+
}
193221
SearchRequest searchRequest = builder.build();
194222
if (threadPoolExecutor == null) {
195223
searchHandler.handle(indexState, searchRequest);

src/main/java/com/yelp/nrtsearch/server/luceneserver/warming/WarmerConfig.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,32 @@ public class WarmerConfig {
2121
private static final String CONFIG_PREFIX = "warmer.";
2222
private static final int DEFAULT_MAX_WARMING_QUERIES = 0;
2323
private static final int DEFAULT_WARMING_PARALLELISM = 1;
24+
private static final int DEFAULT_WARM_BASIC_QUERY_ONLY_PERC = 0;
2425
private static final boolean DEFAULT_WARM_ON_STARTUP = false;
2526

2627
private final int maxWarmingQueries;
2728
private final int warmingParallelism;
2829
private final boolean warmOnStartup;
30+
private final int warmBasicQueryOnlyPerc;
2931

3032
/**
3133
* Configuration for warmer.
3234
*
3335
* @param maxWarmingQueries maximum queries to store for warming
3436
* @param warmingParallelism number of parallel queries while warming on startup
3537
* @param warmOnStartup if true will try to download queries from S3 and use them to warm
38+
* @param warmBasicQueryOnlyPerc percentage of warming queries that should be basic queries for
39+
* the fast boostrap
3640
*/
37-
public WarmerConfig(int maxWarmingQueries, int warmingParallelism, boolean warmOnStartup) {
41+
public WarmerConfig(
42+
int maxWarmingQueries,
43+
int warmingParallelism,
44+
boolean warmOnStartup,
45+
int warmBasicQueryOnlyPerc) {
3846
this.maxWarmingQueries = maxWarmingQueries;
3947
this.warmingParallelism = warmingParallelism;
4048
this.warmOnStartup = warmOnStartup;
49+
this.warmBasicQueryOnlyPerc = warmBasicQueryOnlyPerc;
4150
}
4251

4352
public static WarmerConfig fromConfig(YamlConfigReader configReader) {
@@ -47,8 +56,12 @@ public static WarmerConfig fromConfig(YamlConfigReader configReader) {
4756
configReader.getInteger(CONFIG_PREFIX + "warmingParallelism", DEFAULT_WARMING_PARALLELISM);
4857
boolean warmOnStartup =
4958
configReader.getBoolean(CONFIG_PREFIX + "warmOnStartup", DEFAULT_WARM_ON_STARTUP);
59+
int warmBasicQueryOnlyPerc =
60+
configReader.getInteger(
61+
CONFIG_PREFIX + "warmBasicQueryOnlyPerc", DEFAULT_WARM_BASIC_QUERY_ONLY_PERC);
5062

51-
return new WarmerConfig(maxWarmingQueries, warmingParallelism, warmOnStartup);
63+
return new WarmerConfig(
64+
maxWarmingQueries, warmingParallelism, warmOnStartup, warmBasicQueryOnlyPerc);
5265
}
5366

5467
public int getMaxWarmingQueries() {
@@ -62,4 +75,8 @@ public int getWarmingParallelism() {
6275
public boolean isWarmOnStartup() {
6376
return warmOnStartup;
6477
}
78+
79+
public int getWarmBasicQueryOnlyPerc() {
80+
return warmBasicQueryOnlyPerc;
81+
}
6582
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* Copyright 2020 Yelp Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.yelp.nrtsearch.server.luceneserver.warming;
17+
18+
import com.yelp.nrtsearch.server.grpc.BooleanClause;
19+
import com.yelp.nrtsearch.server.grpc.BooleanQuery;
20+
import com.yelp.nrtsearch.server.grpc.ConstantScoreQuery;
21+
import com.yelp.nrtsearch.server.grpc.DisjunctionMaxQuery;
22+
import com.yelp.nrtsearch.server.grpc.MultiFunctionScoreQuery;
23+
import com.yelp.nrtsearch.server.grpc.NestedQuery;
24+
import com.yelp.nrtsearch.server.grpc.Query;
25+
import com.yelp.nrtsearch.server.grpc.SearchRequest;
26+
import org.slf4j.Logger;
27+
import org.slf4j.LoggerFactory;
28+
29+
public class WarmingUtils {
30+
private static final Logger logger = LoggerFactory.getLogger(WarmingUtils.class);
31+
32+
public static SearchRequest simplifySearchRequestForWarming(
33+
SearchRequest.Builder searchRequestBuilder) {
34+
Query basicSearchRequest = stripScriptQuery(searchRequestBuilder.getQuery());
35+
SearchRequest searchRequest =
36+
searchRequestBuilder
37+
.setQuery(basicSearchRequest)
38+
.clearQuerySort()
39+
.clearRescorers()
40+
.clearRetrieveFields()
41+
.clearFetchTasks()
42+
.clearFacets()
43+
.clearCollectors()
44+
.clearHighlight()
45+
.setProfile(false)
46+
.build();
47+
return searchRequest;
48+
}
49+
50+
private static Query stripScriptQuery(Query query) {
51+
if (query.hasFunctionScoreQuery()) {
52+
return stripScriptQuery(query.getFunctionScoreQuery().getQuery());
53+
}
54+
if (query.hasFunctionFilterQuery()) {
55+
return Query.newBuilder().build();
56+
}
57+
if (query.hasMultiFunctionScoreQuery()) {
58+
MultiFunctionScoreQuery multiFunctionScoreQuery = query.getMultiFunctionScoreQuery();
59+
for (MultiFunctionScoreQuery.FilterFunction function :
60+
multiFunctionScoreQuery.getFunctionsList()) {
61+
if (function.hasScript()) {
62+
return stripScriptQuery(function.getFilter());
63+
}
64+
}
65+
}
66+
67+
Query.Builder queryBuilder = query.toBuilder();
68+
switch (query.getQueryNodeCase()) {
69+
case BOOLEANQUERY -> queryBuilder.setBooleanQuery(stripBooleanQuery(query.getBooleanQuery()));
70+
case DISJUNCTIONMAXQUERY -> queryBuilder.setDisjunctionMaxQuery(
71+
stripDisjunctionMaxQuery(query.getDisjunctionMaxQuery()));
72+
case NESTEDQUERY -> queryBuilder.setNestedQuery(stripNestedQuery(query.getNestedQuery()));
73+
case CONSTANTSCOREQUERY -> queryBuilder.setConstantScoreQuery(
74+
stripConstantScoreQuery(query.getConstantScoreQuery()));
75+
default -> {}
76+
}
77+
// Add other cases as needed
78+
return queryBuilder.build();
79+
}
80+
81+
private static BooleanQuery stripBooleanQuery(BooleanQuery booleanQuery) {
82+
BooleanQuery.Builder booleanQueryBuilder = booleanQuery.toBuilder();
83+
for (int i = 0; i < booleanQuery.getClausesCount(); i++) {
84+
BooleanClause clause = booleanQuery.getClauses(i);
85+
BooleanClause.Builder clauseBuilder = clause.toBuilder();
86+
clauseBuilder.setQuery(stripScriptQuery(clause.getQuery()));
87+
booleanQueryBuilder.setClauses(i, clauseBuilder.build());
88+
}
89+
return booleanQueryBuilder.build();
90+
}
91+
92+
private static DisjunctionMaxQuery stripDisjunctionMaxQuery(
93+
DisjunctionMaxQuery disjunctionMaxQuery) {
94+
DisjunctionMaxQuery.Builder disjunctionMaxQueryBuilder = disjunctionMaxQuery.toBuilder();
95+
for (int i = 0; i < disjunctionMaxQuery.getDisjunctsCount(); i++) {
96+
disjunctionMaxQueryBuilder.setDisjuncts(
97+
i, stripScriptQuery(disjunctionMaxQuery.getDisjuncts(i)));
98+
}
99+
return disjunctionMaxQueryBuilder.build();
100+
}
101+
102+
private static NestedQuery stripNestedQuery(NestedQuery nestedQuery) {
103+
NestedQuery.Builder nestedQueryBuilder = nestedQuery.toBuilder();
104+
nestedQueryBuilder.setQuery(stripScriptQuery(nestedQuery.getQuery()));
105+
return nestedQueryBuilder.build();
106+
}
107+
108+
private static ConstantScoreQuery stripConstantScoreQuery(ConstantScoreQuery constantScoreQuery) {
109+
ConstantScoreQuery.Builder constantScoreQueryBuilder = constantScoreQuery.toBuilder();
110+
constantScoreQueryBuilder.setFilter(stripScriptQuery(constantScoreQuery.getFilter()));
111+
return constantScoreQueryBuilder.build();
112+
}
113+
}

src/test/java/com/yelp/nrtsearch/server/luceneserver/warming/WarmerTest.java

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
import com.yelp.nrtsearch.server.backup.Archiver;
2525
import com.yelp.nrtsearch.server.backup.ArchiverImpl;
2626
import com.yelp.nrtsearch.server.backup.TarImpl;
27+
import com.yelp.nrtsearch.server.grpc.FunctionScoreQuery;
2728
import com.yelp.nrtsearch.server.grpc.Query;
29+
import com.yelp.nrtsearch.server.grpc.Script;
2830
import com.yelp.nrtsearch.server.grpc.SearchRequest;
2931
import com.yelp.nrtsearch.server.grpc.TermQuery;
3032
import com.yelp.nrtsearch.server.luceneserver.IndexState;
@@ -110,6 +112,37 @@ public void testWarmFromS3()
110112
verifyNoMoreInteractions(mockSearchHandler);
111113
}
112114

115+
@Test
116+
public void testWarmFromS3_basic()
117+
throws IOException, SearchHandler.SearchHandlerException, InterruptedException {
118+
Warmer warmerWithBasic = new Warmer(archiver, service, index, 2, 30);
119+
Path warmingQueriesDir = folder.newFolder("warming_queries").toPath();
120+
try (BufferedWriter writer =
121+
Files.newBufferedWriter(warmingQueriesDir.resolve("warming_queries.txt"))) {
122+
List<String> testSearchRequestsJson = getTestSearchRequestsAsJsonStrings();
123+
for (String line : testSearchRequestsJson) {
124+
writer.write(line);
125+
writer.newLine();
126+
}
127+
writer.flush();
128+
}
129+
String versionHash =
130+
archiver.upload(service, resource, warmingQueriesDir, List.of(), List.of(), false);
131+
archiver.blessVersion(service, resource, versionHash);
132+
133+
IndexState mockIndexState = mock(IndexState.class);
134+
SearchHandler mockSearchHandler = mock(SearchHandler.class);
135+
136+
// nextInt(100) for this seed is: 28, 33, 20, 10
137+
warmerWithBasic.randomThreadLocal.get().setSeed(1234);
138+
warmerWithBasic.warmFromS3(mockIndexState, 0, mockSearchHandler);
139+
140+
for (SearchRequest testRequest : getTestBasicSearchRequests()) {
141+
verify(mockSearchHandler).handle(mockIndexState, testRequest);
142+
}
143+
verifyNoMoreInteractions(mockSearchHandler);
144+
}
145+
113146
@Test
114147
public void testWarmFromS3_multiple()
115148
throws IOException, SearchHandler.SearchHandlerException, InterruptedException {
@@ -180,17 +213,47 @@ private List<SearchRequest> getTestSearchRequests() {
180213
.setIndexName(index)
181214
.setQuery(
182215
Query.newBuilder()
183-
.setTermQuery(TermQuery.newBuilder().setField("field" + i).build())
184-
.build())
216+
.setFunctionScoreQuery(
217+
FunctionScoreQuery.newBuilder()
218+
.setQuery(
219+
Query.newBuilder()
220+
.setTermQuery(TermQuery.newBuilder().setField("field" + i)))
221+
.setScript(Script.newBuilder().setLang("js").setSource("3 * 5"))))
185222
.build();
186223
testRequests.add(searchRequest);
187224
}
188225
return testRequests;
189226
}
190227

228+
private List<SearchRequest> getTestBasicSearchRequests() {
229+
List<SearchRequest> testRequests = new ArrayList<>();
230+
SearchRequest searchRequest =
231+
SearchRequest.newBuilder()
232+
.setIndexName(index)
233+
.setQuery(Query.newBuilder().setTermQuery(TermQuery.newBuilder().setField("field0")))
234+
.build();
235+
testRequests.add(searchRequest);
236+
237+
searchRequest =
238+
SearchRequest.newBuilder()
239+
.setIndexName(index)
240+
.setQuery(
241+
Query.newBuilder()
242+
.setFunctionScoreQuery(
243+
FunctionScoreQuery.newBuilder()
244+
.setQuery(
245+
Query.newBuilder()
246+
.setTermQuery(TermQuery.newBuilder().setField("field1")))
247+
.setScript(Script.newBuilder().setLang("js").setSource("3 * 5"))))
248+
.build();
249+
testRequests.add(searchRequest);
250+
251+
return testRequests;
252+
}
253+
191254
private List<String> getTestSearchRequestsAsJsonStrings() {
192255
return List.of(
193-
"{\"indexName\":\"test_index\",\"query\":{\"termQuery\":{\"field\":\"field0\"}}}",
194-
"{\"indexName\":\"test_index\",\"query\":{\"termQuery\":{\"field\":\"field1\"}}}");
256+
"{\"indexName\":\"test_index\",\"query\":{\"functionScoreQuery\":{\"query\":{\"termQuery\":{\"field\":\"field0\"}},\"script\":{\"lang\":\"js\",\"source\":\"3 * 5\"}}}}",
257+
"{\"indexName\":\"test_index\",\"query\":{\"functionScoreQuery\":{\"query\":{\"termQuery\":{\"field\":\"field1\"}},\"script\":{\"lang\":\"js\",\"source\":\"3 * 5\"}}}}");
195258
}
196259
}

0 commit comments

Comments
 (0)