Skip to content

Commit 0a70212

Browse files
authored
add bootstrap metrics to v0.x (#828)
bootstrap metrics
1 parent 8746589 commit 0a70212

File tree

8 files changed

+147
-10
lines changed

8 files changed

+147
-10
lines changed

src/main/java/com/yelp/nrtsearch/server/grpc/LuceneServer.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.google.protobuf.util.JsonFormat;
3636
import com.yelp.nrtsearch.LuceneServerModule;
3737
import com.yelp.nrtsearch.server.MetricsRequestHandler;
38+
import com.yelp.nrtsearch.server.Version;
3839
import com.yelp.nrtsearch.server.backup.Archiver;
3940
import com.yelp.nrtsearch.server.config.LuceneServerConfiguration;
4041
import com.yelp.nrtsearch.server.config.QueryCacheConfig;
@@ -72,6 +73,7 @@
7273
import io.grpc.stub.ServerCallStreamObserver;
7374
import io.grpc.stub.StreamObserver;
7475
import io.prometheus.client.CollectorRegistry;
76+
import io.prometheus.client.Gauge;
7577
import io.prometheus.client.hotspot.DefaultExports;
7678
import java.io.File;
7779
import java.io.FileOutputStream;
@@ -120,6 +122,8 @@ public LuceneServer(
120122

121123
@VisibleForTesting
122124
public void start() throws IOException {
125+
Gauge.Timer timer =
126+
BootstrapMetrics.nrtsearchBootstrapTimer.labels(Version.CURRENT.toString()).startTimer();
123127
List<Plugin> plugins = pluginsService.loadPlugins();
124128
String serviceName = luceneServerConfiguration.getServiceName();
125129
String nodeName = luceneServerConfiguration.getNodeName();
@@ -196,6 +200,7 @@ public void start() throws IOException {
196200
.start();
197201
logger.info(
198202
"Server started, listening on " + luceneServerConfiguration.getPort() + " for messages");
203+
timer.close();
199204
}
200205

201206
@VisibleForTesting
@@ -228,6 +233,8 @@ private void registerMetrics(GlobalState globalState) {
228233
// register thread pool metrics
229234
new ThreadPoolCollector().register(collectorRegistry);
230235
collectorRegistry.register(RejectionCounterWrapper.rejectionCounter);
236+
// register bootstrap metrics
237+
BootstrapMetrics.register(collectorRegistry);
231238
// register nrt metrics
232239
NrtMetrics.register(collectorRegistry);
233240
// register index metrics

src/main/java/com/yelp/nrtsearch/server/luceneserver/ShardState.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
package com.yelp.nrtsearch.server.luceneserver;
1717

18+
import com.yelp.nrtsearch.server.Version;
1819
import com.yelp.nrtsearch.server.config.LuceneServerConfiguration;
1920
import com.yelp.nrtsearch.server.grpc.DeadlineUtils;
2021
import com.yelp.nrtsearch.server.grpc.IndexLiveSettings;
@@ -26,10 +27,12 @@
2627
import com.yelp.nrtsearch.server.luceneserver.index.IndexStateManager;
2728
import com.yelp.nrtsearch.server.luceneserver.index.NrtIndexWriter;
2829
import com.yelp.nrtsearch.server.luceneserver.warming.WarmerConfig;
30+
import com.yelp.nrtsearch.server.monitoring.BootstrapMetrics;
2931
import com.yelp.nrtsearch.server.monitoring.IndexMetrics;
3032
import com.yelp.nrtsearch.server.utils.FileUtil;
3133
import com.yelp.nrtsearch.server.utils.HostPort;
3234
import io.grpc.StatusRuntimeException;
35+
import io.prometheus.client.Gauge;
3336
import java.io.Closeable;
3437
import java.io.IOException;
3538
import java.io.OutputStream;
@@ -966,8 +969,13 @@ public synchronized void startReplica(ReplicationServerClient primaryAddress, lo
966969
}
967970

968971
if (configuration.getSyncInitialNrtPoint()) {
972+
Gauge.Timer timer =
973+
BootstrapMetrics.initialNRTTimer
974+
.labels(name.split(":")[0], Version.CURRENT.toString())
975+
.startTimer();
969976
nrtReplicaNode.syncFromCurrentPrimary(
970977
configuration.getInitialSyncPrimaryWaitMs(), configuration.getInitialSyncMaxTimeMs());
978+
timer.close();
971979
}
972980

973981
startSearcherPruningThread(indexState.getGlobalState().getShutdownLatch());

src/main/java/com/yelp/nrtsearch/server/luceneserver/StartIndexHandler.java

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
package com.yelp.nrtsearch.server.luceneserver;
1717

18+
import com.yelp.nrtsearch.server.Version;
1819
import com.yelp.nrtsearch.server.backup.Archiver;
1920
import com.yelp.nrtsearch.server.grpc.Mode;
2021
import com.yelp.nrtsearch.server.grpc.ReplicationServerClient;
@@ -23,7 +24,10 @@
2324
import com.yelp.nrtsearch.server.grpc.StartIndexRequest;
2425
import com.yelp.nrtsearch.server.grpc.StartIndexResponse;
2526
import com.yelp.nrtsearch.server.luceneserver.index.IndexStateManager;
27+
import com.yelp.nrtsearch.server.luceneserver.state.BackendGlobalState;
28+
import com.yelp.nrtsearch.server.monitoring.BootstrapMetrics;
2629
import com.yelp.nrtsearch.server.utils.FileUtil;
30+
import io.prometheus.client.Gauge;
2731
import java.io.IOException;
2832
import java.nio.file.Files;
2933
import java.nio.file.Path;
@@ -90,13 +94,20 @@ public StartIndexResponse handle(IndexState indexState, StartIndexRequest startI
9094
Files.createDirectories(indexState.getRootDir());
9195
deleteDownloadedBackupDirectories(restoreIndex.getResourceName());
9296
}
93-
94-
dataPath =
95-
downloadArtifact(
96-
restoreIndex.getServiceName(),
97-
restoreIndex.getResourceName(),
98-
INDEXED_DATA_TYPE.DATA,
99-
restoreFromIncArchiver);
97+
try (Gauge.Timer _timer =
98+
BootstrapMetrics.dataRestoreTimer
99+
.labels(
100+
BackendGlobalState.getBaseIndexName(startIndexRequest.getIndexName()),
101+
startIndexRequest.getIndexName(),
102+
Version.CURRENT.toString())
103+
.startTimer()) {
104+
dataPath =
105+
downloadArtifact(
106+
restoreIndex.getServiceName(),
107+
restoreIndex.getResourceName(),
108+
INDEXED_DATA_TYPE.DATA,
109+
restoreFromIncArchiver);
110+
}
100111
} else {
101112
throw new IllegalStateException(
102113
"Index " + indexState.getName() + " already restored");

src/main/java/com/yelp/nrtsearch/server/luceneserver/index/ImmutableIndexState.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,6 @@ public void start(
401401
if (isStarted()) {
402402
throw new IllegalStateException("index \"" + getName() + "\" was already started");
403403
}
404-
405404
// restore data if provided
406405
if (dataPath != null) {
407406
restoreIndexData(dataPath, getRootDir());

src/main/java/com/yelp/nrtsearch/server/luceneserver/state/BackendGlobalState.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,22 @@ public static String getUniqueIndexName(String indexName, String id) {
9797
return indexName + "-" + id;
9898
}
9999

100+
/**
101+
* Get the base index name from the unique index identifier
102+
*
103+
* @param uniqueIndexName unique index identifier
104+
* @return index name
105+
*/
106+
public static String getBaseIndexName(String uniqueIndexName) {
107+
String[] parts = uniqueIndexName.split("-");
108+
// suffix is 32 hexadecimal characters with four hyphens, so if there are additional hyphens,
109+
// they belongs to the base index name.
110+
if (parts.length > 5) {
111+
return String.join("-", java.util.Arrays.copyOf(parts, parts.length - 5));
112+
}
113+
return uniqueIndexName; // Return original if it doesn't match expected format
114+
}
115+
100116
/**
101117
* Constructor.
102118
*

src/main/java/com/yelp/nrtsearch/server/luceneserver/warming/Warmer.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,14 @@
1919
import com.google.common.base.Strings;
2020
import com.google.protobuf.InvalidProtocolBufferException;
2121
import com.google.protobuf.util.JsonFormat;
22+
import com.yelp.nrtsearch.server.Version;
2223
import com.yelp.nrtsearch.server.backup.Archiver;
2324
import com.yelp.nrtsearch.server.grpc.SearchRequest;
2425
import com.yelp.nrtsearch.server.luceneserver.IndexState;
2526
import com.yelp.nrtsearch.server.luceneserver.SearchHandler;
27+
import com.yelp.nrtsearch.server.luceneserver.state.BackendGlobalState;
28+
import com.yelp.nrtsearch.server.monitoring.BootstrapMetrics;
29+
import io.prometheus.client.Gauge;
2630
import java.io.BufferedReader;
2731
import java.io.BufferedWriter;
2832
import java.io.IOException;
@@ -125,8 +129,14 @@ public synchronized void backupWarmingQueriesToS3(String service) throws IOExcep
125129

126130
public void warmFromS3(IndexState indexState, int parallelism)
127131
throws IOException, SearchHandler.SearchHandlerException, InterruptedException {
128-
SearchHandler searchHandler = new SearchHandler(indexState.getSearchThreadPoolExecutor(), true);
129-
warmFromS3(indexState, parallelism, searchHandler);
132+
try (Gauge.Timer _timer =
133+
BootstrapMetrics.warmingQueryTimer
134+
.labels(service, BackendGlobalState.getBaseIndexName(index), Version.CURRENT.toString())
135+
.startTimer()) {
136+
SearchHandler searchHandler =
137+
new SearchHandler(indexState.getSearchThreadPoolExecutor(), true);
138+
warmFromS3(indexState, parallelism, searchHandler);
139+
}
130140
}
131141

132142
@VisibleForTesting
src/main/java/com/yelp/nrtsearch/server/monitoring/BootstrapMetrics.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright 2025 Yelp Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.yelp.nrtsearch.server.monitoring;
17+
18+
import io.prometheus.client.CollectorRegistry;
19+
import io.prometheus.client.Gauge;
20+
21+
/**
22+
* Class for collecting the timing of the overall bootstrap time and the major components. The
23+
* timers shall only be updated during the bootstrap.
24+
*/
25+
public class BootstrapMetrics {
26+
27+
public static final Gauge nrtsearchBootstrapTimer =
28+
Gauge.build()
29+
.name("total_bootstrap_time_seconds")
30+
.help("timer to record the total bootstrap time.")
31+
.labelNames("nrtsearch_version")
32+
.create();
33+
34+
public static final Gauge pluginInitializationTimer =
35+
Gauge.build()
36+
.name("plugin_initialization_time_seconds")
37+
.help("timer to record the boostrap time spent on plugin initialization.")
38+
.labelNames("plugin_name", "plugin_version", "nrtsearch_version")
39+
.create();
40+
41+
public static final Gauge dataRestoreTimer =
42+
Gauge.build()
43+
.name("data_restore_time_seconds")
44+
.help(
45+
"timer to record the boostrap time spent on restoring the stored data from local or remote source.")
46+
.labelNames("index", "unique_index_name", "nrtsearch_version")
47+
.create();
48+
49+
public static final Gauge initialNRTTimer =
50+
Gauge.build()
51+
.name("initial_nrt_time_seconds")
52+
.help("timer to record the boostrap time spent on initial nrt")
53+
.labelNames("index", "nrtsearch_version")
54+
.create();
55+
56+
public static final Gauge warmingQueryTimer =
57+
Gauge.build()
58+
.name("warming_time_seconds")
59+
.help("timer to record the boostrap time spent on plugin initialization.")
60+
.labelNames("service", "index", "nrtsearch_version")
61+
.create();
62+
63+
/**
64+
* Add all bootstrap metrics to the collector registry.
65+
*
66+
* @param registry collector registry
67+
*/
68+
public static void register(CollectorRegistry registry) {
69+
registry.register(nrtsearchBootstrapTimer);
70+
registry.register(pluginInitializationTimer);
71+
registry.register(dataRestoreTimer);
72+
registry.register(initialNRTTimer);
73+
registry.register(warmingQueryTimer);
74+
}
75+
}

src/main/java/com/yelp/nrtsearch/server/plugins/PluginsService.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,12 @@
1515
*/
1616
package com.yelp.nrtsearch.server.plugins;
1717

18+
import static io.prometheus.client.Collector.NANOSECONDS_PER_SECOND;
19+
1820
import com.amazonaws.services.s3.AmazonS3;
21+
import com.yelp.nrtsearch.server.Version;
1922
import com.yelp.nrtsearch.server.config.LuceneServerConfiguration;
23+
import com.yelp.nrtsearch.server.monitoring.BootstrapMetrics;
2024
import io.prometheus.client.CollectorRegistry;
2125
import java.io.File;
2226
import java.net.MalformedURLException;
@@ -69,10 +73,17 @@ public List<Plugin> loadPlugins() {
6973
List<Plugin> loadedPlugins = new ArrayList<>();
7074
PluginDownloader pluginDownloader = new PluginDownloader(amazonS3, config);
7175
for (String plugin : config.getPlugins()) {
76+
long startNs = System.nanoTime();
7277
logger.info("Loading plugin: " + plugin);
7378
PluginDescriptor descriptor = loadPlugin(plugin, pluginSearchPath, pluginDownloader);
7479
loadedPluginDescriptors.add(descriptor);
7580
loadedPlugins.add(descriptor.getPlugin());
81+
BootstrapMetrics.pluginInitializationTimer
82+
.labels(
83+
descriptor.getPluginMetadata().getName(),
84+
descriptor.getPluginMetadata().getVersion(),
85+
Version.CURRENT.toString())
86+
.set((System.nanoTime() - startNs) / NANOSECONDS_PER_SECOND);
7687
}
7788
pluginDownloader.close();
7889
return loadedPlugins;

0 commit comments

Comments
 (0)