Skip to content

Commit a4fbad5

Browse files
SOLR-17198: AttributeFetcher no longer fails when it observes multiple shard leaders (#2335)
AffinityPlacementFactory can fail if Shard leadership changes occur while it is collecting metrics. Co-authored-by: Paul McArthur <[email protected]>
1 parent 3912cf6 commit a4fbad5

File tree

3 files changed

+108
-8
lines changed

3 files changed

+108
-8
lines changed

solr/CHANGES.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Optimizations
2727

2828
Bug Fixes
2929
---------------------
30+
(No changes)
3031

3132
Deprecation Removals
3233
----------------------
@@ -139,6 +140,9 @@ Bug Fixes
139140

140141
* SOLR-17197: Fix getting fieldType by its name in FileBasedSpellChecker (Andrey Bozhko via Eric Pugh)
141142

143+
* SOLR-17198: AffinityPlacementFactory can fail if Shard leadership changes occur while it is collecting metrics.
144+
(Paul McArthur)
145+
142146
Dependency Upgrades
143147
---------------------
144148
(No changes)

solr/core/src/java/org/apache/solr/cluster/placement/impl/CollectionMetricsBuilder.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*/
1717
package org.apache.solr.cluster.placement.impl;
1818

19+
import java.lang.invoke.MethodHandles;
1920
import java.util.HashMap;
2021
import java.util.Iterator;
2122
import java.util.Map;
@@ -24,10 +25,14 @@
2425
import org.apache.solr.cluster.placement.ReplicaMetric;
2526
import org.apache.solr.cluster.placement.ReplicaMetrics;
2627
import org.apache.solr.cluster.placement.ShardMetrics;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
2730

2831
/** Builder class for constructing instances of {@link CollectionMetrics}. */
2932
public class CollectionMetricsBuilder {
3033

34+
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
35+
3136
final Map<String, ShardMetricsBuilder> shardMetricsBuilders = new HashMap<>();
3237

3338
public Map<String, ShardMetricsBuilder> getShardMetricsBuilders() {
@@ -78,15 +83,13 @@ public ShardMetrics build() {
7883
ReplicaMetrics metrics = replicaBuilder.build();
7984
metricsMap.put(name, metrics);
8085
if (replicaBuilder.leader) {
81-
if (leaderMetricsBuilder == null) {
82-
leaderMetricsBuilder = replicaBuilder;
83-
} else if (!leaderMetricsBuilder.replicaName.equals(replicaBuilder.replicaName)) {
84-
throw new RuntimeException(
85-
"two replicas claim to be the shard leader! existing="
86-
+ leaderMetricsBuilder
87-
+ " and current "
88-
+ replicaBuilder);
86+
if (leaderMetricsBuilder != null
87+
&& !leaderMetricsBuilder.replicaName.equals(replicaBuilder.replicaName)) {
88+
log.warn(
89+
"Multiple replicas claim to be shard leader, selecting the latest candidate ({}) for metrics purposes",
90+
replicaBuilder.replicaName);
8991
}
92+
leaderMetricsBuilder = replicaBuilder;
9093
}
9194
});
9295
final ReplicaMetrics finalLeaderMetrics =
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.solr.cluster.placement.impl;
18+
19+
import java.util.Arrays;
20+
import org.apache.solr.SolrTestCaseJ4;
21+
import org.apache.solr.cluster.placement.CollectionMetrics;
22+
import org.apache.solr.cluster.placement.ReplicaMetric;
23+
import org.apache.solr.cluster.placement.ReplicaMetrics;
24+
import org.apache.solr.cluster.placement.ShardMetrics;
25+
import org.junit.Test;
26+
27+
public class CollectionMetricsBuilderTest extends SolrTestCaseJ4 {
28+
29+
@Test
30+
public void testMultipleShardLeaders() {
31+
CollectionMetricsBuilder.ReplicaMetricsBuilder r1 =
32+
createReplicaMetricsBuilder(
33+
"r1", ReplicaMetricImpl.INDEX_SIZE_GB, 1.5 * MetricImpl.GB, true);
34+
CollectionMetricsBuilder.ReplicaMetricsBuilder r2 =
35+
createReplicaMetricsBuilder(
36+
"r2", ReplicaMetricImpl.INDEX_SIZE_GB, 2.5 * MetricImpl.GB, true);
37+
38+
CollectionMetrics metrics = collectionMetricsFromShardReplicaBuilders("shard1", r1, r2);
39+
ShardMetrics shardMetrics = metrics.getShardMetrics("shard1").get();
40+
41+
assertTrue("Shard metrics not found", shardMetrics.getLeaderMetrics().isPresent());
42+
assertTrue(
43+
"No metrics were present for leader replica", shardMetrics.getLeaderMetrics().isPresent());
44+
ReplicaMetrics leaderMetrics = shardMetrics.getLeaderMetrics().get();
45+
46+
// Both replicas claimed to be shard leader, so either metric value is acceptable, and an
47+
// exception should not be raised
48+
Double indexSize = leaderMetrics.getReplicaMetric(ReplicaMetricImpl.INDEX_SIZE_GB).get();
49+
assertTrue(
50+
"Metric value " + indexSize + " should have matched one of the replica's values",
51+
indexSize.equals(1.5) || indexSize.equals(2.5));
52+
}
53+
54+
@Test
55+
public void testNoShardLeader() {
56+
CollectionMetricsBuilder.ReplicaMetricsBuilder r1 =
57+
createReplicaMetricsBuilder(
58+
"r1", ReplicaMetricImpl.INDEX_SIZE_GB, 1.5 * MetricImpl.GB, false);
59+
CollectionMetricsBuilder.ReplicaMetricsBuilder r2 =
60+
createReplicaMetricsBuilder(
61+
"r2", ReplicaMetricImpl.INDEX_SIZE_GB, 2.5 * MetricImpl.GB, false);
62+
63+
CollectionMetrics metrics = collectionMetricsFromShardReplicaBuilders("shard1", r1, r2);
64+
assertTrue("Shard metrics not found", metrics.getShardMetrics("shard1").isPresent());
65+
ShardMetrics shardMetrics = metrics.getShardMetrics("shard1").get();
66+
67+
assertFalse(
68+
"No replica was leader, so leader metrics should not be present",
69+
shardMetrics.getLeaderMetrics().isPresent());
70+
}
71+
72+
private <T> CollectionMetricsBuilder.ReplicaMetricsBuilder createReplicaMetricsBuilder(
73+
final String name, final ReplicaMetric<T> metric, final T value, final boolean leader) {
74+
CollectionMetricsBuilder.ReplicaMetricsBuilder replicaMetricsBuilder =
75+
new CollectionMetricsBuilder.ReplicaMetricsBuilder(name);
76+
replicaMetricsBuilder.addMetric(metric, value);
77+
replicaMetricsBuilder.setLeader(leader);
78+
return replicaMetricsBuilder;
79+
}
80+
81+
private CollectionMetrics collectionMetricsFromShardReplicaBuilders(
82+
String shardName, CollectionMetricsBuilder.ReplicaMetricsBuilder... replicaMetrics) {
83+
CollectionMetricsBuilder.ShardMetricsBuilder shardMetricsBuilder =
84+
new CollectionMetricsBuilder.ShardMetricsBuilder(shardName);
85+
Arrays.stream(replicaMetrics)
86+
.forEach(r -> shardMetricsBuilder.replicaMetricsBuilders.put(r.replicaName, r));
87+
88+
CollectionMetricsBuilder builder = new CollectionMetricsBuilder();
89+
builder.shardMetricsBuilders.put(shardName, shardMetricsBuilder);
90+
91+
return builder.build();
92+
}
93+
}

0 commit comments

Comments
 (0)