Skip to content

Commit afda625

Browse files
CNDB-12620 Users should get INDEX_BUILD_IN_PROGRESS instead of INDEX_NOT_AVAILABLE when they cannot query because an index is building.
We add a new reason, `RequestFailureReason.INDEX_BUILD_IN_PROGRESS`, to differentiate indexes that are currently building in error messages. Co-authored-by: Sergio Bossa <[email protected]>
1 parent 4b16c61 commit afda625

File tree

7 files changed

+340
-55
lines changed

7 files changed

+340
-55
lines changed

src/java/org/apache/cassandra/exceptions/RequestFailureReason.java

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,20 @@
1818
package org.apache.cassandra.exceptions;
1919

2020
import java.io.IOException;
21+
import java.util.HashMap;
22+
import java.util.Map;
2123

2224
import com.google.common.primitives.Ints;
2325

2426
import org.apache.cassandra.db.filter.TombstoneOverwhelmingException;
27+
import org.apache.cassandra.index.IndexBuildInProgressException;
28+
import org.apache.cassandra.index.IndexNotAvailableException;
2529
import org.apache.cassandra.index.sai.utils.AbortedOperationException;
2630
import org.apache.cassandra.io.IVersionedSerializer;
2731
import org.apache.cassandra.io.util.DataInputPlus;
2832
import org.apache.cassandra.io.util.DataOutputPlus;
2933
import org.apache.cassandra.utils.vint.VIntCoding;
3034

31-
import static java.lang.Math.max;
3235
import static org.apache.cassandra.net.MessagingService.VERSION_40;
3336

3437
public enum RequestFailureReason
@@ -37,10 +40,13 @@ public enum RequestFailureReason
3740
READ_TOO_MANY_TOMBSTONES (1),
3841
TIMEOUT (2),
3942
INCOMPATIBLE_SCHEMA (3),
40-
INDEX_NOT_AVAILABLE (4),
41-
UNKNOWN_COLUMN (5),
42-
UNKNOWN_TABLE (6),
43-
REMOTE_STORAGE_FAILURE (7);
43+
INDEX_NOT_AVAILABLE (6), // We match it to Apache Cassandra's INDEX_NOT_AVAILABLE code introduced in 5.0
44+
// The following codes are not present in Apache Cassandra's RequestFailureReason
45+
// We should add new codes in HCD (which do not exist in Apache Cassandra) only with big numbers, to avoid conflicts
46+
UNKNOWN_COLUMN (500),
47+
UNKNOWN_TABLE (501),
48+
REMOTE_STORAGE_FAILURE (502),
49+
INDEX_BUILD_IN_PROGRESS (503);
4450

4551
public static final Serializer serializer = new Serializer();
4652

@@ -58,26 +64,29 @@ public int codeForNativeProtocol()
5864
return code;
5965
}
6066

61-
private static final RequestFailureReason[] codeToReasonMap;
67+
private static final Map<Integer, RequestFailureReason> codeToReasonMap = new HashMap<>();
68+
private static final Map<Class<? extends Throwable>, RequestFailureReason> exceptionToReasonMap = new HashMap<>();
6269

6370
static
6471
{
6572
RequestFailureReason[] reasons = values();
6673

67-
int max = -1;
68-
for (RequestFailureReason r : reasons)
69-
max = max(r.code, max);
70-
71-
RequestFailureReason[] codeMap = new RequestFailureReason[max + 1];
72-
7374
for (RequestFailureReason reason : reasons)
7475
{
75-
if (codeMap[reason.code] != null)
76+
if (codeToReasonMap.put(reason.code, reason) != null)
7677
throw new RuntimeException("Two RequestFailureReason-s that map to the same code: " + reason.code);
77-
codeMap[reason.code] = reason;
7878
}
7979

80-
codeToReasonMap = codeMap;
80+
exceptionToReasonMap.put(TombstoneOverwhelmingException.class, READ_TOO_MANY_TOMBSTONES);
81+
exceptionToReasonMap.put(IncompatibleSchemaException.class, INCOMPATIBLE_SCHEMA);
82+
exceptionToReasonMap.put(AbortedOperationException.class, TIMEOUT);
83+
exceptionToReasonMap.put(IndexNotAvailableException.class, INDEX_NOT_AVAILABLE);
84+
exceptionToReasonMap.put(UnknownColumnException.class, UNKNOWN_COLUMN);
85+
exceptionToReasonMap.put(UnknownTableException.class, UNKNOWN_TABLE);
86+
exceptionToReasonMap.put(IndexBuildInProgressException.class, INDEX_BUILD_IN_PROGRESS);
87+
88+
if (exceptionToReasonMap.size() != reasons.length-2)
89+
throw new RuntimeException("A new RequestFailureReasons was probably added and you may need to update the exceptionToReasonMap");
8190
}
8291

8392
public static RequestFailureReason fromCode(int code)
@@ -86,19 +95,18 @@ public static RequestFailureReason fromCode(int code)
8695
throw new IllegalArgumentException("RequestFailureReason code must be non-negative (got " + code + ')');
8796

8897
// be forgiving and return UNKNOWN if we aren't aware of the code - for forward compatibility
89-
return code < codeToReasonMap.length ? codeToReasonMap[code] : UNKNOWN;
98+
return codeToReasonMap.getOrDefault(code, UNKNOWN);
9099
}
91100

92101
public static RequestFailureReason forException(Throwable t)
93102
{
94-
if (t instanceof TombstoneOverwhelmingException)
95-
return READ_TOO_MANY_TOMBSTONES;
96-
97-
if (t instanceof IncompatibleSchemaException)
98-
return INCOMPATIBLE_SCHEMA;
103+
RequestFailureReason r = exceptionToReasonMap.get(t.getClass());
104+
if (r != null)
105+
return r;
99106

100-
if (t instanceof AbortedOperationException)
101-
return TIMEOUT;
107+
for (Map.Entry<Class<? extends Throwable>, RequestFailureReason> entry : exceptionToReasonMap.entrySet())
108+
if (entry.getKey().isInstance(t))
109+
return entry.getValue();
102110

103111
return UNKNOWN;
104112
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright DataStax, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.apache.cassandra.index;
18+
19+
import org.apache.cassandra.exceptions.RequestFailureReason;
20+
import org.apache.cassandra.exceptions.UncheckedInternalRequestExecutionException;
21+
22+
/**
23+
* Thrown if a secondary index is not currently available because it is building.
24+
*/
25+
public final class IndexBuildInProgressException extends UncheckedInternalRequestExecutionException
26+
{
27+
/**
28+
* Creates a new <code>IndexBuildInProgressException</code> for the specified index.
29+
* @param index the index
30+
*/
31+
public IndexBuildInProgressException(Index index)
32+
{
33+
super(RequestFailureReason.INDEX_BUILD_IN_PROGRESS,
34+
String.format("The secondary index '%s' is not yet available as it is building", index.getIndexMetadata().name));
35+
}
36+
}

src/java/org/apache/cassandra/index/SecondaryIndexManager.java

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -357,17 +357,30 @@ public synchronized Future<?> addIndex(IndexMetadata indexDef, boolean isNewCF)
357357

358358
/**
359359
* Throws an {@link IndexNotAvailableException} if any of the indexes in the specified {@link Index.QueryPlan} is
360-
* not queryable, as it's defined by {@link #isIndexQueryable(Index)}.
360+
* not queryable, as it's defined by {@link #isIndexQueryable(Index)}. If the index is not available because
361+
* it is building, an {@link IndexBuildInProgressException} is thrown instead.
361362
*
362363
* @param queryPlan a query plan
363364
* @throws IndexNotAvailableException if the query plan has any index that is not queryable
364365
*/
365366
public void checkQueryability(Index.QueryPlan queryPlan)
366367
{
368+
InetAddressAndPort endpoint = FBUtilities.getBroadcastAddressAndPort();
369+
367370
for (Index index : queryPlan.getIndexes())
368371
{
372+
String indexName = index.getIndexMetadata().name;
373+
Index.Status indexStatus = getIndexStatus(endpoint, keyspace.getName(), indexName);
374+
369375
if (!isIndexQueryable(index))
376+
{
377+
// In Astra an index can be queryable while it is building, so we need to check both that it is not queryable and that it is building.
378+
// Also, isQueryable is always true for non-SAI index implementations.
379+
if (indexStatus == Index.Status.FULL_REBUILD_STARTED)
380+
throw new IndexBuildInProgressException(index);
381+
370382
throw new IndexNotAvailableException(index);
383+
}
371384
}
372385
}
373386

@@ -839,6 +852,8 @@ private synchronized void markIndexFailed(Index index, boolean isInitialBuild)
839852

840853
if (!index.getSupportedLoadTypeOnFailure(isInitialBuild).supportsReads() && queryableIndexes.remove(indexName))
841854
logger.info("Index [{}] became not-queryable because of failed build.", indexName);
855+
856+
makeIndexNonQueryable(index, Index.Status.BUILD_FAILED);
842857
}
843858
}
844859

@@ -1810,13 +1825,18 @@ public static void shutdownAndWait(long timeout, TimeUnit units) throws Interrup
18101825
*/
18111826
public static <E extends Endpoints<E>> E filterForQuery(E liveEndpoints, Keyspace keyspace, Index.QueryPlan indexQueryPlan, ConsistencyLevel level)
18121827
{
1828+
Map<InetAddressAndPort, Index.Status> indexStatusMap = new HashMap<>();
1829+
18131830
E queryableEndpoints = liveEndpoints.filter(replica -> {
18141831

18151832
for (Index index : indexQueryPlan.getIndexes())
18161833
{
18171834
Index.Status status = getIndexStatus(replica.endpoint(), keyspace.getName(), index.getIndexMetadata().name);
18181835
if (!index.isQueryable(status))
1836+
{
1837+
indexStatusMap.put(replica.endpoint(), status);
18191838
return false;
1839+
}
18201840
}
18211841

18221842
return true;
@@ -1834,7 +1854,13 @@ public static <E extends Endpoints<E>> E filterForQuery(E liveEndpoints, Keyspac
18341854
{
18351855
Map<InetAddressAndPort, RequestFailureReason> failureReasons = new HashMap<>();
18361856
liveEndpoints.without(queryableEndpoints.endpoints())
1837-
.forEach(replica -> failureReasons.put(replica.endpoint(), RequestFailureReason.INDEX_NOT_AVAILABLE));
1857+
.forEach(replica -> {
1858+
Index.Status status = indexStatusMap.get(replica.endpoint());
1859+
if (status == Index.Status.FULL_REBUILD_STARTED)
1860+
failureReasons.put(replica.endpoint(), RequestFailureReason.INDEX_BUILD_IN_PROGRESS);
1861+
else
1862+
failureReasons.put(replica.endpoint(), RequestFailureReason.INDEX_NOT_AVAILABLE);
1863+
});
18381864

18391865
throw new ReadFailureException(level, filtered, required, false, failureReasons);
18401866
}
@@ -1941,9 +1967,6 @@ public synchronized static void receivePeerIndexStatus(InetAddressAndPort endpoi
19411967

19421968
private synchronized static void propagateLocalIndexStatus(String keyspace, String index, Index.Status status)
19431969
{
1944-
if (!Gossiper.instance.isEnabled())
1945-
return;
1946-
19471970
try
19481971
{
19491972
Map<String, Index.Status> states = peerIndexStatus.computeIfAbsent(FBUtilities.getBroadcastAddressAndPort(),
@@ -1955,6 +1978,11 @@ private synchronized static void propagateLocalIndexStatus(String keyspace, Stri
19551978
else
19561979
states.put(keyspaceIndex, status);
19571980

1981+
// Make sure to check this after peerIndexStatus is populated but before the JSON is built: we don't need
1982+
// the latter if there's no Gossiper (as in CNDB), so we can avoid the related CPU and memory usage.
1983+
if (!Gossiper.instance.isEnabled())
1984+
return;
1985+
19581986
String newStatus = JSONValue.toJSONString(states.entrySet().stream()
19591987
.collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().toString())));
19601988
statusPropagationExecutor.submit(() -> {

0 commit comments

Comments
 (0)