Skip to content

Commit f553ace

Browse files
committed
Avoid availability gap between UP and queryability marking for already built SAI indexes on bounce
patch by Caleb Rackliffe; reviewed by David Capwell and Dmitry Konstantinov for CASSANDRA-20732
1 parent 8de4c92 commit f553ace

File tree

7 files changed

+85
-10
lines changed

7 files changed

+85
-10
lines changed

CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
5.0.5
2+
* Avoid availability gap between UP and queryability marking for already built SAI indexes on bounce (CASSANDRA-20732)
23
* Make Commitlog flush data safely in Direct IO mode (CASSANDRA-20692)
34
* Get SAI MemtableIndex refs before SSTableIndex refs at query time (CASSANDRA-20709)
45
* Fix MAX_SEGMENT_SIZE < chunkSize in MmappedRegions::updateState (CASSANDRA-20636)

src/java/org/apache/cassandra/index/sai/StorageAttachedIndex.java

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -331,9 +331,30 @@ public IndexMetadata getIndexMetadata()
331331
public Callable<?> getInitializationTask()
332332
{
333333
// New storage-attached indexes will be available for queries after on disk index data are built.
334-
// Memtable data will be indexed via flushing triggered by schema change
335-
// We only want to validate the index files if we are starting up
336-
IndexValidation validation = StorageService.instance.isStarting() ? IndexValidation.HEADER_FOOTER : IndexValidation.NONE;
334+
// Memtable data will be indexed via flushing triggered by schema change.
335+
// We only want to validate the index files if we are starting up.
336+
boolean isStarting = StorageService.instance.isStarting();
337+
IndexValidation validation = isStarting ? IndexValidation.HEADER_FOOTER : IndexValidation.NONE;
338+
339+
// Only attempt to make the index queryable if we are starting up. Otherwise, if we create a new index on top
340+
// of nothing but existing Memtable data (i.e. no SSTables), that data would be temporarily missing from index query results until it is flushed.
341+
if (isStarting)
342+
{
343+
StorageAttachedIndexGroup indexGroup = StorageAttachedIndexGroup.getIndexGroup(baseCfs);
344+
assert indexGroup != null : "Index group does not exist for table " + baseCfs.keyspace + '.' + baseCfs.name;
345+
346+
Collection<SSTableReader> nonIndexed = findNonIndexedSSTables(baseCfs, indexGroup, validation);
347+
348+
if (nonIndexed.isEmpty())
349+
{
350+
// If the index is complete, mark it queryable and avoid an initial build:
351+
baseCfs.indexManager.makeIndexQueryable(this, Status.BUILD_SUCCEEDED);
352+
logger.debug(indexIdentifier.logMessage("Skipping initial build, as index is already queryable..."));
353+
initBuildStarted = true;
354+
return () -> ImmediateFuture.success(null);
355+
}
356+
}
357+
337358
return () -> startInitialBuild(baseCfs, validation).get();
338359
}
339360

@@ -843,15 +864,12 @@ private Future<?> startInitialBuild(ColumnFamilyStore baseCfs, IndexValidation v
843864
// Force another flush to make sure on disk index is generated for memtable data before marking it queryable.
844865
// In the case of offline scrub, there are no live memtables.
845866
if (!baseCfs.getTracker().getView().liveMemtables.isEmpty())
846-
{
847867
baseCfs.forceBlockingFlush(ColumnFamilyStore.FlushReason.INDEX_BUILD_STARTED);
848-
}
849868

850869
// It is now safe to flush indexes directly from flushing Memtables.
851870
initBuildStarted = true;
852871

853872
StorageAttachedIndexGroup indexGroup = StorageAttachedIndexGroup.getIndexGroup(baseCfs);
854-
855873
assert indexGroup != null : "Index group does not exist for table " + baseCfs.keyspace + '.' + baseCfs.name;
856874

857875
List<SSTableReader> nonIndexed = findNonIndexedSSTables(baseCfs, indexGroup, validation);
@@ -888,8 +906,7 @@ private Future<?> startPreJoinTask()
888906
}
889907

890908
StorageAttachedIndexGroup indexGroup = StorageAttachedIndexGroup.getIndexGroup(baseCfs);
891-
892-
assert indexGroup != null : "Index group does not exist for table";
909+
assert indexGroup != null : "Index group does not exist for table " + baseCfs.keyspace + '.' + baseCfs.name;
893910

894911
Collection<SSTableReader> nonIndexed = findNonIndexedSSTables(baseCfs, indexGroup, IndexValidation.HEADER_FOOTER);
895912

src/java/org/apache/cassandra/service/StorageService.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2029,6 +2029,12 @@ public void setNormalModeUnsafe()
20292029
setMode(Mode.NORMAL, true);
20302030
}
20312031

2032+
@VisibleForTesting
2033+
public void setStartingModeUnsafe()
2034+
{
2035+
setMode(Mode.STARTING, true);
2036+
}
2037+
20322038
private void setMode(Mode m, boolean log)
20332039
{
20342040
setMode(m, null, log);

test/unit/org/apache/cassandra/index/sai/cql/EmptyStringLifecycleTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,21 @@
1818

1919
package org.apache.cassandra.index.sai.cql;
2020

21+
import org.junit.BeforeClass;
2122
import org.junit.Test;
2223

2324
import org.apache.cassandra.cql3.UntypedResultSet;
2425
import org.apache.cassandra.index.sai.SAITester;
2526

2627
public class EmptyStringLifecycleTest extends SAITester
2728
{
29+
@BeforeClass
30+
public static void setup()
31+
{
32+
setUpClass();
33+
requireNetwork(); // Ensure the node has advanced out of STARTING mode
34+
}
35+
2836
@Test
2937
public void testBeforeAndAfterFlush()
3038
{

test/unit/org/apache/cassandra/index/sai/cql/StorageAttachedIndexDDLTest.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
import org.apache.cassandra.io.sstable.format.SSTableReader;
7777
import org.apache.cassandra.schema.SchemaConstants;
7878
import org.apache.cassandra.schema.TableMetadata;
79+
import org.apache.cassandra.service.StorageService;
7980
import org.apache.cassandra.utils.Throwables;
8081
import org.assertj.core.api.Assertions;
8182
import org.mockito.Mockito;
@@ -1311,6 +1312,40 @@ public void nodetoolStopInitialIndexBuild() throws Throwable
13111312
assertTrue(verifyChecksum(numericIndexTermType, numericIndexIdentifier));
13121313
}
13131314

1315+
@Test
1316+
public void shouldMarkQueryableInInitializationTask() throws Throwable
1317+
{
1318+
createTable(CREATE_TABLE_TEMPLATE);
1319+
disableCompaction(KEYSPACE);
1320+
IndexIdentifier idxIdentifier = createIndexIdentifier(createIndexAsync(String.format(CREATE_INDEX_TEMPLATE, "v1")));
1321+
1322+
// create 10 SSTables
1323+
for (int i = 0; i < 10; i++)
1324+
{
1325+
execute("INSERT INTO %s (id1, v1, v2) VALUES (?, ?, ?)", String.valueOf(i), i, String.valueOf(i));
1326+
flush();
1327+
}
1328+
1329+
ResultSet rows = executeNet("SELECT id1 FROM %s WHERE v1 >= 5");
1330+
assertEquals(5, rows.all().size());
1331+
1332+
// Make the index artificially non-queryable:
1333+
ColumnFamilyStore cfs = getCurrentColumnFamilyStore();
1334+
Index index = cfs.indexManager.getIndexByName(idxIdentifier.indexName);
1335+
cfs.indexManager.makeIndexNonQueryable(index, Index.Status.BUILD_FAILED);
1336+
1337+
// Query should fail with the index in an artificially non-queryable state:
1338+
assertThatThrownBy(() -> executeNet("SELECT id1 FROM %s WHERE v1 >= 5")).isInstanceOf(ReadFailureException.class);
1339+
1340+
// Node must be in STARTING mode for it to be necessary for the initialization task to pre-emptively validate:
1341+
StorageService.instance.setStartingModeUnsafe();
1342+
// Simply getting the initialization task (and not running it) will validate and mark the index queryable again:
1343+
cfs.indexManager.buildIndex(index);
1344+
StorageService.instance.setNormalModeUnsafe();
1345+
rows = executeNet("SELECT id1 FROM %s WHERE v1 >= 5");
1346+
assertEquals(5, rows.all().size());
1347+
}
1348+
13141349
@Test
13151350
public void shouldRejectQueriesWithCustomExpressions()
13161351
{

test/unit/org/apache/cassandra/index/sai/functional/FailureTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
package org.apache.cassandra.index.sai.functional;
2222

23+
import org.junit.BeforeClass;
2324
import org.junit.Test;
2425

2526
import org.apache.cassandra.db.marshal.Int32Type;
@@ -36,6 +37,13 @@
3637

3738
public class FailureTest extends SAITester
3839
{
40+
@BeforeClass
41+
public static void setup()
42+
{
43+
setUpClass();
44+
requireNetwork(); // Ensure the node has advanced out of STARTING mode
45+
}
46+
3947
@Test
4048
public void shouldMakeIndexNonQueryableOnSSTableContextFailureDuringFlush() throws Throwable
4149
{

test/unit/org/apache/cassandra/index/sai/virtual/IndexesSystemViewTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import org.junit.BeforeClass;
2222
import org.junit.Test;
2323

24-
import org.apache.cassandra.cql3.CQLTester;
2524
import org.apache.cassandra.db.ColumnFamilyStore;
2625
import org.apache.cassandra.db.virtual.VirtualKeyspace;
2726
import org.apache.cassandra.db.virtual.VirtualKeyspaceRegistry;
@@ -60,7 +59,8 @@ public static void setup()
6059
{
6160
VirtualKeyspaceRegistry.instance.register(new VirtualKeyspace(SchemaConstants.VIRTUAL_VIEWS, ImmutableList.of(new ColumnIndexesSystemView(SchemaConstants.VIRTUAL_VIEWS))));
6261

63-
CQLTester.setUpClass();
62+
setUpClass();
63+
requireNetwork(); // Ensure the node has advanced out of STARTING mode
6464
}
6565

6666
@Test

0 commit comments

Comments
 (0)