Skip to content

Commit ae1c14c

Browse files
authored
CNDB-13565: allow setting memtable shard lock fairness via JMX and a system property (#1785)
### What is the issue

The memtable shard lock (required for `put`) is non-fair. We suspect this leads to elevated latencies under bursty load, as in #13565.

### What does this PR fix and why was it fixed

This change introduces the `cassandra.trie.memtable.shard.lock.fairness` system property and the `LockFairness` property of the `org.apache.cassandra.db:type=TrieMemtableConfig` JMX object, so that lock fairness can be configured persistently or online. An online change takes effect once a new memtable is created (i.e. after a flush). If forcing a flush is not desired, one can watch the `BytesFlushed` metric for the table to tell when a flush has occurred.
1 parent b667550 commit ae1c14c

File tree

5 files changed

+47
-10
lines changed

5 files changed

+47
-10
lines changed

src/java/org/apache/cassandra/config/CassandraRelevantProperties.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.concurrent.TimeUnit;
2222

2323
import org.apache.cassandra.concurrent.Stage;
24+
import org.apache.cassandra.db.memtable.TrieMemtable;
2425
import org.apache.cassandra.exceptions.ConfigurationException;
2526
import org.apache.cassandra.io.compress.AdaptiveCompressor;
2627
import org.apache.cassandra.io.compress.LZ4Compressor;
@@ -276,7 +277,7 @@ public enum CassandraRelevantProperties
276277

277278
/** Represents the maximum size (in bytes) of a serialized mutation that can be cached **/
278279
CACHEABLE_MUTATION_SIZE_LIMIT("cassandra.cacheable_mutation_size_limit_bytes", Long.toString(1_000_000)),
279-
280+
280281
MIGRATION_DELAY("cassandra.migration_delay_ms", "60000"),
281282
/** Defines how often schema definitions are pulled from the other nodes */
282283
SCHEMA_PULL_INTERVAL_MS("cassandra.schema_pull_interval_ms", "60000"),
@@ -316,7 +317,7 @@ public enum CassandraRelevantProperties
316317
* Number of polls without gossip state change to consider gossip as settled.
317318
*/
318319
GOSSIP_SETTLE_POLL_SUCCESSES_REQUIRED("cassandra.gossip_settle_poll_success_required", "3"),
319-
320+
320321
/** Which class to use for token metadata provider */
321322
CUSTOM_TMD_PROVIDER_PROPERTY("cassandra.custom_token_metadata_provider_class"),
322323

@@ -641,7 +642,17 @@ public enum CassandraRelevantProperties
641642
* Allows custom implementation of {@link OperationContext.Factory} to optionally create and configure custom
642643
* {@link OperationContext} instances.
643644
*/
644-
OPERATION_CONTEXT_FACTORY("cassandra.operation_context_factory_class");
645+
OPERATION_CONTEXT_FACTORY("cassandra.operation_context_factory_class"),
646+
647+
/**
648+
* Number of shards for TrieMemtable. If not specified, defaults to {@link TrieMemtable#autoShardCount}
649+
*/
650+
TRIE_MEMTABLE_SHARD_COUNT("cassandra.trie.memtable.shard.count"),
651+
652+
/**
653+
* Whether to use fair locking for TrieMemtable shard locks. Defaults to false.
654+
*/
655+
TRIE_MEMTABLE_SHARD_LOCK_FAIRNESS("cassandra.trie.memtable.shard.lock.fairness", "false");
645656

646657
CassandraRelevantProperties(String key, String defaultVal)
647658
{

src/java/org/apache/cassandra/db/memtable/TrieMemtable.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.slf4j.Logger;
3131
import org.slf4j.LoggerFactory;
3232

33+
import org.apache.cassandra.config.CassandraRelevantProperties;
3334
import org.apache.cassandra.config.DatabaseDescriptor;
3435
import org.apache.cassandra.db.BufferDecoratedKey;
3536
import org.apache.cassandra.db.ColumnFamilyStore;
@@ -49,7 +50,6 @@
4950
import org.apache.cassandra.db.partitions.TriePartitionUpdate;
5051
import org.apache.cassandra.db.partitions.TriePartitionUpdater;
5152
import org.apache.cassandra.db.rows.EncodingStats;
52-
import org.apache.cassandra.db.rows.Unfiltered;
5353
import org.apache.cassandra.db.rows.UnfilteredRowIterator;
5454
import org.apache.cassandra.db.tries.Direction;
5555
import org.apache.cassandra.db.tries.InMemoryTrie;
@@ -150,10 +150,8 @@ public class TrieMemtable extends AbstractAllocatorMemtable
150150
*/
151151
private volatile MemtableAverageRowSize estimatedAverageRowSize;
152152

153-
@VisibleForTesting
154-
public static final String SHARD_COUNT_PROPERTY = "cassandra.trie.memtable.shard.count";
155-
156-
public static volatile int SHARD_COUNT = Integer.getInteger(SHARD_COUNT_PROPERTY, autoShardCount());
153+
public static volatile int SHARD_COUNT = CassandraRelevantProperties.TRIE_MEMTABLE_SHARD_COUNT.getInt(autoShardCount());
154+
public static volatile boolean SHARD_LOCK_FAIRNESS = CassandraRelevantProperties.TRIE_MEMTABLE_SHARD_LOCK_FAIRNESS.getBoolean();
157155

158156
private static int autoShardCount()
159157
{
@@ -595,7 +593,7 @@ public static class MemtableShard
595593
private volatile int partitionCount = 0;
596594

597595
@Unmetered
598-
private ReentrantLock writeLock = new ReentrantLock();
596+
private ReentrantLock writeLock = new ReentrantLock(SHARD_LOCK_FAIRNESS);
599597

600598
// Content map for the given shard. This is implemented as a memtable trie which uses the prefix-free
601599
// byte-comparable ByteSource representations of the keys to address the partitions.
@@ -886,12 +884,14 @@ public void setShardCount(String shardCount)
886884
if ("auto".equalsIgnoreCase(shardCount))
887885
{
888886
SHARD_COUNT = autoShardCount();
887+
CassandraRelevantProperties.TRIE_MEMTABLE_SHARD_COUNT.setInt(SHARD_COUNT);
889888
}
890889
else
891890
{
892891
try
893892
{
894893
SHARD_COUNT = Integer.valueOf(shardCount);
894+
CassandraRelevantProperties.TRIE_MEMTABLE_SHARD_COUNT.setInt(SHARD_COUNT);
895895
}
896896
catch (NumberFormatException ex)
897897
{
@@ -908,6 +908,20 @@ public String getShardCount()
908908
{
909909
return "" + SHARD_COUNT;
910910
}
911+
912+
@Override
913+
public void setLockFairness(String fairness)
914+
{
915+
SHARD_LOCK_FAIRNESS = Boolean.parseBoolean(fairness);
916+
CassandraRelevantProperties.TRIE_MEMTABLE_SHARD_LOCK_FAIRNESS.setBoolean(SHARD_LOCK_FAIRNESS);
917+
logger.info("Requested setting shard lock fairness to {}; set to: {}", fairness, SHARD_LOCK_FAIRNESS);
918+
}
919+
920+
@Override
921+
public String getLockFairness()
922+
{
923+
return "" + SHARD_LOCK_FAIRNESS;
924+
}
911925
}
912926

913927
}

src/java/org/apache/cassandra/db/memtable/TrieMemtableConfigMXBean.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,8 @@ public interface TrieMemtableConfigMXBean
2323
public void setShardCount(String numShards);
2424

2525
public String getShardCount();
26+
27+
public void setLockFairness(String fairness);
28+
29+
public String getLockFairness();
2630
}

test/unit/org/apache/cassandra/cql3/validation/operations/AlterTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ public static void setUpClass()
5454
{
5555
// AlterTest uses Murmur3 partitioner, but injects OrderPreservingPartitioner.StringToken
5656
// into TokenMetadata; expect trouble
57-
System.setProperty(TrieMemtable.SHARD_COUNT_PROPERTY, "1");
57+
System.setProperty(CassandraRelevantProperties.TRIE_MEMTABLE_SHARD_COUNT.getKey(), "1");
5858
CQLTester.setUpClass();
5959
}
6060

test/unit/org/apache/cassandra/db/memtable/TrieMemtableConfigTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,12 @@ public void testAutoShardCount() throws MalformedObjectNameException, Reflection
5959
jmxConnection.setAttribute(new ObjectName(TRIE_MEMTABLE_CONFIG_OBJECT_NAME), new Attribute("ShardCount", "auto"));
6060
assertEquals(4 * FBUtilities.getAvailableProcessors(), Integer.parseInt(new TrieMemtable.TrieMemtableConfig().getShardCount()));
6161
}
62+
63+
@Test
64+
public void testShardLockFairnessSetByJMX() throws MalformedObjectNameException, ReflectionException, AttributeNotFoundException, InstanceNotFoundException, MBeanException, IOException, InvalidAttributeValueException
65+
{
66+
assertEquals(false, Boolean.parseBoolean(new TrieMemtable.TrieMemtableConfig().getLockFairness()));
67+
jmxConnection.setAttribute(new ObjectName(TRIE_MEMTABLE_CONFIG_OBJECT_NAME), new Attribute("LockFairness", "TrUe"));
68+
assertEquals(true, Boolean.parseBoolean(new TrieMemtable.TrieMemtableConfig().getLockFairness()));
69+
}
6270
}

0 commit comments

Comments
 (0)