Skip to content

Commit 494c3c7

Browse files
committed
Add AccordExecutorMetrics
Also Introduce:
- Sharded/LogLinearDecayingHistogram

Also Improve:
- Do not take a reference to CFK unless relevant

Also Fix:
- Sharded/LogLinearHistogram
- ExecuteFlags serialization bug in ReadData

patch by Benedict; reviewed by Alex Petrov for CASSANDRA-21017
1 parent cf80503 commit 494c3c7

22 files changed

+1452
-122
lines changed

src/java/org/apache/cassandra/metrics/AccordCacheMetrics.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public static class AccordCacheGlobalMetrics
4444

4545
public AccordCacheGlobalMetrics()
4646
{
47-
DefaultNameFactory factory = new DefaultNameFactory("AccordCache");
47+
DefaultNameFactory factory = new DefaultNameFactory(ACCORD_CACHE);
4848
this.usedBytes = Metrics.gauge(factory.createMetricName("UsedBytes"), fromAccordService(sumExecutors(executor -> executor.cacheUnsafe().weightedSize()), 0L));
4949
this.unreferencedBytes = Metrics.gauge(factory.createMetricName("UnreferencedBytes"), fromAccordService(sumExecutors(executor -> executor.cacheUnsafe().unreferencedBytes()), 0L));
5050
}
@@ -89,7 +89,7 @@ public Shard(ShardedHitRate.HitRateShard hitRate, LogLinearHistogram objectSize)
8989

9090
public AccordCacheMetrics(String subTypeName)
9191
{
92-
DefaultNameFactory factory = new DefaultNameFactory("AccordCache", subTypeName);
92+
DefaultNameFactory factory = new DefaultNameFactory(ACCORD_CACHE, subTypeName);
9393
this.objectSize = Metrics.shardedHistogram(factory.createMetricName("EntrySize"));
9494
this.hits = Metrics.gauge(factory.createMetricName("Hits"), hitRate::totalHits);
9595
this.misses = Metrics.gauge(factory.createMetricName("Misses"), hitRate::totalMisses);

src/java/org/apache/cassandra/metrics/AccordCoordinatorMetrics.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ public class AccordCoordinatorMetrics
4242
{
4343
public final static AccordCoordinatorMetrics readMetrics = new AccordCoordinatorMetrics("ro");
4444
public final static AccordCoordinatorMetrics writeMetrics = new AccordCoordinatorMetrics("rw");
45+
public final static AccordCoordinatorMetrics syncPointMetrics = new AccordCoordinatorMetrics("rx");
4546

4647
public static final String ACCORD_COORDINATOR = "AccordCoordinator";
4748
public static final String COORDINATOR_EPOCHS = "Epochs";
@@ -192,7 +193,7 @@ public String toString()
192193
{
193194
throw new RuntimeException(e);
194195
}
195-
builder.append("]");
196+
builder.append(']');
196197
return builder.toString();
197198
}
198199

@@ -206,6 +207,8 @@ private AccordCoordinatorMetrics forTransaction(TxnId txnId)
206207
return writeMetrics;
207208
else if (txnId.isSomeRead())
208209
return readMetrics;
210+
else if (txnId.isSyncPoint())
211+
return syncPointMetrics;
209212
}
210213
return null;
211214
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.cassandra.metrics;
20+
21+
import java.util.concurrent.TimeUnit;
22+
23+
import com.codahale.metrics.Gauge;
24+
import org.apache.cassandra.metrics.ShardedDecayingHistograms.ShardedDecayingHistogram;
25+
import org.apache.cassandra.service.accord.AccordExecutor;
26+
27+
import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics;
28+
import static org.apache.cassandra.service.accord.AccordExecutor.HISTOGRAMS;
29+
30+
/**
 * Metrics registered under the {@code "AccordExecutor"} metric type: three elapsed-time
 * histograms, one key-count histogram, and three {@code Long} gauges.
 *
 * A single shared instance is exposed through {@link #INSTANCE}; the constructor performs
 * all registration with {@code Metrics}.
 */
public class AccordExecutorMetrics
31+
{
32+
// metric type name passed to DefaultNameFactory for every metric created by this class
public static final String ACCORD_EXECUTOR = "AccordExecutor";
33+
// shared instance; constructing it registers all metrics below
public static final AccordExecutorMetrics INSTANCE = new AccordExecutorMetrics();
34+
35+
// factory for the gauges registered in the constructor
// NOTE(review): presumably holds one contribution per AccordExecutor — confirm against ShardedLongGauges
public final ShardedLongGauges<AccordExecutor> gauges = new ShardedLongGauges<>();
36+
37+
// latency histograms; each is created with one second expressed in nanoseconds
// (what that argument controls is defined by HISTOGRAMS.newHistogram, which is not visible here)
public final ShardedDecayingHistogram elapsedPreparingToRun = HISTOGRAMS.newHistogram(TimeUnit.SECONDS.toNanos(1L));
39+
public final ShardedDecayingHistogram elapsedWaitingToRun = HISTOGRAMS.newHistogram(TimeUnit.SECONDS.toNanos(1L));
40+
public final ShardedDecayingHistogram elapsedRunning = HISTOGRAMS.newHistogram(TimeUnit.SECONDS.toNanos(1L));
41+
42+
// number of keys involved; created with 1 << 12 (4096) as the newHistogram argument
public final ShardedDecayingHistogram keys = HISTOGRAMS.newHistogram(1 << 12);
44+
45+
// gauges assigned in the constructor from the values returned by Metrics.register
public final Gauge<Long> preparingToRun;
46+
public final Gauge<Long> waitingToRun;
47+
public final Gauge<Long> running;
48+
49+
/**
 * Registers the four histograms and the three gauges with {@code Metrics}, using names
 * produced by a {@code DefaultNameFactory} created for {@link #ACCORD_EXECUTOR}.
 */
public AccordExecutorMetrics()
50+
{
51+
DefaultNameFactory factory = new DefaultNameFactory(ACCORD_EXECUTOR);
52+
Metrics.register(factory.createMetricName("ElapsedPreparingToRun"), elapsedPreparingToRun);
53+
Metrics.register(factory.createMetricName("ElapsedWaitingToRun"), elapsedWaitingToRun);
54+
Metrics.register(factory.createMetricName("ElapsedRunning"), elapsedRunning);
55+
56+
Metrics.register(factory.createMetricName("Keys"), keys);
// each gauge pairs an AccordExecutor count accessor with Long::sum
// NOTE(review): presumably Long::sum combines the per-executor counts into one total — confirm in ShardedLongGauges.newGauge
57+
preparingToRun = Metrics.register(factory.createMetricName("PreparingToRun"), gauges.newGauge(AccordExecutor::unsafePreparingToRunCount, Long::sum));
58+
waitingToRun = Metrics.register(factory.createMetricName("WaitingToRun"), gauges.newGauge(AccordExecutor::unsafeWaitingToRunCount, Long::sum));
59+
running = Metrics.register(factory.createMetricName("Running"), gauges.newGauge(AccordExecutor::unsafeRunningCount, Long::sum));
60+
}
61+
}

src/java/org/apache/cassandra/metrics/AccordReplicaMetrics.java

Lines changed: 80 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -25,63 +25,85 @@
2525
import accord.local.Command;
2626
import accord.local.SafeCommandStore;
2727
import accord.primitives.PartialDeps;
28-
import accord.primitives.Timestamp;
2928
import accord.primitives.TxnId;
3029
import com.codahale.metrics.Counting;
31-
import com.codahale.metrics.Histogram;
32-
import com.codahale.metrics.Timer;
33-
import org.apache.cassandra.service.accord.api.AccordTimeService;
30+
import org.apache.cassandra.metrics.LogLinearDecayingHistograms.LogLinearDecayingHistogram;
31+
import org.apache.cassandra.metrics.ShardedDecayingHistograms.DecayingHistogramsShard;
32+
import org.apache.cassandra.metrics.ShardedDecayingHistograms.ShardedDecayingHistogram;
33+
import org.apache.cassandra.service.accord.AccordCommandStore;
34+
import org.apache.cassandra.service.accord.AccordSafeCommandStore;
3435
import org.apache.cassandra.tracing.Tracing;
3536

3637
import static org.apache.cassandra.metrics.CassandraMetricsRegistry.Metrics;
38+
import static org.apache.cassandra.service.accord.AccordExecutor.HISTOGRAMS;
39+
import static org.apache.cassandra.utils.Clock.Global.currentTimeMillis;
3740

3841
public class AccordReplicaMetrics
3942
{
4043
public final static AccordReplicaMetrics readMetrics = new AccordReplicaMetrics("ro");
4144
public final static AccordReplicaMetrics writeMetrics = new AccordReplicaMetrics("rw");
45+
public final static AccordReplicaMetrics syncPointMetrics = new AccordReplicaMetrics("rx");
4246

4347
public static final String ACCORD_REPLICA = "AccordReplica";
4448
public static final String REPLICA_STABLE_LATENCY = "StableLatency";
4549
public static final String REPLICA_PREAPPLY_LATENCY = "PreApplyLatency";
4650
public static final String REPLICA_APPLY_LATENCY = "ApplyLatency";
47-
public static final String REPLICA_APPLY_DURATION = "ApplyDuration";
4851
public static final String REPLICA_DEPENDENCIES = "Dependencies";
4952

53+
/**
 * The four per-shard histograms of one {@code AccordReplicaMetrics} instance, each obtained
 * by calling {@code forShard(shard)} on the corresponding sharded histogram.
 */
static final class SubShard
54+
{
55+
final LogLinearDecayingHistogram stableLatency;
56+
final LogLinearDecayingHistogram preapplyLatency;
57+
final LogLinearDecayingHistogram applyLatency;
58+
final LogLinearDecayingHistogram dependencies;
59+
60+
/**
 * @param metrics the metrics instance whose sharded histograms are resolved
 * @param shard   the shard passed to every {@code forShard} call
 */
private SubShard(AccordReplicaMetrics metrics, DecayingHistogramsShard shard)
61+
{
62+
this.stableLatency = metrics.stableLatency.forShard(shard);
63+
this.preapplyLatency = metrics.preapplyLatency.forShard(shard);
64+
this.applyLatency = metrics.applyLatency.forShard(shard);
65+
this.dependencies = metrics.dependencies.forShard(shard);
66+
}
67+
}
68+
69+
/**
 * Groups three {@code SubShard}s built from the same {@code DecayingHistogramsShard}:
 * one each for {@code readMetrics}, {@code writeMetrics} and {@code syncPointMetrics}.
 */
public static final class Shard
70+
{
71+
final SubShard reads, writes, syncPoints;
72+
/**
 * @param shard the histogram shard shared by all three sub-shards
 */
public Shard(DecayingHistogramsShard shard)
73+
{
74+
reads = new SubShard(readMetrics, shard);
75+
writes = new SubShard(writeMetrics, shard);
76+
syncPoints = new SubShard(syncPointMetrics, shard);
77+
}
78+
}
79+
5080
/**
5181
* The time between start on the coordinator and commit on this replica.
5282
*/
53-
public final Timer stableLatency;
83+
public final ShardedDecayingHistogram stableLatency = HISTOGRAMS.newHistogram(TimeUnit.SECONDS.toNanos(1L));
5484

5585
/**
5686
* The time between start on the coordinator and arrival of the result on this replica.
5787
*/
58-
public final Timer preapplyLatency;
88+
public final ShardedDecayingHistogram preapplyLatency = HISTOGRAMS.newHistogram(TimeUnit.SECONDS.toNanos(1L));
5989

6090
/**
6191
* The time between start on the coordinator and application on this replica.
6292
*/
63-
public final Timer applyLatency;
64-
65-
/**
66-
* TODO (expected): probably more interesting is latency from preapplied to apply;
67-
* we already track local write latencies, whch this effectively duplicates (but including queueing latencies)
68-
* Duration of applying changes.
69-
*/
70-
public final Timer applyDuration;
93+
public final ShardedDecayingHistogram applyLatency = HISTOGRAMS.newHistogram(TimeUnit.SECONDS.toNanos(1L));
7194

7295
/**
7396
* A histogram of the number of dependencies per transaction at this replica.
7497
*/
75-
public final Histogram dependencies;
98+
public final ShardedDecayingHistogram dependencies = HISTOGRAMS.newHistogram(1 << 12);
7699

77100
private AccordReplicaMetrics(String scope)
78101
{
79102
DefaultNameFactory replica = new DefaultNameFactory(ACCORD_REPLICA, scope);
80-
stableLatency = Metrics.timer(replica.createMetricName(REPLICA_STABLE_LATENCY));
81-
preapplyLatency = Metrics.timer(replica.createMetricName(REPLICA_PREAPPLY_LATENCY));
82-
applyLatency = Metrics.timer(replica.createMetricName(REPLICA_APPLY_LATENCY));
83-
applyDuration = Metrics.timer(replica.createMetricName(REPLICA_APPLY_DURATION));
84-
dependencies = Metrics.histogram(replica.createMetricName(REPLICA_DEPENDENCIES), true);
103+
Metrics.register(replica.createMetricName(REPLICA_STABLE_LATENCY), stableLatency);
104+
Metrics.register(replica.createMetricName(REPLICA_PREAPPLY_LATENCY), preapplyLatency);
105+
Metrics.register(replica.createMetricName(REPLICA_APPLY_LATENCY), applyLatency);
106+
Metrics.register(replica.createMetricName(REPLICA_DEPENDENCIES), dependencies);
85107
}
86108

87109
@Override
@@ -106,64 +128,79 @@ public String toString()
106128
{
107129
throw new RuntimeException(e);
108130
}
109-
builder.append("]");
131+
builder.append(']');
110132
return builder.toString();
111133
}
112134

113135
public static class Listener implements ReplicaEventListener
114136
{
115-
private AccordReplicaMetrics forTransaction(TxnId txnId)
137+
private SubShard forTransaction(SafeCommandStore safeStore, TxnId txnId)
116138
{
117139
if (txnId != null)
118140
{
141+
Shard shard = ((AccordCommandStore) safeStore.commandStore()).executor().replicaMetrics;
119142
if (txnId.isWrite())
120-
return writeMetrics;
143+
return shard.writes;
121144
else if (txnId.isSomeRead())
122-
return readMetrics;
145+
return shard.reads;
146+
else if (txnId.isSyncPoint())
147+
return shard.syncPoints;
123148
}
124149
return null;
125150
}
126151

152+
/**
 * Returns {@code currentTimeMillis()} multiplied by 1_000_000, i.e. a millisecond clock
 * reading scaled to nanoseconds. The result therefore only advances in whole-millisecond steps.
 */
private static long unixNanos()
153+
{
154+
return currentTimeMillis() * 1_000_000;
155+
}
156+
157+
/**
 * Convenience overload: computes the elapsed value for {@code txnId} using a fresh
 * {@code unixNanos()} reading as the current time.
 */
private static long elapsed(TxnId txnId)
158+
{
159+
return elapsed(unixNanos(), txnId);
160+
}
161+
162+
/**
 * Returns {@code unixNanos - txnId.hlc() * 1000}, clamped so the result is never negative.
 *
 * NOTE(review): the {@code * 1000} scaling implies {@code hlc()} is in microseconds — confirm.
 *
 * @param unixNanos the time to measure up to, in nanoseconds
 * @param txnId     the transaction whose {@code hlc()} marks the start
 */
private static long elapsed(long unixNanos, TxnId txnId)
163+
{
164+
return Math.max(0, unixNanos - (txnId.hlc() * 1000));
165+
}
166+
167+
/**
 * Casts {@code safeStore} to {@code AccordSafeCommandStore} and returns its
 * {@code histogramBuffer()}. The cast is unchecked, so any other {@code SafeCommandStore}
 * implementation fails with a {@code ClassCastException}.
 */
private static LogLinearDecayingHistograms.Buffer buffer(SafeCommandStore safeStore)
168+
{
169+
return ((AccordSafeCommandStore) safeStore).histogramBuffer();
170+
}
171+
127172
@Override
128173
public void onStable(SafeCommandStore safeStore, Command cmd)
129174
{
130175
Tracing.trace("Stable {} on {}", cmd.txnId(), safeStore.commandStore());
131-
long now = AccordTimeService.nowMicros();
132-
AccordReplicaMetrics metrics = forTransaction(cmd.txnId());
176+
SubShard metrics = forTransaction(safeStore, cmd.txnId());
133177
if (metrics != null)
134-
{
135-
long trxTimestamp = cmd.txnId().hlc();
136-
metrics.stableLatency.update(now - trxTimestamp, TimeUnit.MICROSECONDS);
137-
}
178+
metrics.stableLatency.add(buffer(safeStore), elapsed(cmd.txnId()));
138179
}
139180

140181
@Override
141182
public void onPreApplied(SafeCommandStore safeStore, Command cmd)
142183
{
143184
Tracing.trace("Preapplied {} on {}", cmd.txnId(), safeStore.commandStore());
144-
long now = AccordTimeService.nowMicros();
145-
AccordReplicaMetrics metrics = forTransaction(cmd.txnId());
185+
SubShard metrics = forTransaction(safeStore, cmd.txnId());
146186
if (metrics != null)
147187
{
148-
Timestamp trxTimestamp = cmd.txnId();
149-
metrics.preapplyLatency.update(now - trxTimestamp.hlc(), TimeUnit.MICROSECONDS);
188+
long elapsed = elapsed(cmd.txnId());
189+
metrics.preapplyLatency.add(buffer(safeStore), elapsed);
150190
PartialDeps deps = cmd.partialDeps();
151-
metrics.dependencies.update(deps != null ? deps.txnIdCount() : 0);
191+
metrics.dependencies.add(buffer(safeStore), deps != null ? deps.txnIdCount() : 0);
152192
}
153193
}
154194

155195
@Override
156-
public void onApplied(SafeCommandStore safeStore, Command cmd, long applyStartedAt)
196+
public void onApplied(SafeCommandStore safeStore, Command cmd)
157197
{
158198
Tracing.trace("Applied {} on {}", cmd.txnId(), safeStore.commandStore());
159-
long now = AccordTimeService.nowMicros();
160-
AccordReplicaMetrics metrics = forTransaction(cmd.txnId());
199+
SubShard metrics = forTransaction(safeStore, cmd.txnId());
161200
if (metrics != null)
162201
{
163-
Timestamp trxTimestamp = cmd.txnId();
164-
metrics.applyLatency.update(now - trxTimestamp.hlc(), TimeUnit.MICROSECONDS);
165-
if (applyStartedAt > 0)
166-
metrics.applyDuration.update(now - applyStartedAt, TimeUnit.MICROSECONDS);
202+
long now = unixNanos();
203+
metrics.applyLatency.add(buffer(safeStore), elapsed(now, cmd.txnId()));
167204
}
168205
}
169206
}

src/java/org/apache/cassandra/metrics/CassandraMetricsRegistry.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ public class CassandraMetricsRegistry extends MetricRegistry
118118
.add(AccordCoordinatorMetrics.ACCORD_COORDINATOR)
119119
.add(AccordCacheMetrics.ACCORD_CACHE)
120120
.add(AccordReplicaMetrics.ACCORD_REPLICA)
121+
.add(AccordExecutorMetrics.ACCORD_EXECUTOR)
121122
.add(AccordSystemMetrics.ACCORD_SYSTEM)
122123
.add(BatchMetrics.TYPE_NAME)
123124
.add(BufferPoolMetrics.TYPE_NAME)

0 commit comments

Comments
 (0)