@@ -13,6 +13,7 @@ import (
1313 "os"
1414 "path"
1515 "strconv"
16+ "strings"
1617 "testing"
1718 "time"
1819
@@ -46,6 +47,7 @@ import (
4647 "github.com/cockroachdb/cockroach/pkg/util/hlc"
4748 "github.com/cockroachdb/cockroach/pkg/util/leaktest"
4849 "github.com/cockroachdb/cockroach/pkg/util/log"
50+ "github.com/cockroachdb/cockroach/pkg/util/metric"
4951 "github.com/cockroachdb/cockroach/pkg/util/randutil"
5052 "github.com/cockroachdb/cockroach/pkg/util/stop"
5153 "github.com/cockroachdb/cockroach/pkg/util/syncutil"
@@ -698,19 +700,91 @@ func testKVNemesisImpl(t testing.TB, cfg kvnemesisTestCfg) {
698700 env := & Env {SQLDBs : sqlDBs , Tracker : tr , L : logger , Partitioner : & partitioner , Restarter : tc }
699701 failures , err := RunNemesis (ctx , rng , env , config , cfg .concurrency , cfg .numSteps , cfg .mode , dbs ... )
700702
701- for i := 0 ; i < cfg .numNodes ; i ++ {
702- t .Logf ("[%d] proposed: %d" , i ,
703- tc .GetFirstStoreFromServer (t , i ).Metrics ().RaftCommandsProposed .Count ())
704- t .Logf ("[%d] reproposed unchanged: %d" , i ,
705- tc .GetFirstStoreFromServer (t , i ).Metrics ().RaftCommandsReproposed .Count ())
706- t .Logf ("[%d] reproposed with new LAI: %d" , i ,
707- tc .GetFirstStoreFromServer (t , i ).Metrics ().RaftCommandsReproposedLAI .Count ())
708- }
703+ logMetricsReport (t , tc )
709704
710705 require .NoError (t , err , `%+v` , err )
711706 require .Zero (t , len (failures ), "kvnemesis detected failures" ) // they've been logged already
712707}
713708
709+ func logMetricsReport (t testing.TB , tc * testcluster.TestCluster ) {
710+ metricsOfInterest := []string {
711+ // Raft command metrics
712+ "raft.commands.proposed" ,
713+ "raft.commands.reproposed.new-lai" ,
714+ "raft.commands.reproposed.unchanged" ,
715+ // Transaction metrics
716+ "txn.aborts" ,
717+ "txn.durations" ,
718+ "txn.restarts.writetooold" ,
719+ "txn.restarts.serializable" ,
720+ "txn.restarts.readwithinuncertainty" ,
721+ "txn.server_side.1PC.success" ,
722+ "txnrecovery.failures" ,
723+ "txnrecovery.successes.aborted" ,
724+ "txnrecovery.successes.committed" ,
725+ "txnwaitqueue.deadlocks_total" ,
726+ }
727+
728+ numNodes := tc .NumServers ()
729+ nodeMetrics := make ([]map [string ]string , numNodes )
730+ metricsMap := make (map [string ]struct {}, len (metricsOfInterest ))
731+ for _ , m := range metricsOfInterest {
732+ metricsMap [m ] = struct {}{}
733+ }
734+ for i := 0 ; i < numNodes ; i ++ {
735+ nodeMetrics [i ] = make (map [string ]string )
736+ processMetric := func (name string , v interface {}) {
737+ switch val := v .(type ) {
738+ case * metric.Counter :
739+ nodeMetrics [i ][name ] = fmt .Sprintf ("%d" , val .Count ())
740+ case * metric.Gauge :
741+ nodeMetrics [i ][name ] = fmt .Sprintf ("%d" , val .Value ())
742+ case metric.IHistogram :
743+ // TODO(mira): Currently, histograms are assumed to store values of type
744+ // time duration.
745+ snapshot := val .CumulativeSnapshot ()
746+ makePretty := func (nanos float64 ) time.Duration {
747+ return time .Duration (nanos ).Round (time .Millisecond )
748+ }
749+ meanDur := makePretty (snapshot .Mean ())
750+ p50Dur := makePretty (snapshot .ValueAtQuantile (50 ))
751+ p99Dur := makePretty (snapshot .ValueAtQuantile (99 ))
752+ nodeMetrics [i ][name ] = fmt .Sprintf ("μ=%v p50=%v p99=%v" , meanDur , p50Dur , p99Dur )
753+ default :
754+ nodeMetrics [i ][name ] = fmt .Sprintf ("unknown:%T" , v )
755+ }
756+ }
757+
758+ clientRegistry := tc .Server (i ).StorageLayer ().MetricsRecorder ().AppRegistry ()
759+ nodeRegistry := tc .Server (i ).StorageLayer ().MetricsRecorder ().NodeRegistry ()
760+ storeID := tc .GetFirstStoreFromServer (t , i ).StoreID ()
761+ storeRegistry := tc .Server (i ).StorageLayer ().MetricsRecorder ().StoreRegistry (storeID )
762+ clientRegistry .Select (metricsMap , func (name string , v interface {}) { processMetric (name , v ) })
763+ nodeRegistry .Select (metricsMap , func (name string , v interface {}) { processMetric (name , v ) })
764+ storeRegistry .Select (metricsMap , func (name string , v interface {}) { processMetric (name , v ) })
765+ }
766+
767+ header := fmt .Sprintf ("%-35s" , "Metric" )
768+ for i := 0 ; i < numNodes ; i ++ {
769+ header += fmt .Sprintf (" | %-30s" , fmt .Sprintf ("Node %d" , i + 1 ))
770+ }
771+ t .Log (header )
772+
773+ separator := strings .Repeat ("-" , 35 )
774+ for i := 0 ; i < numNodes ; i ++ {
775+ separator += "-+-" + strings .Repeat ("-" , 30 )
776+ }
777+ t .Log (separator )
778+
779+ for _ , metricName := range metricsOfInterest {
780+ row := fmt .Sprintf ("%-35s" , metricName )
781+ for i := 0 ; i < numNodes ; i ++ {
782+ row += fmt .Sprintf (" | %-30s" , nodeMetrics [i ][metricName ])
783+ }
784+ t .Log (row )
785+ }
786+ }
787+
714788// TestRunReproductionSteps is a helper that allows quickly running a kvnemesis
715789// history.
716790func TestRunReproductionSteps (t * testing.T ) {
0 commit comments