smartcontractkit
diff --git a/‎book/src/libs/wasp/benchspy/first_test.md‎
Lines changed: 2 additions & 2 deletions b/‎book/src/libs/wasp/benchspy/first_test.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎book/src/libs/wasp/benchspy/loki_custom.md‎
Lines changed: 1 addition & 1 deletion b/‎book/src/libs/wasp/benchspy/loki_custom.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎book/src/libs/wasp/benchspy/loki_std.md‎
Lines changed: 26 additions & 9 deletions b/‎book/src/libs/wasp/benchspy/loki_std.md‎
Lines changed: 26 additions & 9 deletions
diff --git a/‎book/src/libs/wasp/benchspy/prometheus_std.md‎
Lines changed: 2 additions & 0 deletions b/‎book/src/libs/wasp/benchspy/prometheus_std.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎book/src/libs/wasp/benchspy/simplest_metrics.md‎
Lines changed: 8 additions & 4 deletions b/‎book/src/libs/wasp/benchspy/simplest_metrics.md‎
Lines changed: 8 additions & 4 deletions
diff --git a/‎wasp/benchspy/direct.go‎
Lines changed: 12 additions & 8 deletions b/‎wasp/benchspy/direct.go‎
Lines changed: 12 additions & 8 deletions
diff --git a/‎wasp/benchspy/direct_test.go‎
Lines changed: 3 additions & 3 deletions b/‎wasp/benchspy/direct_test.go‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎wasp/benchspy/loki.go‎
Lines changed: 25 additions & 14 deletions b/‎wasp/benchspy/loki.go‎
Lines changed: 25 additions & 14 deletions
diff --git a/‎wasp/benchspy/loki_test.go‎
Lines changed: 31 additions & 8 deletions b/‎wasp/benchspy/loki_test.go‎
Lines changed: 31 additions & 8 deletions
@@ -43,7 +43,7 @@ With load data available, let's generate a baseline performance report and store
 ```go
 baseLineReport, err := benchspy.NewStandardReport(
     // random hash, this should be the commit or hash of the Application Under Test (AUT)
-    "e7fc5826a572c09f8b93df3b9f674113372ce924",
+    "v1.0.0",
     // use built-in queries for an executor that fetches data directly from the WASP generator
     benchspy.WithStandardQueries(benchspy.StandardQueryExecutor_Direct),
     // WASP generators
@@ -98,7 +98,7 @@ defer cancelFn()
 currentReport, previousReport, err := benchspy.FetchNewStandardReportAndLoadLatestPrevious(
     fetchCtx,
     // commit or tag of the new application version
-    "e7fc5826a572c09f8b93df3b9f674113372ce925",
+    "v2.0.0",
     benchspy.WithStandardQueries(benchspy.StandardQueryExecutor_Direct),
     benchspy.WithGenerators(newGen),
 )
 
@@ -29,7 +29,7 @@ Now, let’s create a `StandardReport` using our custom queries:
 
 ```go
 baseLineReport, err := benchspy.NewStandardReport(
-    "2d1fa3532656c51991c0212afce5f80d2914e34e",
+    "v1.0.0",
     // notice the different functional option used to pass Loki executor with custom queries
     benchspy.WithQueryExecutors(lokiQueryExecutor),
     benchspy.WithGenerators(gen),
 
@@ -44,7 +44,7 @@ require.NoError(t, err)
 gen.Run(true)
 
 baseLineReport, err := benchspy.NewStandardReport(
-    "c2cf545d733eef8bad51d685fcb302e277d7ca14",
+    "v1.0.0",
     // notice the different standard query executor type
     benchspy.WithStandardQueries(benchspy.StandardQueryExecutor_Loki),
     benchspy.WithGenerators(gen),
@@ -67,16 +67,27 @@ Since the next steps are very similar to those in the first test, we’ll skip t
 By default, the `LokiQueryExecutor` returns results as the `[]string` data type. Let’s use dedicated convenience functions to cast them from `interface{}` to string slices:
 
 ```go
-currentAsStringSlice := benchspy.MustAllLokiResults(currentReport)
-previousAsStringSlice := benchspy.MustAllLokiResults(previousReport)
+allCurrentAsStringSlice := benchspy.MustAllLokiResults(currentReport)
+allPreviousAsStringSlice := benchspy.MustAllLokiResults(previousReport)
+
+require.NotEmpty(t, allCurrentAsStringSlice, "current report is empty")
+require.NotEmpty(t, allPreviousAsStringSlice, "previous report is empty")
+
+currentAsStringSlice := allCurrentAsStringSlice[gen.Cfg.GenName]
+previousAsStringSlice := allPreviousAsStringSlice[gen.Cfg.GenName]
 ```
 
+An explanation is needed here: this function separates metrics for each generator, hence it returns a `map[string]map[string][]string`. Let's break it down:
+- the outer map's key is the generator name
+- the inner map's key is the metric name, and its value is a series of measurements
+In our case there's only a single generator, but in a complex test there might be a few.
+
 ## Step 4: Compare Metrics
 
 Now, let’s compare metrics. Since we have `[]string`, we’ll first convert it to `[]float64`, calculate the median, and ensure the difference between the averages is less than 1%. Again, this is just an example—you should decide the best way to validate your metrics. Here we are explicitly aggregating them using an average to get a single number representation of each metric, but for your case a median or percentile or yet some other aggregate might be more appropriate.
 
 ```go
-var compareAverages = func(t *testing.T, metricName string, currentAsStringSlice, previousAsStringSlice map[string][]string) {
+var compareAverages = func(t *testing.T, metricName string, currentAsStringSlice, previousAsStringSlice map[string][]string, maxPrecentageDiff float64) {
 	require.NotEmpty(t, currentAsStringSlice[metricName], "%s results were missing from current report", metricName)
 	require.NotEmpty(t, previousAsStringSlice[metricName], "%s results were missing from previous report", metricName)
 
@@ -98,13 +109,19 @@ var compareAverages = func(t *testing.T, metricName string, currentAsStringSlice
 	} else {
 		diffPrecentage = 100.0
 	}
-	assert.LessOrEqual(t, math.Abs(diffPrecentage), 1.0, "%s medians are more than 1% different", metricName, fmt.Sprintf("%.4f", diffPrecentage))
+	assert.LessOrEqual(t, math.Abs(diffPrecentage), maxPrecentageDiff, "%s medians are more than 1% different", metricName, fmt.Sprintf("%.4f", diffPrecentage))
 }
 
-compareAverages(t, string(benchspy.MedianLatency), currentAsStringSlice, previousAsStringSlice)
-compareAverages(t, string(benchspy.Percentile95Latency), currentAsStringSlice, previousAsStringSlice)
-compareAverages(t, string(benchspy.MaxLatency), currentAsStringSlice, previousAsStringSlice)
-compareAverages(t, string(benchspy.ErrorRate), currentAsStringSlice, previousAsStringSlice)
+compareAverages(
+    t,
+    string(benchspy.MedianLatency),
+    currentAsStringSlice,
+    previousAsStringSlice,
+    1.0,
+)
+compareAverages(t, string(benchspy.Percentile95Latency), currentAsStringSlice, previousAsStringSlice, 1.0)
+compareAverages(t, string(benchspy.MaxLatency), currentAsStringSlice, previousAsStringSlice, 1.0)
+compareAverages(t, string(benchspy.ErrorRate), currentAsStringSlice, previousAsStringSlice, 1.0)
 ```
 
 > [!WARNING]
 
@@ -95,6 +95,8 @@ currentMedianCPUUsageVector := currentMedianCPUUsage.(model.Vector)
 previousMedianCPUUsageVector := previousMedianCPUUsage.(model.Vector)
 ```
 
+Since these metrics are not related to load generation, the convenience function returns a `map[string](model.Value)`, where the key is the resource metric name.
+
 > [!WARNING]
 > All standard Prometheus metrics bundled with `BenchSpy` return `model.Vector`.
 > However, if you use custom queries, you must manually verify their return types.
 
@@ -23,6 +23,8 @@ hasErrors, errors := benchspy.CompareDirectWithThresholds(
 require.False(t, hasErrors, fmt.Sprintf("errors found: %v", errors))
 ```
 
+If there are errors, they will be returned as `map[string][]error`, where the key is the name of a generator.
+
 > [!NOTE]
 > Both `Direct` and `Loki` query executors support following standard performance metrics out of the box:
 > - `median_latency`
@@ -32,14 +34,16 @@ require.False(t, hasErrors, fmt.Sprintf("errors found: %v", errors))
 
 The function also prints a table with the differences between two reports, regardless whether they were meaningful:
 ```bash
+Generator: vu1
+==============
 +-------------------------+---------+---------+---------+
-|         METRIC          | V1.0.0  | V1.1.0  | DIFF %  |
+|         METRIC          |   V1    |   V2    | DIFF %  |
 +-------------------------+---------+---------+---------+
-| median_latency          | 50.4256 | 61.0009 | 20.9722 |
+| median_latency          | 50.1300 | 50.1179 | -0.0242 |
 +-------------------------+---------+---------+---------+
-| 95th_percentile_latency | 51.0082 | 61.1052 | 19.7949 |
+| 95th_percentile_latency | 50.7387 | 50.7622 | 0.0463  |
 +-------------------------+---------+---------+---------+
-| max_latency             | 52.1362 | 61.2028 | 17.3903 |
+| max_latency             | 55.7195 | 51.7248 | -7.1692 |
 +-------------------------+---------+---------+---------+
 | error_rate              | 0.0000  | 0.0000  | 0.0000  |
 +-------------------------+---------+---------+---------+
 
@@ -12,7 +12,7 @@ import (
 	"github.com/smartcontractkit/chainlink-testing-framework/wasp"
 )
 
-type DirectQueryFn = func(responses *wasp.SliceBuffer[wasp.Response]) (float64, error)
+type DirectQueryFn = func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error)
 
 type DirectQueryExecutor struct {
 	KindName     string                   `json:"kind"`
@@ -49,6 +49,10 @@ func NewDirectQueryExecutor(generator *wasp.Generator, queries map[string]Direct
 	return g, nil
 }
 
+func (g *DirectQueryExecutor) GeneratorName() string {
+	return g.Generator.Cfg.GenName
+}
+
 // Results returns the query results as a map of string keys to interface{} values.
 // It allows users to access the outcomes of executed queries, facilitating further processing or type assertions.
 func (g *DirectQueryExecutor) Results() map[string]interface{} {
@@ -122,14 +126,14 @@ func (g *DirectQueryExecutor) Execute(_ context.Context) error {
 			return fmt.Errorf("generator %s has no data", g.Generator.Cfg.GenName)
 		}
 		length := len(g.Generator.GetData().FailResponses.Data) + len(g.Generator.GetData().OKData.Data)
-		allResponses := wasp.NewSliceBuffer[wasp.Response](length)
+		allResponses := wasp.NewSliceBuffer[*wasp.Response](length)
 
 		for _, response := range g.Generator.GetData().OKResponses.Data {
-			allResponses.Append(*response)
+			allResponses.Append(response)
 		}
 
 		for _, response := range g.Generator.GetData().FailResponses.Data {
-			allResponses.Append(*response)
+			allResponses.Append(response)
 		}
 
 		if len(allResponses.Data) == 0 {
@@ -170,7 +174,7 @@ func (g *DirectQueryExecutor) generateStandardQueries() (map[string]DirectQueryF
 func (g *DirectQueryExecutor) standardQuery(standardMetric StandardLoadMetric) (DirectQueryFn, error) {
 	switch standardMetric {
 	case MedianLatency:
-		medianFn := func(responses *wasp.SliceBuffer[wasp.Response]) (float64, error) {
+		medianFn := func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
 			var asMiliDuration []float64
 			for _, response := range responses.Data {
 				// get duration as nanoseconds and convert to milliseconds in order to not lose precision
@@ -182,7 +186,7 @@ func (g *DirectQueryExecutor) standardQuery(standardMetric StandardLoadMetric) (
 		}
 		return medianFn, nil
 	case Percentile95Latency:
-		p95Fn := func(responses *wasp.SliceBuffer[wasp.Response]) (float64, error) {
+		p95Fn := func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
 			var asMiliDuration []float64
 			for _, response := range responses.Data {
 				// get duration as nanoseconds and convert to milliseconds in order to not lose precision
@@ -194,7 +198,7 @@ func (g *DirectQueryExecutor) standardQuery(standardMetric StandardLoadMetric) (
 		}
 		return p95Fn, nil
 	case MaxLatency:
-		maxFn := func(responses *wasp.SliceBuffer[wasp.Response]) (float64, error) {
+		maxFn := func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
 			var asMiliDuration []float64
 			for _, response := range responses.Data {
 				// get duration as nanoseconds and convert to milliseconds in order to not lose precision
@@ -206,7 +210,7 @@ func (g *DirectQueryExecutor) standardQuery(standardMetric StandardLoadMetric) (
 		}
 		return maxFn, nil
 	case ErrorRate:
-		errorRateFn := func(responses *wasp.SliceBuffer[wasp.Response]) (float64, error) {
+		errorRateFn := func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
 			if len(responses.Data) == 0 {
 				return 0, nil
 			}
 
@@ -234,7 +234,7 @@ func TestBenchSpy_DirectQueryExecutor_Execute(t *testing.T) {
 		// 4 responses with ~150ms latency (150ms sleep + some execution overhead)
 		// and 2-3 responses with ~200ms latency (200ms sleep + some execution overhead)
 		// expected median latency: (150ms, 151ms>
-		resultsAsFloats, err := ResultsAs(0.0, []QueryExecutor{executor}, StandardQueryExecutor_Direct, string(MedianLatency), string(Percentile95Latency), string(ErrorRate))
+		resultsAsFloats, err := ResultsAs(0.0, executor, string(MedianLatency), string(Percentile95Latency), string(ErrorRate))
 		assert.NoError(t, err)
 		require.Equal(t, 3, len(resultsAsFloats))
 		require.InDelta(t, 151.0, resultsAsFloats[string(MedianLatency)], 1.0)
@@ -342,10 +342,10 @@ func TestBenchSpy_DirectQueryExecutor_MarshalJSON(t *testing.T) {
 		original.QueryResults["test2"] = 12.1
 
 		original.Queries = map[string]DirectQueryFn{
-			"test": func(responses *wasp.SliceBuffer[wasp.Response]) (float64, error) {
+			"test": func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
 				return 2.0, nil
 			},
-			"test2": func(responses *wasp.SliceBuffer[wasp.Response]) (float64, error) {
+			"test2": func(responses *wasp.SliceBuffer[*wasp.Response]) (float64, error) {
 				return 12.1, nil
 			},
 		}
 
@@ -23,34 +23,36 @@ var (
 	Loki_ErrorRate   = `sum(max_over_time({branch=~"%s", commit=~"%s", go_test_name=~"%s", test_data_type=~"stats", gen_name=~"%s"} | json| unwrap failed [%s]) by (node_id, go_test_name, gen_name)) by (__stream_shard__)`
 )
 
-// NewLokiQueryExecutor creates a new LokiQueryExecutor instance.
-// It initializes the executor with provided queries and Loki configuration,
-// enabling efficient querying of logs from Loki in a structured manner.
-func NewLokiQueryExecutor(queries map[string]string, lokiConfig *wasp.LokiConfig) *LokiQueryExecutor {
+func NewLokiQueryExecutor(generatorName string, queries map[string]string, lokiConfig *wasp.LokiConfig) *LokiQueryExecutor {
 	return &LokiQueryExecutor{
-		KindName:     string(StandardQueryExecutor_Loki),
-		Queries:      queries,
-		Config:       lokiConfig,
-		QueryResults: make(map[string]interface{}),
+		KindName:            string(StandardQueryExecutor_Loki),
+		GeneratorNameString: generatorName,
+		Queries:             queries,
+		Config:              lokiConfig,
+		QueryResults:        make(map[string]interface{}),
 	}
 }
 
 type LokiQueryExecutor struct {
-	KindName string `json:"kind"`
+	KindName            string `json:"kind"`
+	GeneratorNameString string `json:"generator_name"`
+
 	// Test metrics
 	StartTime time.Time `json:"start_time"`
 	EndTime   time.Time `json:"end_time"`
 
-	// Performance queries
 	// a map of name to query template, ex: "average cpu usage": "avg(rate(cpu_usage_seconds_total[5m]))"
 	Queries map[string]string `json:"queries"`
-	// Performance queries results
 	// can be anything, avg RPS, amount of errors, 95th percentile of CPU utilization, etc
 	QueryResults map[string]interface{} `json:"query_results"`
 
 	Config *wasp.LokiConfig `json:"-"`
 }
 
+func (l *LokiQueryExecutor) GeneratorName() string {
+	return l.GeneratorNameString
+}
+
 // Results returns the query results as a map of string to interface{}.
 // It allows users to access the outcomes of executed queries, facilitating further processing or type assertions.
 func (l *LokiQueryExecutor) Results() map[string]interface{} {
@@ -73,6 +75,11 @@ func (l *LokiQueryExecutor) IsComparable(otherQueryExecutor QueryExecutor) error
 		return fmt.Errorf("expected type %s, got %s", reflect.TypeOf(l), otherType)
 	}
 
+	otherAsLoki := otherQueryExecutor.(*LokiQueryExecutor)
+	if l.GeneratorNameString != otherAsLoki.GeneratorNameString {
+		return fmt.Errorf("generator name is different. Expected %s, got %s", l.GeneratorNameString, otherAsLoki.GeneratorNameString)
+	}
+
 	return l.compareQueries(otherQueryExecutor.(*LokiQueryExecutor).Queries)
 }
 
@@ -86,6 +93,9 @@ func (l *LokiQueryExecutor) Validate() error {
 	if l.Config == nil {
 		return errors.New("loki config is missing. Please set it and try again")
 	}
+	if l.GeneratorNameString == "" {
+		return errors.New("generator name is missing. Please set it and try again")
+	}
 
 	return nil
 }
@@ -220,9 +230,10 @@ func (l *LokiQueryExecutor) UnmarshalJSON(data []byte) error {
 // It generates queries based on provided test parameters and time range, returning the executor or an error if query generation fails.
 func NewStandardMetricsLokiExecutor(lokiConfig *wasp.LokiConfig, testName, generatorName, branch, commit string, startTime, endTime time.Time) (*LokiQueryExecutor, error) {
 	lq := &LokiQueryExecutor{
-		KindName:     string(StandardQueryExecutor_Loki),
-		Config:       lokiConfig,
-		QueryResults: make(map[string]interface{}),
+		KindName:            string(StandardQueryExecutor_Loki),
+		GeneratorNameString: generatorName,
+		Config:              lokiConfig,
+		QueryResults:        make(map[string]interface{}),
 	}
 
 	standardQueries, queryErr := lq.generateStandardQueries(testName, generatorName, branch, commit, startTime, endTime)
 
@@ -22,7 +22,7 @@ func TestBenchSpy_NewLokiQueryExecutor(t *testing.T) {
 		BasicAuth: "user:pass",
 	}
 
-	executor := NewLokiQueryExecutor(queries, config)
+	executor := NewLokiQueryExecutor("some_generator", queries, config)
 	assert.Equal(t, "loki", executor.KindName)
 	assert.Equal(t, queries, executor.Queries)
 	assert.Equal(t, config, executor.Config)
@@ -63,26 +63,40 @@ func (a *anotherQueryExecutor) TimeRange(_, _ time.Time) {
 
 func TestBenchSpy_LokiQueryExecutor_IsComparable(t *testing.T) {
 	executor1 := &LokiQueryExecutor{
-		Queries: map[string]string{"q1": "query1"},
+		GeneratorNameString: "generator",
+		Queries:             map[string]string{"q1": "query1"},
 	}
 	executor2 := &LokiQueryExecutor{
-		Queries: map[string]string{"q1": "query2"},
+		GeneratorNameString: "generator",
+		Queries:             map[string]string{"q1": "query2"},
 	}
 	executor3 := &LokiQueryExecutor{
-		Queries: map[string]string{"q2": "query1"},
+		GeneratorNameString: "generator",
+		Queries:             map[string]string{"q2": "query1"},
 	}
 	executor4 := &LokiQueryExecutor{
-		Queries: map[string]string{"q1": "query1", "q2": "query2"},
+		GeneratorNameString: "generator",
+		Queries:             map[string]string{"q1": "query1", "q2": "query2"},
 	}
 	executor5 := &LokiQueryExecutor{
-		Queries: map[string]string{"q1": "query1", "q3": "query3"},
+		GeneratorNameString: "generator",
+		Queries:             map[string]string{"q1": "query1", "q3": "query3"},
+	}
+	executor6 := &LokiQueryExecutor{
+		GeneratorNameString: "other",
+		Queries:             map[string]string{"q1": "query1"},
 	}
 
 	t.Run("same queries", func(t *testing.T) {
 		err := executor1.IsComparable(executor1)
 		assert.NoError(t, err)
 	})
 
+	t.Run("different generator names", func(t *testing.T) {
+		err := executor1.IsComparable(executor6)
+		assert.Error(t, err)
+	})
+
 	t.Run("same queries, different names", func(t *testing.T) {
 		err := executor1.IsComparable(executor3)
 		assert.Error(t, err)
@@ -114,13 +128,22 @@ func TestBenchSpy_LokiQueryExecutor_IsComparable(t *testing.T) {
 func TestBenchSpy_LokiQueryExecutor_Validate(t *testing.T) {
 	t.Run("valid configuration", func(t *testing.T) {
 		executor := &LokiQueryExecutor{
-			Queries: map[string]string{"q1": "query1"},
-			Config:  &wasp.LokiConfig{},
+			GeneratorNameString: "generator",
+			Queries:             map[string]string{"q1": "query1"},
+			Config:              &wasp.LokiConfig{},
 		}
 		err := executor.Validate()
 		assert.NoError(t, err)
 	})
 
+	t.Run("missing generator name", func(t *testing.T) {
+		executor := &LokiQueryExecutor{
+			Config: &wasp.LokiConfig{},
+		}
+		err := executor.Validate()
+		assert.Error(t, err)
+	})
+
 	t.Run("missing queries", func(t *testing.T) {
 		executor := &LokiQueryExecutor{
 			Config: &wasp.LokiConfig{},