Skip to content

Commit 808fd79

Browse files
agrawrohhuikang
authored andcommitted
feat(experiment): Add Measurements Retention Limit Option for Metrics
Signed-off-by: Rohit Agrawal <[email protected]>
1 parent 86dc378 commit 808fd79

File tree

14 files changed

+693
-440
lines changed

14 files changed

+693
-440
lines changed

docs/features/analysis.md

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,7 @@ spec:
714714
## Measurements Retention
715715

716716
!!! important
717-
Available since v1.2
717+
Available since v1.2
718718

719719
`measurementRetention` can be used to retain a number of results other than the latest ten for the metrics running in any mode
720720
(dry/non-dry). Setting this option to `0` disables it, and the controller will revert to the existing behavior of
@@ -804,6 +804,36 @@ spec:
804804
limit: 20
805805
```
806806

807+
### Measurements Retention for Experiments
808+
809+
If an experiment wants to retain more results of its analysis metrics, it simply needs to specify the
810+
`measurementRetention` field under its spec. In the following example, all the metrics from `analyze-job` matching the
811+
RegEx rule `test.*` will have their latest twenty measurements retained instead of the default ten.
812+
813+
```yaml hl_lines="19 20 21"
814+
kind: Experiment
815+
spec:
816+
templates:
817+
- name: baseline
818+
selector:
819+
matchLabels:
820+
app: rollouts-demo
821+
template:
822+
metadata:
823+
labels:
824+
app: rollouts-demo
825+
spec:
826+
containers:
827+
- name: rollouts-demo
828+
image: argoproj/rollouts-demo:blue
829+
analyses:
830+
- name: analyze-job
831+
templateName: analyze-job
832+
measurementRetention:
833+
- metricName: test.*
834+
limit: 20
835+
```
836+
807837
## Inconclusive Runs
808838

809839
Analysis runs can also be considered `Inconclusive`, which indicates the run was neither successful,

experiments/experiment.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ func (ec *experimentContext) reconcile() *v1alpha1.ExperimentStatus {
100100
}
101101

102102
for _, analysis := range ec.ex.Spec.Analyses {
103-
ec.reconcileAnalysisRun(analysis, ec.ex.Spec.DryRun)
103+
ec.reconcileAnalysisRun(analysis, ec.ex.Spec.DryRun, ec.ex.Spec.MeasurementRetention)
104104
}
105105

106106
newStatus := ec.calculateStatus()
@@ -371,7 +371,7 @@ func calculateEnqueueDuration(ex *v1alpha1.Experiment, newStatus *v1alpha1.Exper
371371

372372
// reconcileAnalysisRun reconciles a single analysis run, creating or terminating it as necessary.
373373
// Updates the analysis run statuses, which may subsequently fail the experiment.
374-
func (ec *experimentContext) reconcileAnalysisRun(analysis v1alpha1.ExperimentAnalysisTemplateRef, dryRunMetrics []v1alpha1.DryRun) {
374+
func (ec *experimentContext) reconcileAnalysisRun(analysis v1alpha1.ExperimentAnalysisTemplateRef, dryRunMetrics []v1alpha1.DryRun, measurementRetentionMetrics []v1alpha1.MeasurementRetention) {
375375
logCtx := ec.log.WithField("analysis", analysis.Name)
376376
logCtx.Infof("Reconciling analysis")
377377
prevStatus := experimentutil.GetAnalysisRunStatus(ec.ex.Status, analysis.Name)
@@ -427,7 +427,7 @@ func (ec *experimentContext) reconcileAnalysisRun(analysis v1alpha1.ExperimentAn
427427
logCtx.Warnf("Skipping AnalysisRun creation for analysis %s: experiment is terminating", analysis.Name)
428428
return
429429
}
430-
run, err := ec.createAnalysisRun(analysis, dryRunMetrics)
430+
run, err := ec.createAnalysisRun(analysis, dryRunMetrics, measurementRetentionMetrics)
431431
if err != nil {
432432
msg := fmt.Sprintf("Failed to create AnalysisRun for analysis '%s': %v", analysis.Name, err.Error())
433433
newStatus.Phase = v1alpha1.AnalysisPhaseError
@@ -474,13 +474,13 @@ func (ec *experimentContext) reconcileAnalysisRun(analysis v1alpha1.ExperimentAn
474474
// createAnalysisRun creates the analysis run. If an existing run exists with the same name, is
475475
// semantically equal, and is not complete, returns the existing one, otherwise creates a new
476476
// run with a collision counter increase.
477-
func (ec *experimentContext) createAnalysisRun(analysis v1alpha1.ExperimentAnalysisTemplateRef, dryRunMetrics []v1alpha1.DryRun) (*v1alpha1.AnalysisRun, error) {
477+
func (ec *experimentContext) createAnalysisRun(analysis v1alpha1.ExperimentAnalysisTemplateRef, dryRunMetrics []v1alpha1.DryRun, measurementRetentionMetrics []v1alpha1.MeasurementRetention) (*v1alpha1.AnalysisRun, error) {
478478
analysisRunIf := ec.argoProjClientset.ArgoprojV1alpha1().AnalysisRuns(ec.ex.Namespace)
479479
args, err := ec.ResolveAnalysisRunArgs(analysis.Args)
480480
if err != nil {
481481
return nil, err
482482
}
483-
run, err := ec.newAnalysisRun(analysis, args, dryRunMetrics)
483+
run, err := ec.newAnalysisRun(analysis, args, dryRunMetrics, measurementRetentionMetrics)
484484
if err != nil {
485485
return nil, err
486486
}
@@ -616,7 +616,7 @@ func (ec *experimentContext) assessAnalysisRuns() (v1alpha1.AnalysisPhase, strin
616616
}
617617

618618
// newAnalysisRun generates an AnalysisRun from the experiment and template
619-
func (ec *experimentContext) newAnalysisRun(analysis v1alpha1.ExperimentAnalysisTemplateRef, args []v1alpha1.Argument, dryRunMetrics []v1alpha1.DryRun) (*v1alpha1.AnalysisRun, error) {
619+
func (ec *experimentContext) newAnalysisRun(analysis v1alpha1.ExperimentAnalysisTemplateRef, args []v1alpha1.Argument, dryRunMetrics []v1alpha1.DryRun, measurementRetentionMetrics []v1alpha1.MeasurementRetention) (*v1alpha1.AnalysisRun, error) {
620620

621621
if analysis.ClusterScope {
622622
clusterTemplate, err := ec.clusterAnalysisTemplateLister.Get(analysis.TemplateName)
@@ -626,7 +626,7 @@ func (ec *experimentContext) newAnalysisRun(analysis v1alpha1.ExperimentAnalysis
626626
name := fmt.Sprintf("%s-%s", ec.ex.Name, analysis.Name)
627627

628628
clusterAnalysisTemplates := []*v1alpha1.ClusterAnalysisTemplate{clusterTemplate}
629-
run, err := analysisutil.NewAnalysisRunFromTemplates(nil, clusterAnalysisTemplates, args, dryRunMetrics, []v1alpha1.MeasurementRetention{}, name, "", ec.ex.Namespace)
629+
run, err := analysisutil.NewAnalysisRunFromTemplates(nil, clusterAnalysisTemplates, args, dryRunMetrics, measurementRetentionMetrics, name, "", ec.ex.Namespace)
630630
if err != nil {
631631
return nil, err
632632
}
@@ -644,7 +644,7 @@ func (ec *experimentContext) newAnalysisRun(analysis v1alpha1.ExperimentAnalysis
644644
name := fmt.Sprintf("%s-%s", ec.ex.Name, analysis.Name)
645645

646646
analysisTemplates := []*v1alpha1.AnalysisTemplate{template}
647-
run, err := analysisutil.NewAnalysisRunFromTemplates(analysisTemplates, nil, args, dryRunMetrics, []v1alpha1.MeasurementRetention{}, name, "", ec.ex.Namespace)
647+
run, err := analysisutil.NewAnalysisRunFromTemplates(analysisTemplates, nil, args, dryRunMetrics, measurementRetentionMetrics, name, "", ec.ex.Namespace)
648648
if err != nil {
649649
return nil, err
650650
}

manifests/crds/experiment-crd.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,19 @@ spec:
9595
type: array
9696
duration:
9797
type: string
98+
measurementRetention:
99+
items:
100+
properties:
101+
limit:
102+
format: int32
103+
type: integer
104+
metricName:
105+
type: string
106+
required:
107+
- limit
108+
- metricName
109+
type: object
110+
type: array
98111
progressDeadlineSeconds:
99112
format: int32
100113
type: integer

manifests/install.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8460,6 +8460,19 @@ spec:
84608460
type: array
84618461
duration:
84628462
type: string
8463+
measurementRetention:
8464+
items:
8465+
properties:
8466+
limit:
8467+
format: int32
8468+
type: integer
8469+
metricName:
8470+
type: string
8471+
required:
8472+
- limit
8473+
- metricName
8474+
type: object
8475+
type: array
84638476
progressDeadlineSeconds:
84648477
format: int32
84658478
type: integer

manifests/namespace-install.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8460,6 +8460,19 @@ spec:
84608460
type: array
84618461
duration:
84628462
type: string
8463+
measurementRetention:
8464+
items:
8465+
properties:
8466+
limit:
8467+
format: int32
8468+
type: integer
8469+
metricName:
8470+
type: string
8471+
required:
8472+
- limit
8473+
- metricName
8474+
type: object
8475+
type: array
84638476
progressDeadlineSeconds:
84648477
format: int32
84658478
type: integer

pkg/apis/api-rules/violation_exceptions.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis
1414
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,ExperimentAnalysisTemplateRef,Args
1515
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,ExperimentSpec,Analyses
1616
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,ExperimentSpec,DryRun
17+
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,ExperimentSpec,MeasurementRetention
1718
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,ExperimentSpec,Templates
1819
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,ExperimentStatus,AnalysisRuns
1920
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,ExperimentStatus,Conditions

pkg/apis/rollouts/v1alpha1/experiment_types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ type ExperimentSpec struct {
6060
// +patchStrategy=merge
6161
// +optional
6262
DryRun []DryRun `json:"dryRun,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,7,rep,name=dryRun"`
63+
// MeasurementRetention object contains the settings for retaining the number of measurements during the analysis
64+
// +patchMergeKey=metricName
65+
// +patchStrategy=merge
66+
// +optional
67+
MeasurementRetention []MeasurementRetention `json:"measurementRetention,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,8,rep,name=measurementRetention"`
6368
}
6469

6570
type TemplateSpec struct {

0 commit comments

Comments
 (0)