# Source: rollouts/canary-strategy.yaml on the main branch of
# somethingwithproof/gitops-infrastructure-demo (GitHub).
---
# Argo Rollouts Canary Strategy Example
#
# Rolls out `sample-app` (10 replicas) in the `production` namespace through
# weighted canary steps (10% -> 25% -> 50% -> 100%). Two analysis steps gate
# the progression; they reference the AnalysisTemplates named `success-rate`
# and `latency-check`.
apiVersion: argoproj.io/v1alpha1
kind: Rollout
metadata:
  name: sample-app
  namespace: production
spec:
  replicas: 10
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: sample-app
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: sample-app
    spec:
      containers:
      - name: app
        # NOTE(review): `latest` is a mutable tag; consider pinning a version
        # or digest so every rollout revision is reproducible.
        image: sample-app:latest
        ports:
        - containerPort: 8080
        resources:
          requests:
            memory: "128Mi"
            cpu: "100m"
          limits:
            memory: "256Mi"
            cpu: "200m"
        # Liveness: GET /health on the container port every 10s, first probe
        # after 10s.
        livenessProbe:
          httpGet:
            path: /health
            port: 8080
          initialDelaySeconds: 10
          periodSeconds: 10
        # Readiness: GET /ready on the container port every 5s, first probe
        # after 5s.
        readinessProbe:
          httpGet:
            path: /ready
            port: 8080
          initialDelaySeconds: 5
          periodSeconds: 5
  strategy:
    canary:
      # Services fronting the canary and the stable pods.
      # NOTE(review): no `trafficRouting` provider is configured in this
      # manifest; confirm how the setWeight percentages below are expected to
      # be realized.
      canaryService: sample-app-canary
      stableService: sample-app-stable

      # Rollout steps
      steps:
      # Step 1: 10% traffic to canary
      - setWeight: 10
      - pause: {duration: 2m}

      # Step 2: Analyze metrics
      - analysis:
          templates:
          - templateName: success-rate
          args:
          - name: service-name
            value: sample-app-canary

      # Step 3: 25% traffic
      - setWeight: 25
      - pause: {duration: 2m}

      # Step 4: 50% traffic
      - setWeight: 50
      - pause: {duration: 5m}

      # Step 5: Final analysis before full promotion
      # `success-rate` declares `service-name` without a default value, so the
      # argument has to be supplied here exactly as in Step 2; otherwise the
      # analysis cannot resolve its arguments.
      - analysis:
          templates:
          - templateName: success-rate
          - templateName: latency-check
          args:
          - name: service-name
            value: sample-app-canary

      # Step 6: Full rollout
      - setWeight: 100

      # Anti-affinity between canary and stable (soft/preferred rule,
      # weight 100)
      antiAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          weight: 100

---
# Analysis Template: Success Rate
#
# Measures the share of non-5xx HTTP requests for one service over a 2m rate
# window and requires it to be at least 0.99.
apiVersion: argoproj.io/v1alpha1
kind: AnalysisTemplate
metadata:
  namespace: production
  name: success-rate
spec:
  # Required argument (no default value): the value of the `service` label
  # used to select series in the query below.
  args:
  - name: service-name
  metrics:
  - name: success-rate
    # Pass/fail rule: the first element of the query result must be >= 0.99;
    # failureLimit is set to 3.
    # NOTE(review): if the service receives no requests in the window the
    # ratio has no meaningful value; confirm the desired outcome for that case.
    successCondition: "result[0] >= 0.99"
    failureLimit: 3
    # Sampling: 5 measurements, one every 30s.
    count: 5
    interval: "30s"
    provider:
      prometheus:
        address: "http://prometheus.monitoring:9090"
        # Ratio: rate of requests whose status does not match 5.. divided by
        # the rate of all requests for the same service.
        query: |
          sum(rate(
            http_requests_total{service="{{args.service-name}}",status!~"5.."}[2m]
          )) /
          sum(rate(
            http_requests_total{service="{{args.service-name}}"}[2m]
          ))

---
# Analysis Template: Latency Check
#
# Computes the 0.99 quantile of `http_request_duration_seconds_bucket` for one
# service over a 2m rate window and requires it to stay below 0.5
# (presumably seconds, going by the metric name; confirm).
apiVersion: argoproj.io/v1alpha1
kind: AnalysisTemplate
metadata:
  name: latency-check
  namespace: production
spec:
  args:
  # Value of the `service` label to query. Defaults to the service name that
  # was previously hard-coded in the query, so callers that pass no args keep
  # the same behavior; the argument name matches the `success-rate` template.
  - name: service-name
    value: sample-app-canary
  metrics:
  - name: p99-latency
    # Sampling: 5 measurements, one every 30s.
    interval: 30s
    count: 5
    # Pass/fail rule: the first element of the query result must be < 0.5;
    # failureLimit is set to 3.
    successCondition: result[0] < 0.5
    failureLimit: 3
    provider:
      prometheus:
        address: http://prometheus.monitoring:9090
        query: |
          histogram_quantile(0.99,
            sum(rate(
              http_request_duration_seconds_bucket{service="{{args.service-name}}"}[2m]
            )) by (le)
          )