Skip to content

Commit 4bbf97b

Browse files
committed
[Feat] Support StormService pause rollout in upgrade
* Update stormservice golang client
* Improve the test coverage
* Refactor the API to support manual resume
* Improve the canary features
* Leave e2e tests to future PRs
* Fix lint and verify issues
* Polish the canary status

Signed-off-by: Jiaxin Shan <[email protected]>
1 parent 90cc2f5 commit 4bbf97b

File tree

19 files changed

+3778
-18
lines changed

19 files changed

+3778
-18
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ AIBRIX_IMAGES := $(foreach img,$(IMAGES),$(AIBRIX_CONTAINER_REGISTRY_NAMESPACE)/
1414
IMG ?= ${AIBRIX_CONTAINER_REGISTRY_NAMESPACE}/controller-manager:${IMAGE_TAG}
1515

1616
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
17-
ENVTEST_K8S_VERSION = 1.29.0
17+
ENVTEST_K8S_VERSION = 1.30.0
1818

1919
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
2020
ifeq (,$(shell go env GOBIN))

api/orchestration/v1alpha1/stormservice_types.go

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ limitations under the License.
1717
package v1alpha1
1818

1919
import (
20+
"strconv"
21+
"time"
22+
2023
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2124
"k8s.io/apimachinery/pkg/util/intstr"
2225
)
@@ -119,6 +122,10 @@ type StormServiceStatus struct {
119122

120123
// The label selector information of the pods belonging to the StormService object.
121124
ScalingTargetSelector string `json:"scalingTargetSelector,omitempty"`
125+
126+
// CanaryStatus tracks the progress of canary deployments.
127+
// +optional
128+
CanaryStatus *CanaryStatus `json:"canaryStatus,omitempty"`
122129
}
123130

124131
// These are valid conditions of a stormService.
@@ -146,6 +153,10 @@ type StormServiceUpdateStrategy struct {
146153

147154
// +optional
148155
MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty" protobuf:"bytes,2,opt,name=maxSurge"`
156+
157+
// Canary defines the canary deployment strategy for gradual rollouts.
158+
// +optional
159+
Canary *CanaryUpdateStrategy `json:"canary,omitempty"`
149160
}
150161

151162
// +enum
@@ -185,6 +196,156 @@ type StormServiceList struct {
185196
Items []StormService `json:"items"`
186197
}
187198

199+
// CanaryUpdateStrategy defines the canary deployment configuration.
// It is referenced from StormServiceUpdateStrategy through the optional
// "canary" field.
200+
type CanaryUpdateStrategy struct {
201+
// Steps defines the sequence of canary deployment steps.
// The field is omitted from the serialized object when it is empty.
202+
Steps []CanaryStep `json:"steps,omitempty"`
203+
}
204+
205+
// CanaryStep defines a single step in the canary deployment process.
// Both fields are optional pointers, so a step may carry a weight, a pause,
// both, or neither.
206+
type CanaryStep struct {
207+
// SetWeight defines the percentage of traffic/replicas to route to the new version.
// The validation markers below restrict the value to the range 0-100.
208+
// +kubebuilder:validation:Minimum=0
209+
// +kubebuilder:validation:Maximum=100
210+
// +optional
211+
SetWeight *int32 `json:"setWeight,omitempty"`
212+
213+
// Pause defines a pause in the canary deployment.
// See PauseStep for how the pause duration is interpreted.
214+
// +optional
215+
Pause *PauseStep `json:"pause,omitempty"`
216+
}
217+
218+
// PauseStep defines pause behavior in canary deployments.
219+
type PauseStep struct {
220+
// Duration specifies how long to pause. It accepts either form of
// intstr.IntOrString:
221+
// - String: "30s", "5m", etc. (parsed as time.Duration)
222+
// - Int: seconds as integer
223+
// - nil: manual pause requiring user intervention
224+
// A manual pause is resumed by setting duration to "0" or 0.
225+
// +optional
226+
Duration *intstr.IntOrString `json:"duration,omitempty"`
227+
}
228+
229+
// DurationSeconds converts the pause duration to seconds
230+
// Returns:
231+
// - >= 0: pause duration in seconds
232+
// - 0: manual pause (nil duration) or resume (duration "0"/0)
233+
// - -1: invalid duration string
234+
func (p *PauseStep) DurationSeconds() int32 {
235+
if p.Duration == nil {
236+
return 0 // Manual pause
237+
}
238+
239+
if p.Duration.Type == intstr.String {
240+
// Try parsing as integer first
241+
if s, err := strconv.ParseInt(p.Duration.StrVal, 10, 32); err == nil {
242+
return int32(s)
243+
}
244+
// Try parsing as duration string
245+
if d, err := time.ParseDuration(p.Duration.StrVal); err == nil {
246+
return int32(d.Seconds())
247+
}
248+
return -1 // Invalid string
249+
}
250+
251+
return p.Duration.IntVal
252+
}
253+
254+
// IsManualPause returns true if this is a manual pause (nil duration)
255+
func (p *PauseStep) IsManualPause() bool {
256+
return p.Duration == nil
257+
}
258+
259+
// IsResume returns true if this represents a resume action (duration 0 or "0")
260+
func (p *PauseStep) IsResume() bool {
261+
if p.Duration == nil {
262+
return false
263+
}
264+
return p.DurationSeconds() == 0
265+
}
266+
267+
// CanaryStatus tracks the progress of a canary deployment.
// Every field is optional and tagged omitempty, so zero values are left out
// of the serialized status.
268+
type CanaryStatus struct {
269+
// CurrentStep is the index of the current step in the canary deployment.
// Because of omitempty, an index of 0 is not serialized.
270+
// +optional
271+
CurrentStep int32 `json:"currentStep,omitempty"`
272+
273+
// PauseConditions indicates the reasons why the canary deployment is paused.
274+
// When paused, the first pause condition's StartTime indicates when the pause began.
275+
// +optional
276+
PauseConditions []PauseCondition `json:"pauseConditions,omitempty"`
277+
278+
// StableRevision is the revision of the stable/old version.
279+
// +optional
280+
StableRevision string `json:"stableRevision,omitempty"`
281+
282+
// CanaryRevision is the revision of the canary/new version.
283+
// +optional
284+
CanaryRevision string `json:"canaryRevision,omitempty"`
285+
286+
// Phase indicates the current phase of the canary deployment.
// See the CanaryPhase constants for the defined values.
287+
// +optional
288+
Phase CanaryPhase `json:"phase,omitempty"`
289+
290+
// CanaryReplicas is the number of RoleSets on canary version (replica mode).
291+
// +optional
292+
CanaryReplicas int32 `json:"canaryReplicas,omitempty"`
293+
294+
// StableReplicas is the number of RoleSets on stable version (replica mode).
295+
// +optional
296+
StableReplicas int32 `json:"stableReplicas,omitempty"`
297+
298+
// RoleCanaryCounts tracks per-role canary pod counts (pooled mode).
// NOTE(review): the map key is presumably the role name - confirm against the controller.
299+
// +optional
300+
RoleCanaryCounts map[string]int32 `json:"roleCanaryCounts,omitempty"`
301+
302+
// TotalCanaryPods is the total number of canary pods across all roles (pooled mode).
303+
// +optional
304+
TotalCanaryPods int32 `json:"totalCanaryPods,omitempty"`
305+
306+
// AbortedAt indicates when the canary deployment was aborted.
// It is a pointer so that it can be absent when no abort has been recorded.
307+
// +optional
308+
AbortedAt *metav1.Time `json:"abortedAt,omitempty"`
309+
310+
// Message provides details about the current canary state.
311+
// +optional
312+
Message string `json:"message,omitempty"`
313+
}
314+
315+
// CanaryPhase represents the phase of a canary deployment.
// It is a string-backed enum; the defined values are the constants below.
316+
// +enum
317+
type CanaryPhase string
318+
319+
const (
320+
// CanaryPhaseInitializing indicates the canary deployment is starting.
321+
CanaryPhaseInitializing CanaryPhase = "Initializing"
322+
// CanaryPhaseProgressing indicates the canary deployment is progressing through steps.
323+
CanaryPhaseProgressing CanaryPhase = "Progressing"
324+
// CanaryPhasePaused indicates the canary deployment is paused.
325+
CanaryPhasePaused CanaryPhase = "Paused"
326+
// CanaryPhaseCompleted indicates the canary deployment has completed successfully.
327+
CanaryPhaseCompleted CanaryPhase = "Completed"
328+
// CanaryPhaseAborted indicates the canary deployment was aborted/rolled back.
329+
CanaryPhaseAborted CanaryPhase = "Aborted"
330+
)
331+
332+
// PauseReason represents the reason for a pause condition.
// It is a string-backed enum; CanaryPauseStep is the only value defined here.
333+
// +enum
334+
type PauseReason string
335+
336+
const (
337+
// PauseReasonCanaryPauseStep indicates a pause at a canary step.
338+
PauseReasonCanaryPauseStep PauseReason = "CanaryPauseStep"
339+
)
340+
341+
// PauseCondition represents a pause condition in the canary deployment.
// Neither field is tagged omitempty, so both are always serialized.
342+
type PauseCondition struct {
343+
// Reason indicates why the canary deployment was paused.
344+
Reason PauseReason `json:"reason"`
345+
// StartTime is when the pause condition was added.
346+
StartTime metav1.Time `json:"startTime"`
347+
}
348+
188349
// init registers the StormService and StormServiceList types with the
// package's SchemeBuilder.
func init() {
189350
SchemeBuilder.Register(&StormService{}, &StormServiceList{})
190351
}

api/orchestration/v1alpha1/zz_generated.deepcopy.go

Lines changed: 126 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)