Skip to content

Commit 3368230

Browse files
committed
feat: add required reason field to failover domain requests
Add mandatory 'Reason' field to FailoverDomainRequest to provide context for why domains are being failed over, improving audit trails and operational transparency. Changes: - Add Reason field to FailoverDomainRequest in types and mappers - Plumb reason through domain handler to audit logs - Add validation to require non-empty reason for failovers - Add CLI --reason flag (required) for domain failover command - Add unit and integration tests for validation and persistence This requires corresponding IDL changes in cadence-idl to expose the field via thrift/proto APIs. The go.mod includes a replace directive pointing to a fork with the IDL changes for now. Breaking change: All failover requests must now include a reason. Signed-off-by: Diana Zawadzki <[email protected]>
1 parent 01d916c commit 3368230

File tree

12 files changed

+275
-4
lines changed

12 files changed

+275
-4
lines changed

common/domain/handler.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,15 @@ func (d *handlerImpl) handleFailoverRequest(ctx context.Context,
656656
}
657657
response.DomainInfo, response.Configuration, response.ReplicationConfiguration = d.createResponse(intendedDomainState.Info, intendedDomainState.Config, intendedDomainState.ReplicationConfig)
658658

659-
err = d.updateDomainAuditLog(ctx, currentState, intendedDomainState, persistence.DomainAuditOperationTypeFailover, "domain failover")
659+
// Extract failover reason from update request data
660+
failoverMessage := "domain failover"
661+
if updateRequest.Data != nil {
662+
if reason, ok := updateRequest.Data["FailoverReason"]; ok && reason != "" {
663+
failoverMessage = fmt.Sprintf("domain failover: %s", reason)
664+
}
665+
}
666+
667+
err = d.updateDomainAuditLog(ctx, currentState, intendedDomainState, persistence.DomainAuditOperationTypeFailover, failoverMessage)
660668
if err != nil {
661669
return nil, err
662670
}

common/domain/handler_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3685,6 +3685,7 @@ func TestHandler_FailoverDomain(t *testing.T) {
36853685
request: &types.FailoverDomainRequest{
36863686
DomainName: constants.TestDomainName,
36873687
DomainActiveClusterName: common.Ptr(clusterB),
3688+
Reason: "Planned maintenance on cluster A",
36883689
},
36893690
response: func(timeSource clock.MockedTimeSource) *types.FailoverDomainResponse {
36903691
data, _ := json.Marshal([]FailoverEvent{
@@ -3730,6 +3731,7 @@ func TestHandler_FailoverDomain(t *testing.T) {
37303731
request: &types.FailoverDomainRequest{
37313732
DomainName: constants.TestDomainName,
37323733
DomainActiveClusterName: common.Ptr(cluster.TestAlternativeClusterName),
3734+
Reason: "Testing domain not found error",
37333735
},
37343736
err: &types.EntityNotExistsError{Message: "Domain not found"},
37353737
},
@@ -3767,6 +3769,7 @@ func TestHandler_FailoverDomain(t *testing.T) {
37673769
request: &types.FailoverDomainRequest{
37683770
DomainName: constants.TestDomainName,
37693771
DomainActiveClusterName: common.Ptr(cluster.TestCurrentClusterName),
3772+
Reason: "Testing update too frequent error",
37703773
},
37713774
err: errDomainUpdateTooFrequent,
37723775
},

common/types/mapper/proto/api.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4516,6 +4516,7 @@ func FromFailoverDomainRequest(t *types.FailoverDomainRequest) *apiv1.FailoverDo
45164516
DomainName: t.DomainName,
45174517
DomainActiveClusterName: t.GetDomainActiveClusterName(),
45184518
ActiveClusters: FromActiveClusters(t.ActiveClusters),
4519+
Reason: t.Reason,
45194520
}
45204521
}
45214522

@@ -4527,6 +4528,7 @@ func ToFailoverDomainRequest(t *apiv1.FailoverDomainRequest) *types.FailoverDoma
45274528
DomainName: t.DomainName,
45284529
DomainActiveClusterName: common.StringPtr(t.DomainActiveClusterName),
45294530
ActiveClusters: ToActiveClusters(t.ActiveClusters),
4531+
Reason: t.Reason,
45304532
}
45314533
}
45324534

common/types/mapper/thrift/shared.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1623,6 +1623,7 @@ func FromFailoverDomainRequest(t *types.FailoverDomainRequest) *shared.FailoverD
16231623
DomainName: &t.DomainName,
16241624
DomainActiveClusterName: t.DomainActiveClusterName,
16251625
ActiveClusters: FromActiveClusters(t.ActiveClusters),
1626+
Reason: &t.Reason,
16261627
}
16271628
}
16281629

@@ -1635,6 +1636,7 @@ func ToFailoverDomainRequest(t *shared.FailoverDomainRequest) *types.FailoverDom
16351636
DomainName: t.GetDomainName(),
16361637
DomainActiveClusterName: t.DomainActiveClusterName,
16371638
ActiveClusters: ToActiveClusters(t.ActiveClusters),
1639+
Reason: t.GetReason(),
16381640
}
16391641
}
16401642

common/types/shared.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1782,16 +1782,25 @@ type FailoverDomainRequest struct {
17821782
DomainName string `json:"domainName,omitempty"`
17831783
DomainActiveClusterName *string `json:"domainActiveClusterName,omitempty"`
17841784
ActiveClusters *ActiveClusters `json:"activeClusters,omitempty"`
1785+
Reason string `json:"reason,omitempty"`
17851786
}
17861787

17871788
func (v *FailoverDomainRequest) ToUpdateDomainRequest() *UpdateDomainRequest {
17881789
if v == nil {
17891790
return nil
17901791
}
1792+
// Include failover reason in domain data if provided
1793+
var data map[string]string
1794+
if v.Reason != "" {
1795+
data = map[string]string{
1796+
"FailoverReason": v.Reason,
1797+
}
1798+
}
17911799
return &UpdateDomainRequest{
17921800
Name: v.DomainName,
17931801
ActiveClusterName: v.DomainActiveClusterName,
17941802
ActiveClusters: v.ActiveClusters,
1803+
Data: data,
17951804
}
17961805
}
17971806

@@ -1819,6 +1828,14 @@ func (v *FailoverDomainRequest) GetDomain() (o string) {
18191828
return
18201829
}
18211830

1831+
// GetReason is an internal getter (TBD...)
1832+
func (v *FailoverDomainRequest) GetReason() (o string) {
1833+
if v != nil {
1834+
return v.Reason
1835+
}
1836+
return
1837+
}
1838+
18221839
// FailoverDomainResponse is an internal type (TBD...)
18231840
type FailoverDomainResponse struct {
18241841
DomainInfo *DomainInfo `json:"domainInfo,omitempty"`

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,5 @@ replace github.com/apache/thrift => github.com/apache/thrift v0.0.0-201612212036
167167

168168
// DO NOT USE as it misses mysql/config store fix
169169
retract v1.2.3
170+
171+
replace github.com/uber/cadence-idl => github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963

go.sum

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -442,9 +442,6 @@ github.com/uber-go/mapdecode v1.0.0/go.mod h1:b5nP15FwXTgpjTjeA9A2uTHXV5UJCl4arw
442442
github.com/uber-go/tally v3.3.12+incompatible/go.mod h1:YDTIBxdXyOU/sCWilKB4bgyufu1cEi0jdVnRdxvjnmU=
443443
github.com/uber-go/tally v3.3.15+incompatible h1:9hLSgNBP28CjIaDmAuRTq9qV+UZY+9PcvAkXO4nNMwg=
444444
github.com/uber-go/tally v3.3.15+incompatible/go.mod h1:YDTIBxdXyOU/sCWilKB4bgyufu1cEi0jdVnRdxvjnmU=
445-
github.com/uber/cadence-idl v0.0.0-20211111101836-d6b70b60eb8c/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
446-
github.com/uber/cadence-idl v0.0.0-20251027162905-7b9d8a31de8c h1:fd00g0M50UuZYetChIAPYUchvi1M+gL3rhbWwOQ6GhE=
447-
github.com/uber/cadence-idl v0.0.0-20251027162905-7b9d8a31de8c/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
448445
github.com/uber/jaeger-client-go v2.22.1+incompatible h1:NHcubEkVbahf9t3p75TOCR83gdUHXjRJvjoBh1yACsM=
449446
github.com/uber/jaeger-client-go v2.22.1+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk=
450447
github.com/uber/jaeger-lib v2.2.0+incompatible h1:MxZXOiR2JuoANZ3J6DE/U0kSFv/eJ/GfSYVCjK7dyaw=
@@ -485,6 +482,8 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec
485482
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
486483
github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
487484
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
485+
github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963 h1:1EW62MeL+ZKpxrJsLTc+IjtMablb5lHOECRf9oleBk8=
486+
github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
488487
go.mongodb.org/mongo-driver v1.7.3 h1:G4l/eYY9VrQAK/AUgkV0koQKzQnyddnWxrd/Etf0jIs=
489488
go.mongodb.org/mongo-driver v1.7.3/go.mod h1:NqaYOwnXWr5Pm7AOpO5QFxKJ503nbMse/R79oO62zWg=
490489
go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=

go.work.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,8 @@ github.com/yudai/pp v2.0.1+incompatible h1:Q4//iY4pNF6yPLZIigmvcl7k/bPgrcTPIFIcm
659659
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
660660
github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg=
661661
github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
662+
github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963 h1:1EW62MeL+ZKpxrJsLTc+IjtMablb5lHOECRf9oleBk8=
663+
github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws=
662664
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
663665
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
664666
go.etcd.io/bbolt v1.3.4 h1:hi1bXHMVrlQh6WwxAy+qZCV/SYIlqo+Ushwdpa4tAKg=
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Copyright (c) 2017 Uber Technologies, Inc.
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"), to deal
5+
// in the Software without restriction, including without limitation the rights
6+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
// copies of the Software, and to permit persons to whom the Software is
8+
// furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in
11+
// all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19+
// THE SOFTWARE.
20+
21+
package api
22+
23+
import (
24+
"context"
25+
"testing"
26+
27+
"github.com/stretchr/testify/assert"
28+
"github.com/stretchr/testify/require"
29+
30+
"github.com/uber/cadence/common"
31+
"github.com/uber/cadence/common/types"
32+
)
33+
34+
func TestFailoverDomainValidation_WithoutReason_ExpectError(t *testing.T) {
35+
// This test verifies that when a failover is attempted without providing a reason,
36+
// the validation fails and returns an appropriate error.
37+
38+
v, _ := setupMocksForRequestValidator(t)
39+
40+
// Test case 1: Failover request WITHOUT a reason
41+
failoverRequestWithoutReason := &types.FailoverDomainRequest{
42+
DomainName: "test-domain",
43+
DomainActiveClusterName: common.StringPtr("cluster2"),
44+
// Reason is intentionally missing
45+
}
46+
47+
err := v.ValidateFailoverDomainRequest(context.Background(), failoverRequestWithoutReason)
48+
49+
// Verify we get the expected error
50+
assert.Error(t, err)
51+
assert.Contains(t, err.Error(), "Reason must be provided for domain failover")
52+
53+
// Test case 2: Failover request with empty reason
54+
failoverRequestEmptyReason := &types.FailoverDomainRequest{
55+
DomainName: "test-domain",
56+
DomainActiveClusterName: common.StringPtr("cluster2"),
57+
Reason: "", // Empty reason
58+
}
59+
60+
err = v.ValidateFailoverDomainRequest(context.Background(), failoverRequestEmptyReason)
61+
62+
// Verify we get the expected error
63+
assert.Error(t, err)
64+
assert.Contains(t, err.Error(), "Reason must be provided for domain failover")
65+
}
66+
67+
func TestFailoverDomainValidation_WithReason_ExpectSuccess(t *testing.T) {
68+
// This test verifies that when a failover is attempted with a valid reason,
69+
// the validation succeeds.
70+
71+
v, _ := setupMocksForRequestValidator(t)
72+
73+
// Create a failover request WITH a reason
74+
failoverRequestWithReason := &types.FailoverDomainRequest{
75+
DomainName: "test-domain",
76+
DomainActiveClusterName: common.StringPtr("cluster2"),
77+
Reason: "Planned maintenance on cluster1 - upgrading to v1.2.3",
78+
}
79+
80+
err := v.ValidateFailoverDomainRequest(context.Background(), failoverRequestWithReason)
81+
82+
// Verify validation passes
83+
require.NoError(t, err)
84+
}
85+
86+
func TestFailoverDomainReason_StoredInDomainData(t *testing.T) {
87+
// This test verifies that the reason provided during failover
88+
// is properly converted to domain data for storage.
89+
90+
failoverRequest := &types.FailoverDomainRequest{
91+
DomainName: "test-domain",
92+
DomainActiveClusterName: common.StringPtr("cluster2"),
93+
Reason: "Emergency failover due to cluster1 outage",
94+
}
95+
96+
// Call ToUpdateDomainRequest and verify reason is in data
97+
updateReq := failoverRequest.ToUpdateDomainRequest()
98+
99+
assert.NotNil(t, updateReq)
100+
assert.Equal(t, "test-domain", updateReq.Name)
101+
assert.Equal(t, common.StringPtr("cluster2"), updateReq.ActiveClusterName)
102+
assert.NotNil(t, updateReq.Data)
103+
assert.Equal(t, "Emergency failover due to cluster1 outage", updateReq.Data["FailoverReason"])
104+
}

service/frontend/api/request_validator.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,11 @@ func (v *requestValidatorImpl) ValidateFailoverDomainRequest(ctx context.Context
378378
return &types.BadRequestError{Message: "DomainActiveClusterName or ActiveClusters must be provided to failover the domain"}
379379
}
380380

381+
// Validate that a reason is provided
382+
if failoverDomainRequest.GetReason() == "" {
383+
return &types.BadRequestError{Message: "Reason must be provided for domain failover"}
384+
}
385+
381386
// Security token is not required for failover request - reject the failover if the cluster is in lockdown
382387
return checkFailOverPermission(v.config, failoverDomainRequest.GetDomainName())
383388
}

0 commit comments

Comments
 (0)