diff --git a/common/domain/handler.go b/common/domain/handler.go index cf4597fa0dd..98b5e3d7ee1 100644 --- a/common/domain/handler.go +++ b/common/domain/handler.go @@ -656,7 +656,15 @@ func (d *handlerImpl) handleFailoverRequest(ctx context.Context, } response.DomainInfo, response.Configuration, response.ReplicationConfiguration = d.createResponse(intendedDomainState.Info, intendedDomainState.Config, intendedDomainState.ReplicationConfig) - err = d.updateDomainAuditLog(ctx, currentState, intendedDomainState, persistence.DomainAuditOperationTypeFailover, "domain failover") + // Extract failover reason from update request data + failoverMessage := "domain failover" + if updateRequest.Data != nil { + if reason, ok := updateRequest.Data["FailoverReason"]; ok && reason != "" { + failoverMessage = fmt.Sprintf("domain failover: %s", reason) + } + } + + err = d.updateDomainAuditLog(ctx, currentState, intendedDomainState, persistence.DomainAuditOperationTypeFailover, failoverMessage) if err != nil { return nil, err } diff --git a/common/domain/handler_test.go b/common/domain/handler_test.go index 3e7fa28e553..c02ea03c96e 100644 --- a/common/domain/handler_test.go +++ b/common/domain/handler_test.go @@ -3685,6 +3685,7 @@ func TestHandler_FailoverDomain(t *testing.T) { request: &types.FailoverDomainRequest{ DomainName: constants.TestDomainName, DomainActiveClusterName: common.Ptr(clusterB), + Reason: "Planned maintenance on cluster A", }, response: func(timeSource clock.MockedTimeSource) *types.FailoverDomainResponse { data, _ := json.Marshal([]FailoverEvent{ @@ -3730,6 +3731,7 @@ func TestHandler_FailoverDomain(t *testing.T) { request: &types.FailoverDomainRequest{ DomainName: constants.TestDomainName, DomainActiveClusterName: common.Ptr(cluster.TestAlternativeClusterName), + Reason: "Testing domain not found error", }, err: &types.EntityNotExistsError{Message: "Domain not found"}, }, @@ -3767,6 +3769,7 @@ func TestHandler_FailoverDomain(t *testing.T) { request: &types.FailoverDomainRequest{ DomainName: constants.TestDomainName, DomainActiveClusterName: common.Ptr(cluster.TestCurrentClusterName), + Reason: "Testing update too frequent error", }, err: errDomainUpdateTooFrequent, }, diff --git a/common/types/mapper/proto/api.go b/common/types/mapper/proto/api.go index cb03abe7315..8175082be12 100644 --- a/common/types/mapper/proto/api.go +++ b/common/types/mapper/proto/api.go @@ -4516,6 +4516,7 @@ func FromFailoverDomainRequest(t *types.FailoverDomainRequest) *apiv1.FailoverDo DomainName: t.DomainName, DomainActiveClusterName: t.GetDomainActiveClusterName(), ActiveClusters: FromActiveClusters(t.ActiveClusters), + Reason: t.Reason, } } @@ -4527,6 +4528,7 @@ func ToFailoverDomainRequest(t *apiv1.FailoverDomainRequest) *types.FailoverDoma DomainName: t.DomainName, DomainActiveClusterName: common.StringPtr(t.DomainActiveClusterName), ActiveClusters: ToActiveClusters(t.ActiveClusters), + Reason: t.Reason, } } diff --git a/common/types/mapper/thrift/shared.go b/common/types/mapper/thrift/shared.go index 2b63a43bf6c..1f27912e546 100644 --- a/common/types/mapper/thrift/shared.go +++ b/common/types/mapper/thrift/shared.go @@ -1623,6 +1623,7 @@ func FromFailoverDomainRequest(t *types.FailoverDomainRequest) *shared.FailoverD DomainName: &t.DomainName, DomainActiveClusterName: t.DomainActiveClusterName, ActiveClusters: FromActiveClusters(t.ActiveClusters), + Reason: &t.Reason, } } @@ -1635,6 +1636,7 @@ func ToFailoverDomainRequest(t *shared.FailoverDomainRequest) *types.FailoverDom DomainName: t.GetDomainName(), DomainActiveClusterName: t.DomainActiveClusterName, ActiveClusters: ToActiveClusters(t.ActiveClusters), + Reason: t.GetReason(), } } diff --git a/common/types/shared.go b/common/types/shared.go index ad861ef5c29..434716814bf 100644 --- a/common/types/shared.go +++ b/common/types/shared.go @@ -1782,16 +1782,25 @@ type FailoverDomainRequest struct { DomainName string `json:"domainName,omitempty"` DomainActiveClusterName *string `json:"domainActiveClusterName,omitempty"` ActiveClusters *ActiveClusters `json:"activeClusters,omitempty"` + Reason string `json:"reason,omitempty"` } func (v *FailoverDomainRequest) ToUpdateDomainRequest() *UpdateDomainRequest { if v == nil { return nil } + // Include failover reason in domain data if provided + var data map[string]string + if v.Reason != "" { + data = map[string]string{ + "FailoverReason": v.Reason, + } + } return &UpdateDomainRequest{ Name: v.DomainName, ActiveClusterName: v.DomainActiveClusterName, ActiveClusters: v.ActiveClusters, + Data: data, } } @@ -1819,6 +1828,14 @@ func (v *FailoverDomainRequest) GetDomain() (o string) { return } +// GetReason is an internal getter (TBD...) +func (v *FailoverDomainRequest) GetReason() (o string) { + if v != nil { + return v.Reason + } + return +} + // FailoverDomainResponse is an internal type (TBD...) type FailoverDomainResponse struct { DomainInfo *DomainInfo `json:"domainInfo,omitempty"` diff --git a/go.mod b/go.mod index 7511e0893f0..cf7078d0b25 100644 --- a/go.mod +++ b/go.mod @@ -167,3 +167,5 @@ replace github.com/apache/thrift => github.com/apache/thrift v0.0.0-201612212036 // DO NOT USE as it misses mysql/config store fix retract v1.2.3 + +replace github.com/uber/cadence-idl => github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963 diff --git a/go.sum b/go.sum index 7a63bf2a971..91a1b1597b8 100644 --- a/go.sum +++ b/go.sum @@ -442,9 +442,6 @@ github.com/uber-go/mapdecode v1.0.0/go.mod h1:b5nP15FwXTgpjTjeA9A2uTHXV5UJCl4arw github.com/uber-go/tally v3.3.12+incompatible/go.mod h1:YDTIBxdXyOU/sCWilKB4bgyufu1cEi0jdVnRdxvjnmU= github.com/uber-go/tally v3.3.15+incompatible h1:9hLSgNBP28CjIaDmAuRTq9qV+UZY+9PcvAkXO4nNMwg= github.com/uber-go/tally v3.3.15+incompatible/go.mod h1:YDTIBxdXyOU/sCWilKB4bgyufu1cEi0jdVnRdxvjnmU= -github.com/uber/cadence-idl v0.0.0-20211111101836-d6b70b60eb8c/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws= -github.com/uber/cadence-idl v0.0.0-20251027162905-7b9d8a31de8c h1:fd00g0M50UuZYetChIAPYUchvi1M+gL3rhbWwOQ6GhE= -github.com/uber/cadence-idl v0.0.0-20251027162905-7b9d8a31de8c/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws= github.com/uber/jaeger-client-go v2.22.1+incompatible h1:NHcubEkVbahf9t3p75TOCR83gdUHXjRJvjoBh1yACsM= github.com/uber/jaeger-client-go v2.22.1+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v2.2.0+incompatible h1:MxZXOiR2JuoANZ3J6DE/U0kSFv/eJ/GfSYVCjK7dyaw= @@ -485,6 +482,8 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963 h1:1EW62MeL+ZKpxrJsLTc+IjtMablb5lHOECRf9oleBk8= +github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws= go.mongodb.org/mongo-driver v1.7.3 h1:G4l/eYY9VrQAK/AUgkV0koQKzQnyddnWxrd/Etf0jIs= go.mongodb.org/mongo-driver v1.7.3/go.mod h1:NqaYOwnXWr5Pm7AOpO5QFxKJ503nbMse/R79oO62zWg= go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= diff --git a/go.work.sum b/go.work.sum index 2b1625d8c7d..b6d07ef505f 100644 --- a/go.work.sum +++ b/go.work.sum @@ -659,6 +659,8 @@ github.com/yudai/pp v2.0.1+incompatible h1:Q4//iY4pNF6yPLZIigmvcl7k/bPgrcTPIFIcm github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE= github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= +github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963 h1:1EW62MeL+ZKpxrJsLTc+IjtMablb5lHOECRf9oleBk8= +github.com/zawadzkidiana/cadence-idl v0.0.0-20251112233538-63b7e01d0963/go.mod h1:oyUK7GCNCRHCCyWyzifSzXpVrRYVBbAMHAzF5dXiKws= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.etcd.io/bbolt v1.3.4 h1:hi1bXHMVrlQh6WwxAy+qZCV/SYIlqo+Ushwdpa4tAKg= diff --git a/service/frontend/api/domain_failover_integration_test.go b/service/frontend/api/domain_failover_integration_test.go new file mode 100644 index 00000000000..2677fad8ef8 --- /dev/null +++ b/service/frontend/api/domain_failover_integration_test.go @@ -0,0 +1,104 @@ +// Copyright (c) 2017 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package api + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/uber/cadence/common" + "github.com/uber/cadence/common/types" +) + +func TestFailoverDomainValidation_WithoutReason_ExpectError(t *testing.T) { + // This test verifies that when a failover is attempted without providing a reason, + // the validation fails and returns an appropriate error. + + v, _ := setupMocksForRequestValidator(t) + + // Test case 1: Failover request WITHOUT a reason + failoverRequestWithoutReason := &types.FailoverDomainRequest{ + DomainName: "test-domain", + DomainActiveClusterName: common.StringPtr("cluster2"), + // Reason is intentionally missing + } + + err := v.ValidateFailoverDomainRequest(context.Background(), failoverRequestWithoutReason) + + // Verify we get the expected error + assert.Error(t, err) + assert.Contains(t, err.Error(), "Reason must be provided for domain failover") + + // Test case 2: Failover request with empty reason + failoverRequestEmptyReason := &types.FailoverDomainRequest{ + DomainName: "test-domain", + DomainActiveClusterName: common.StringPtr("cluster2"), + Reason: "", // Empty reason + } + + err = v.ValidateFailoverDomainRequest(context.Background(), failoverRequestEmptyReason) + + // Verify we get the expected error + assert.Error(t, err) + assert.Contains(t, err.Error(), "Reason must be provided for domain failover") +} + +func TestFailoverDomainValidation_WithReason_ExpectSuccess(t *testing.T) { + // This test verifies that when a failover is attempted with a valid reason, + // the validation succeeds. + + v, _ := setupMocksForRequestValidator(t) + + // Create a failover request WITH a reason + failoverRequestWithReason := &types.FailoverDomainRequest{ + DomainName: "test-domain", + DomainActiveClusterName: common.StringPtr("cluster2"), + Reason: "Planned maintenance on cluster1 - upgrading to v1.2.3", + } + + err := v.ValidateFailoverDomainRequest(context.Background(), failoverRequestWithReason) + + // Verify validation passes + require.NoError(t, err) +} + +func TestFailoverDomainReason_StoredInDomainData(t *testing.T) { + // This test verifies that the reason provided during failover + // is properly converted to domain data for storage. + + failoverRequest := &types.FailoverDomainRequest{ + DomainName: "test-domain", + DomainActiveClusterName: common.StringPtr("cluster2"), + Reason: "Emergency failover due to cluster1 outage", + } + + // Call ToUpdateDomainRequest and verify reason is in data + updateReq := failoverRequest.ToUpdateDomainRequest() + + assert.NotNil(t, updateReq) + assert.Equal(t, "test-domain", updateReq.Name) + assert.Equal(t, common.StringPtr("cluster2"), updateReq.ActiveClusterName) + assert.NotNil(t, updateReq.Data) + assert.Equal(t, "Emergency failover due to cluster1 outage", updateReq.Data["FailoverReason"]) +} diff --git a/service/frontend/api/request_validator.go b/service/frontend/api/request_validator.go index e9078fac21e..3542e9d82fd 100644 --- a/service/frontend/api/request_validator.go +++ b/service/frontend/api/request_validator.go @@ -378,6 +378,11 @@ func (v *requestValidatorImpl) ValidateFailoverDomainRequest(ctx context.Context return &types.BadRequestError{Message: "DomainActiveClusterName or ActiveClusters must be provided to failover the domain"} } + // Validate that a reason is provided + if failoverDomainRequest.GetReason() == "" { + return &types.BadRequestError{Message: "Reason must be provided for domain failover"} + } + // Security token is not required for failover request - reject the failover if the cluster is in lockdown return checkFailOverPermission(v.config, failoverDomainRequest.GetDomainName()) } diff --git a/service/frontend/api/request_validator_failover_test.go b/service/frontend/api/request_validator_failover_test.go new file mode 100644 index 00000000000..a34ec4bcb75 --- /dev/null +++ b/service/frontend/api/request_validator_failover_test.go @@ -0,0 +1,118 @@ +// Copyright (c) 2017 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package api + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/uber/cadence/common" + "github.com/uber/cadence/common/types" +) + +func TestValidateFailoverDomainRequest(t *testing.T) { + testCases := []struct { + name string + req *types.FailoverDomainRequest + expectError bool + expectedError string + }{ + { + name: "valid request", + req: &types.FailoverDomainRequest{ + DomainName: "test-domain", + DomainActiveClusterName: common.StringPtr("cluster2"), + Reason: "Planned maintenance on cluster1", + }, + expectError: false, + }, + { + name: "nil request", + req: nil, + expectError: true, + expectedError: "Request is nil.", + }, + { + name: "empty domain name", + req: &types.FailoverDomainRequest{ + DomainActiveClusterName: common.StringPtr("cluster2"), + Reason: "Planned maintenance", + }, + expectError: true, + expectedError: "Domain not set on request.", + }, + { + name: "missing cluster info", + req: &types.FailoverDomainRequest{ + DomainName: "test-domain", + Reason: "Planned maintenance", + }, + expectError: true, + expectedError: "DomainActiveClusterName or ActiveClusters must be provided to failover the domain", + }, + { + name: "missing reason", + req: &types.FailoverDomainRequest{ + DomainName: "test-domain", + DomainActiveClusterName: common.StringPtr("cluster2"), + }, + expectError: true, + expectedError: "Reason must be provided for domain failover", + }, + { + name: "empty reason", + req: &types.FailoverDomainRequest{ + DomainName: "test-domain", + DomainActiveClusterName: common.StringPtr("cluster2"), + Reason: "", + }, + expectError: true, + expectedError: "Reason must be provided for domain failover", + }, + // TODO: Fix ActiveClusters test case structure + // { + // name: "valid request with active clusters", + // req: &types.FailoverDomainRequest{ + // DomainName: "test-domain", + // ActiveClusters: &types.ActiveClusters{ + // AttributeScopes: []string{"cluster1", "cluster2"}, + // }, + // Reason: "Load balancing across clusters", + // }, + // expectError: false, + // }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + v, _ := setupMocksForRequestValidator(t) + + err := v.ValidateFailoverDomainRequest(context.Background(), tc.req) + if tc.expectError { + assert.ErrorContains(t, err, tc.expectedError) + } else { + assert.NoError(t, err) + } + }) + } +} diff --git a/tools/cli/domain_commands.go b/tools/cli/domain_commands.go index 0e25d79f798..9d173805348 100644 --- a/tools/cli/domain_commands.go +++ b/tools/cli/domain_commands.go @@ -461,6 +461,15 @@ func (d *domainCLIImpl) FailoverDomain(c *cli.Context) error { DomainName: domainName, } + if !c.IsSet(FlagReason) { + return commoncli.Problem(fmt.Sprintf("%s flag is required for domain failover.", FlagReason), nil) + } + failoverReason := strings.TrimSpace(c.String(FlagReason)) + if failoverReason == "" { + return commoncli.Problem("Reason must be a non-empty string.", nil) + } + failoverRequest.Reason = failoverReason + ctx, cancel, err := newContext(c) defer cancel() if err != nil {