Skip to content

Commit 1ec9f3b

Browse files
chore: Refactoring/domain handler update 1/n (#7395)
<!-- Describe what has changed in this PR --> **What changed?** This is a refactoring of the domain-handler with the intent of making the pathing complexity in that method more tractable. The idea is pretty simple: Separate out the following pathways: - Local-domain updates (in this PR) - Global-domain configuration updates (in following PRs) - Failovers - Normal active/passive failover - AA failover and in doing so, unify the UpdateDomain Failover functionality and the new Failover endpoint, which is really a subset of functionality of UpdateDomain. The reason for this is because the handler's extremely hard to follow and has a lot of needless complexity through it's overloaded use-cases. The idea being that by splitting out the pathways as mentioned above, they can follow a much more standard pattern of: - Request validation - Fetching existing data - performing writes Without huge amounts of conditionals and guards. This should make validation a lot easier (right now it's really hard to work out what's permissable). It should make future edits easier. Importantly for my work it should unblock the ability to write changes to the failoverHistory endpoint, since otherwise crowbaring that it is too hard. <!-- How have you verified this change? Tested locally? Added a unit test? Checked in staging env? --> **How did you test it?** Unit tests and manual local testing <!-- Assuming the worst case, what can be broken when deploying this change to production? --> **Potential risks** This could break domain updates, failover, registration if there's a bug, so some care is warranted. --------- Signed-off-by: David Porter <[email protected]>
1 parent 0501d3d commit 1ec9f3b

File tree

7 files changed

+143
-19
lines changed

7 files changed

+143
-19
lines changed

common/domain/attrValidator.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,16 @@ func newAttrValidator(
4848
}
4949
}
5050

51+
func (d *AttrValidatorImpl) validateLocalDomainUpdateRequest(updateRequest *types.UpdateDomainRequest) error {
52+
if updateRequest.ActiveClusterName != nil || updateRequest.ActiveClusters != nil || updateRequest.Clusters != nil {
53+
return errLocalDomainsCannotFailover
54+
}
55+
if updateRequest.FailoverTimeoutInSeconds != nil {
56+
return errLocalDomainsCannotFailover
57+
}
58+
return nil
59+
}
60+
5161
func (d *AttrValidatorImpl) validateDomainConfig(config *persistence.DomainConfig) error {
5262
if config.Retention < int32(d.minRetentionDays) {
5363
return errInvalidRetentionPeriod

common/domain/errors.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ var (
3333
errOngoingGracefulFailover = &types.BadRequestError{Message: "Cannot start concurrent graceful failover."}
3434
errInvalidGracefulFailover = &types.BadRequestError{Message: "Cannot start graceful failover without updating active cluster or in local domain."}
3535
errActiveClusterNameRequired = &types.BadRequestError{Message: "ActiveClusterName is required for all global domains."}
36+
errLocalDomainsCannotFailover = &types.BadRequestError{Message: "Local domains cannot perform failovers or change replication configuration"}
3637

3738
errInvalidRetentionPeriod = &types.BadRequestError{Message: "A valid retention period is not set on request."}
3839
errInvalidArchivalConfig = &types.BadRequestError{Message: "Invalid to enable archival without specifying a uri."}

common/domain/handler.go

Lines changed: 120 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,7 @@ func (d *handlerImpl) UpdateDomain(
427427
return nil, err
428428
}
429429

430+
// todo (david.porter) remove this and push the deepcopy into each of the branches
430431
getResponse := currentDomainState.DeepCopy()
431432

432433
info := getResponse.Info
@@ -439,10 +440,15 @@ func (d *handlerImpl) UpdateDomain(
439440
isGlobalDomain := getResponse.IsGlobalDomain
440441
gracefulFailoverEndTime := getResponse.FailoverEndTime
441442
currentActiveCluster := replicationConfig.ActiveClusterName
442-
currentActiveClusters := replicationConfig.ActiveClusters.DeepCopy()
443443
previousFailoverVersion := getResponse.PreviousFailoverVersion
444444
lastUpdatedTime := time.Unix(0, getResponse.LastUpdatedTime)
445445

446+
if !isGlobalDomain {
447+
return d.updateLocalDomain(ctx, updateRequest, getResponse, notificationVersion)
448+
}
449+
// todo (active-active) refactor the rest of this method to remove all branching for global domain variations
450+
// and the split between domain update and failover
451+
446452
// whether history archival config changed
447453
historyArchivalConfigChanged := false
448454
// whether visibility archival config changed
@@ -632,7 +638,7 @@ func (d *handlerImpl) UpdateDomain(
632638
failoverType,
633639
&currentActiveCluster,
634640
updateRequest.ActiveClusterName,
635-
currentActiveClusters,
641+
currentDomainState.ReplicationConfig.GetActiveClusters(),
636642
replicationConfig.ActiveClusters,
637643
))
638644
if err != nil {
@@ -663,6 +669,8 @@ func (d *handlerImpl) UpdateDomain(
663669
return nil, err
664670
}
665671
}
672+
// todo (david.porter) remove this - all domains at this point are global
673+
// leaving during the refactor just for clarity
666674
if isGlobalDomain {
667675
if err = d.domainReplicator.HandleTransmissionTask(
668676
ctx,
@@ -691,6 +699,116 @@ func (d *handlerImpl) UpdateDomain(
691699
return response, nil
692700
}
693701

702+
func (d *handlerImpl) updateLocalDomain(ctx context.Context,
703+
updateRequest *types.UpdateDomainRequest,
704+
currentState *persistence.GetDomainResponse,
705+
notificationVersion int64,
706+
) (*types.UpdateDomainResponse, error) {
707+
708+
err := d.domainAttrValidator.validateLocalDomainUpdateRequest(updateRequest)
709+
if err != nil {
710+
return nil, err
711+
}
712+
713+
// whether history archival config changed
714+
historyArchivalConfigChanged := false
715+
// whether visibility archival config changed
716+
visibilityArchivalConfigChanged := false
717+
// whether anything other than active cluster is changed
718+
configurationChanged := false
719+
720+
intendedDomainState := currentState.DeepCopy()
721+
722+
configVersion := currentState.ConfigVersion
723+
724+
now := d.timeSource.Now()
725+
726+
lastUpdatedTime := time.Unix(0, currentState.LastUpdatedTime)
727+
728+
// Update history archival state
729+
historyArchivalConfigChanged, err = d.updateHistoryArchivalState(intendedDomainState.Config, updateRequest)
730+
if err != nil {
731+
return nil, err
732+
}
733+
734+
// Update visibility archival state
735+
visibilityArchivalConfigChanged, err = d.updateVisibilityArchivalState(intendedDomainState.Config, updateRequest)
736+
if err != nil {
737+
return nil, err
738+
}
739+
740+
// Update domain info
741+
info, domainInfoChanged := d.updateDomainInfo(
742+
updateRequest,
743+
intendedDomainState.Info,
744+
)
745+
746+
// Update domain config
747+
config, domainConfigChanged, err := d.updateDomainConfiguration(
748+
updateRequest.GetName(),
749+
intendedDomainState.Config,
750+
updateRequest,
751+
)
752+
if err != nil {
753+
return nil, err
754+
}
755+
756+
// Update domain bad binary
757+
config, deleteBinaryChanged, err := d.updateDeleteBadBinary(
758+
config,
759+
updateRequest.DeleteBadBinary,
760+
)
761+
if err != nil {
762+
return nil, err
763+
}
764+
765+
configurationChanged = historyArchivalConfigChanged || visibilityArchivalConfigChanged || domainInfoChanged || domainConfigChanged || deleteBinaryChanged
766+
767+
if err = d.domainAttrValidator.validateDomainConfig(config); err != nil {
768+
return nil, err
769+
}
770+
771+
if err = d.domainAttrValidator.validateDomainReplicationConfigForLocalDomain(
772+
intendedDomainState.ReplicationConfig,
773+
); err != nil {
774+
return nil, err
775+
}
776+
777+
if configurationChanged {
778+
// set the versions
779+
if configurationChanged {
780+
configVersion = intendedDomainState.ConfigVersion + 1
781+
}
782+
783+
lastUpdatedTime = now
784+
785+
updateReq := createUpdateRequest(
786+
info,
787+
config,
788+
intendedDomainState.ReplicationConfig,
789+
configVersion,
790+
intendedDomainState.FailoverVersion,
791+
intendedDomainState.FailoverNotificationVersion,
792+
intendedDomainState.FailoverEndTime,
793+
intendedDomainState.PreviousFailoverVersion,
794+
lastUpdatedTime,
795+
notificationVersion,
796+
)
797+
798+
err = d.domainManager.UpdateDomain(ctx, &updateReq)
799+
if err != nil {
800+
return nil, err
801+
}
802+
}
803+
response := &types.UpdateDomainResponse{
804+
IsGlobalDomain: false,
805+
FailoverVersion: intendedDomainState.FailoverVersion,
806+
}
807+
response.DomainInfo, response.Configuration, response.ReplicationConfiguration = d.createResponse(info, config, intendedDomainState.ReplicationConfig)
808+
809+
return response, nil
810+
}
811+
694812
// FailoverDomain handles failover of the domain to a different cluster
695813
func (d *handlerImpl) FailoverDomain(
696814
ctx context.Context,

common/domain/handler_MasterCluster_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,6 @@ func (s *domainHandlerGlobalDomainEnabledPrimaryClusterSuite) TestUpdateGetDomai
427427
VisibilityArchivalStatus: types.ArchivalStatusDisabled.Ptr(),
428428
VisibilityArchivalURI: common.StringPtr(""),
429429
BadBinaries: &types.BadBinaries{Binaries: map[string]*types.BadBinaryInfo{}},
430-
ActiveClusterName: common.StringPtr(s.ClusterMetadata.GetCurrentClusterName()),
431-
Clusters: clusters,
432430
})
433431
s.Nil(err)
434432
fnTest(

common/domain/handler_NotMasterCluster_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,6 @@ func (s *domainHandlerGlobalDomainEnabledNotPrimaryClusterSuite) TestUpdateGetDo
398398
VisibilityArchivalStatus: types.ArchivalStatusDisabled.Ptr(),
399399
VisibilityArchivalURI: common.StringPtr(""),
400400
BadBinaries: &types.BadBinaries{Binaries: map[string]*types.BadBinaryInfo{}},
401-
ActiveClusterName: common.StringPtr(s.ClusterMetadata.GetCurrentClusterName()),
402-
Clusters: clusters,
403401
})
404402
s.Nil(err)
405403
fnTest(

common/domain/handler_test.go

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2210,7 +2210,7 @@ func TestHandler_UpdateDomain(t *testing.T) {
22102210
},
22112211
},
22122212
{
2213-
name: "Success case - local domain force failover",
2213+
name: "Error case - local domain force failover - shoudl not be able to failover a local domain",
22142214
setupMock: func(domainManager *persistence.MockDomainManager, updateRequest *types.UpdateDomainRequest, archivalMetadata *archiver.MockArchivalMetadata, timeSource clock.MockedTimeSource, _ *MockReplicator) {
22152215
domainResponse := &persistence.GetDomainResponse{
22162216
ReplicationConfig: &persistence.DomainReplicationConfig{
@@ -2239,17 +2239,6 @@ func TestHandler_UpdateDomain(t *testing.T) {
22392239
domainManager.EXPECT().GetMetadata(ctx).Return(&persistence.GetMetadataResponse{}, nil).Times(1)
22402240
domainManager.EXPECT().GetDomain(ctx, &persistence.GetDomainRequest{Name: updateRequest.GetName()}).
22412241
Return(domainResponse, nil).Times(1)
2242-
archivalConfig := archiver.NewArchivalConfig(
2243-
commonconstants.ArchivalDisabled,
2244-
dynamicproperties.GetStringPropertyFn(commonconstants.ArchivalDisabled),
2245-
false,
2246-
dynamicproperties.GetBoolPropertyFn(false),
2247-
commonconstants.ArchivalDisabled,
2248-
"")
2249-
archivalMetadata.On("GetHistoryConfig").Return(archivalConfig).Times(1)
2250-
archivalMetadata.On("GetVisibilityConfig").Return(archivalConfig).Times(1)
2251-
timeSource.Advance(time.Hour)
2252-
domainManager.EXPECT().UpdateDomain(ctx, gomock.Any()).Return(nil).Times(1)
22532242
},
22542243
request: &types.UpdateDomainRequest{
22552244
Name: constants.TestDomainName,
@@ -2281,6 +2270,7 @@ func TestHandler_UpdateDomain(t *testing.T) {
22812270
},
22822271
}
22832272
},
2273+
err: errLocalDomainsCannotFailover,
22842274
},
22852275
{
22862276
name: "Error case - GetMetadata error",
@@ -2417,7 +2407,7 @@ func TestHandler_UpdateDomain(t *testing.T) {
24172407
},
24182408
},
24192409
{
2420-
name: "Error case - handleGracefulFailover error",
2410+
name: "Error case - handleGracefulFailover error in the case of a global domain - it should return an error to the user",
24212411
setupMock: func(domainManager *persistence.MockDomainManager, updateRequest *types.UpdateDomainRequest, archivalMetadata *archiver.MockArchivalMetadata, _ clock.MockedTimeSource, _ *MockReplicator) {
24222412
domainManager.EXPECT().GetMetadata(ctx).Return(&persistence.GetMetadataResponse{}, nil).Times(1)
24232413
domainManager.EXPECT().GetDomain(ctx, &persistence.GetDomainRequest{Name: updateRequest.GetName()}).
@@ -2427,6 +2417,7 @@ func TestHandler_UpdateDomain(t *testing.T) {
24272417
},
24282418
ReplicationConfig: &persistence.DomainReplicationConfig{},
24292419
Config: &persistence.DomainConfig{},
2420+
IsGlobalDomain: true,
24302421
}, nil).Times(1)
24312422
archivalConfig := archiver.NewArchivalConfig(
24322423
commonconstants.ArchivalDisabled,
@@ -2452,6 +2443,7 @@ func TestHandler_UpdateDomain(t *testing.T) {
24522443
Return(&persistence.GetDomainResponse{
24532444
ReplicationConfig: &persistence.DomainReplicationConfig{},
24542445
Config: &persistence.DomainConfig{},
2446+
IsGlobalDomain: true,
24552447
Info: &persistence.DomainInfo{
24562448
Name: constants.TestDomainName,
24572449
},

common/persistence/data_manager_interfaces.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,6 +1933,13 @@ func (t *TimerTaskInfo) ToTask() (Task, error) {
19331933
}
19341934
}
19351935

1936+
func (c *DomainReplicationConfig) GetActiveClusters() *types.ActiveClusters {
1937+
if c != nil && c.ActiveClusters != nil {
1938+
return c.ActiveClusters
1939+
}
1940+
return nil
1941+
}
1942+
19361943
// ToNilSafeCopy
19371944
// TODO: it seems that we just need a nil safe shardInfo, deep copy is not necessary
19381945
func (s *ShardInfo) ToNilSafeCopy() *ShardInfo {

0 commit comments

Comments
 (0)