@@ -18,7 +18,13 @@ package general
1818
1919import (
2020 "context"
21+ "encoding/json"
2122 "fmt"
23+ << << << < HEAD
24+ == == == =
25+ "strconv"
26+ "strings"
27+ >> >> >> > e7888dfa83 (`vtorc` : support analysis ordering , improve semi - sync rollout (#19427 ))
2228 "testing"
2329 "time"
2430
@@ -893,3 +899,85 @@ func TestFullStatusConnectionPooling(t *testing.T) {
893899 assert .Equal (t , 200 , status )
894900 assert .Equal (t , "null" , resp )
895901}
902+
903+ // TestSemiSyncRecoveryOrdering verifies that when the durability policy changes
904+ // to semi_sync, VTOrc fixes ReplicaSemiSyncMustBeSet before PrimarySemiSyncMustBeSet.
905+ // This ordering is enforced by the AfterAnalyses/BeforeAnalyses dependencies.
906+ func TestSemiSyncRecoveryOrdering (t * testing.T ) {
907+ defer utils .PrintVTOrcLogsOnFailure (t , clusterInfo .ClusterInstance )
908+ // Start with durability "none" so no semi-sync is required initially.
909+ utils .SetupVttabletsAndVTOrcs (t , clusterInfo , 2 , 0 , nil , cluster.VTOrcConfiguration {
910+ PreventCrossCellFailover : true ,
911+ }, cluster .DefaultVtorcsByCell , policy .DurabilityNone )
912+ keyspace := & clusterInfo .ClusterInstance .Keyspaces [0 ]
913+ shard0 := & keyspace .Shards [0 ]
914+
915+ // Wait for primary election and healthy replication.
916+ primary := utils .ShardPrimaryTablet (t , clusterInfo , keyspace , shard0 )
917+ assert .NotNil (t , primary , "should have elected a primary" )
918+ utils .CheckReplication (t , clusterInfo , primary , shard0 .Vttablets , 10 * time .Second )
919+
920+ vtorc := clusterInfo .ClusterInstance .VTOrcProcesses [0 ]
921+ utils .WaitForSuccessfulRecoveryCount (t , vtorc , logic .ElectNewPrimaryRecoveryName , keyspace .Name , shard0 .Name , 1 )
922+
923+ // Change durability to semi_sync. VTOrc should detect that replicas and primary
924+ // need semi-sync enabled, and fix them in the correct order.
925+ out , err := clusterInfo .ClusterInstance .VtctldClientProcess .ExecuteCommandWithOutput (
926+ "SetKeyspaceDurabilityPolicy" , keyspace .Name , "--durability-policy=" + policy .DurabilitySemiSync )
927+ require .NoError (t , err , out )
928+
929+ // Poll the database-state API to verify recovery ordering.
930+ // The topology_recovery table has auto-incremented recovery_id values that
931+ // reflect execution order. All ReplicaSemiSyncMustBeSet recovery_ids should
932+ // be less than any PrimarySemiSyncMustBeSet recovery_id.
933+ type tableState struct {
934+ TableName string
935+ Rows []map [string ]any
936+ }
937+
938+ assert .EventuallyWithT (t , func (c * assert.CollectT ) {
939+ status , response , err := utils .MakeAPICall (t , vtorc , "/api/database-state" )
940+ assert .NoError (c , err )
941+ assert .Equal (c , 200 , status )
942+
943+ var tables []tableState
944+ if ! assert .NoError (c , json .Unmarshal ([]byte (response ), & tables )) {
945+ return
946+ }
947+
948+ var maxReplicaRecoveryID , minPrimaryRecoveryID int
949+ var replicaCount , primaryCount int
950+ for _ , table := range tables {
951+ if table .TableName != "topology_recovery" {
952+ continue
953+ }
954+ for _ , row := range table .Rows {
955+ analysis , _ := row ["analysis" ].(string )
956+ recoveryIDStr , _ := row ["recovery_id" ].(string )
957+ recoveryID , err := strconv .Atoi (recoveryIDStr )
958+ if err != nil {
959+ continue
960+ }
961+ switch inst .AnalysisCode (analysis ) {
962+ case inst .ReplicaSemiSyncMustBeSet :
963+ replicaCount ++
964+ if replicaCount == 1 || recoveryID > maxReplicaRecoveryID {
965+ maxReplicaRecoveryID = recoveryID
966+ }
967+ case inst .PrimarySemiSyncMustBeSet :
968+ primaryCount ++
969+ if primaryCount == 1 || recoveryID < minPrimaryRecoveryID {
970+ minPrimaryRecoveryID = recoveryID
971+ }
972+ }
973+ }
974+ }
975+
976+ assert .Greater (c , replicaCount , 0 , "should have ReplicaSemiSyncMustBeSet recoveries" )
977+ assert .Greater (c , primaryCount , 0 , "should have PrimarySemiSyncMustBeSet recoveries" )
978+ if replicaCount > 0 && primaryCount > 0 {
979+ assert .Less (c , maxReplicaRecoveryID , minPrimaryRecoveryID ,
980+ "all ReplicaSemiSyncMustBeSet recoveries should have lower recovery_id than PrimarySemiSyncMustBeSet" )
981+ }
982+ }, 30 * time .Second , time .Second )
983+ }
0 commit comments