@@ -777,6 +777,125 @@ func TestRemoveTablet(t *testing.T) {
777
777
assert .Empty (t , a , "wrong result, expected empty list" )
778
778
}
779
779
780
+ // When an external primary failover is performed,
781
+ // the demoted primary will advertise itself as a `PRIMARY`
782
+ // tablet until it recognizes that it was demoted,
783
+ // and until all in-flight operations have either finished
784
+ // (successfully or unsuccessfully, see `--shutdown_grace_period` flag).
785
+ //
786
+ // During this time, operations like `RemoveTablet` should not lead
787
+ // to multiple tablets becoming valid targets for `PRIMARY`.
788
+ func TestRemoveTabletDuringExternalReparenting (t * testing.T ) {
789
+ // reset error counters
790
+ hcErrorCounters .ResetAll ()
791
+ ts := memorytopo .NewServer ("cell" )
792
+ defer ts .Close ()
793
+ hc := createTestHc (ts )
794
+ // close healthcheck
795
+ defer hc .Close ()
796
+
797
+ firstTablet := createTestTablet (0 , "cell" , "a" )
798
+ firstTablet .Type = topodatapb .TabletType_PRIMARY
799
+
800
+ secondTablet := createTestTablet (1 , "cell" , "b" )
801
+ secondTablet .Type = topodatapb .TabletType_REPLICA
802
+
803
+ thirdTablet := createTestTablet (2 , "cell" , "c" )
804
+ thirdTablet .Type = topodatapb .TabletType_REPLICA
805
+
806
+ firstTabletHealthStream := make (chan * querypb.StreamHealthResponse )
807
+ firstTabletConn := createFakeConn (firstTablet , firstTabletHealthStream )
808
+ firstTabletConn .errCh = make (chan error )
809
+
810
+ secondTabletHealthStream := make (chan * querypb.StreamHealthResponse )
811
+ secondTabletConn := createFakeConn (secondTablet , secondTabletHealthStream )
812
+ secondTabletConn .errCh = make (chan error )
813
+
814
+ thirdTabletHealthStream := make (chan * querypb.StreamHealthResponse )
815
+ thirdTabletConn := createFakeConn (thirdTablet , thirdTabletHealthStream )
816
+ thirdTabletConn .errCh = make (chan error )
817
+
818
+ resultChan := hc .Subscribe ()
819
+
820
+ hc .AddTablet (firstTablet )
821
+ hc .AddTablet (secondTablet )
822
+ hc .AddTablet (thirdTablet )
823
+
824
+ <- resultChan
825
+ <- resultChan
826
+
827
+ firstTabletPrimaryTermStartTimestamp := time .Now ().Unix () - 10
828
+
829
+ firstTabletHealthStream <- & querypb.StreamHealthResponse {
830
+ TabletAlias : firstTablet .Alias ,
831
+ Target : & querypb.Target {Keyspace : "k" , Shard : "s" , TabletType : topodatapb .TabletType_PRIMARY },
832
+ Serving : true ,
833
+
834
+ TabletExternallyReparentedTimestamp : firstTabletPrimaryTermStartTimestamp ,
835
+ RealtimeStats : & querypb.RealtimeStats {ReplicationLagSeconds : 0 , CpuUsage : 0.5 },
836
+ }
837
+
838
+ secondTabletHealthStream <- & querypb.StreamHealthResponse {
839
+ TabletAlias : secondTablet .Alias ,
840
+ Target : & querypb.Target {Keyspace : "k" , Shard : "s" , TabletType : topodatapb .TabletType_REPLICA },
841
+ Serving : true ,
842
+
843
+ TabletExternallyReparentedTimestamp : 0 ,
844
+ RealtimeStats : & querypb.RealtimeStats {ReplicationLagSeconds : 1 , CpuUsage : 0.5 },
845
+ }
846
+
847
+ thirdTabletHealthStream <- & querypb.StreamHealthResponse {
848
+ TabletAlias : thirdTablet .Alias ,
849
+ Target : & querypb.Target {Keyspace : "k" , Shard : "s" , TabletType : topodatapb .TabletType_REPLICA },
850
+ Serving : true ,
851
+
852
+ TabletExternallyReparentedTimestamp : 0 ,
853
+ RealtimeStats : & querypb.RealtimeStats {ReplicationLagSeconds : 1 , CpuUsage : 0.5 },
854
+ }
855
+
856
+ <- resultChan
857
+ <- resultChan
858
+ <- resultChan
859
+
860
+ secondTabletPrimaryTermStartTimestamp := time .Now ().Unix ()
861
+
862
+ // Simulate a failover
863
+ firstTabletHealthStream <- & querypb.StreamHealthResponse {
864
+ TabletAlias : firstTablet .Alias ,
865
+ Target : & querypb.Target {Keyspace : "k" , Shard : "s" , TabletType : topodatapb .TabletType_PRIMARY },
866
+ Serving : true ,
867
+
868
+ TabletExternallyReparentedTimestamp : firstTabletPrimaryTermStartTimestamp ,
869
+ RealtimeStats : & querypb.RealtimeStats {ReplicationLagSeconds : 0 , CpuUsage : 0.5 },
870
+ }
871
+
872
+ secondTabletHealthStream <- & querypb.StreamHealthResponse {
873
+ TabletAlias : secondTablet .Alias ,
874
+ Target : & querypb.Target {Keyspace : "k" , Shard : "s" , TabletType : topodatapb .TabletType_PRIMARY },
875
+ Serving : true ,
876
+
877
+ TabletExternallyReparentedTimestamp : secondTabletPrimaryTermStartTimestamp ,
878
+ RealtimeStats : & querypb.RealtimeStats {ReplicationLagSeconds : 0 , CpuUsage : 0.5 },
879
+ }
880
+
881
+ <- resultChan
882
+ <- resultChan
883
+
884
+ hc .RemoveTablet (thirdTablet )
885
+
886
+ // `secondTablet` should be the primary now
887
+ expectedTabletStats := []* TabletHealth {{
888
+ Tablet : secondTablet ,
889
+ Target : & querypb.Target {Keyspace : "k" , Shard : "s" , TabletType : topodatapb .TabletType_PRIMARY },
890
+ Serving : true ,
891
+ Stats : & querypb.RealtimeStats {ReplicationLagSeconds : 0 , CpuUsage : 0.5 },
892
+ PrimaryTermStartTime : secondTabletPrimaryTermStartTimestamp ,
893
+ }}
894
+
895
+ actualTabletStats := hc .GetHealthyTabletStats (& querypb.Target {Keyspace : "k" , Shard : "s" , TabletType : topodatapb .TabletType_PRIMARY })
896
+ mustMatch (t , expectedTabletStats , actualTabletStats , "unexpected result" )
897
+ }
898
+
780
899
// TestGetHealthyTablets tests the functionality of GetHealthyTabletStats.
781
900
func TestGetHealthyTablets (t * testing.T ) {
782
901
ts := memorytopo .NewServer ("cell" )
0 commit comments