1919import static com .google .common .truth .Truth .assertWithMessage ;
2020import static com .google .common .truth .TruthJUnit .assume ;
2121
22+ import com .google .api .gax .rpc .FailedPreconditionException ;
2223import com .google .cloud .Policy ;
2324import com .google .cloud .bigtable .admin .v2 .BigtableInstanceAdminClient ;
2425import com .google .cloud .bigtable .admin .v2 .models .AppProfile ;
3637import com .google .cloud .bigtable .test_helpers .env .EmulatorEnv ;
3738import com .google .cloud .bigtable .test_helpers .env .PrefixGenerator ;
3839import com .google .cloud .bigtable .test_helpers .env .TestEnvRule ;
40+ import java .time .Duration ;
3941import java .util .List ;
42+ import java .util .logging .Level ;
43+ import java .util .logging .Logger ;
4044import org .junit .Before ;
4145import org .junit .BeforeClass ;
4246import org .junit .ClassRule ;
4953public class BigtableInstanceAdminClientIT {
5054
5155 @ ClassRule public static TestEnvRule testEnvRule = new TestEnvRule ();
56+ private static final Logger logger =
57+ Logger .getLogger (BigtableInstanceAdminClientIT .class .getName ());
5258 @ Rule public final PrefixGenerator prefixGenerator = new PrefixGenerator ();
5359
5460 private String instanceId = testEnvRule .env ().getInstanceId ();
@@ -410,7 +416,7 @@ public void createClusterWithAutoscalingTest() {
410416 }
411417
412418 @ Test
413- public void createClusterWithAutoscalingAndPartialUpdateTest () {
419+ public void createClusterWithAutoscalingAndPartialUpdateTest () throws Exception {
414420 String newInstanceId = prefixGenerator .newPrefix ();
415421 String newClusterId = newInstanceId + "-c1" ;
416422
@@ -448,8 +454,16 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
448454 assertThat (retrievedCluster .getAutoscalingCpuPercentageTarget ()).isEqualTo (20 );
449455 assertThat (retrievedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
450456
457+ // The test might trigger cluster autoscaling, which races against the update cluster calls in
458+ // this test and causing the update cluster calls to fail with "FAILED_PRECONDITION: Cannot
459+ // update cluster that is currently being modified" error.
460+ // In order to avoid test flakiness due to this race condition, we wrap all the update cluster
461+ // call with a retry loop.
462+ // TODO: After we have a proper fix for the issue, remove the
463+ // updateClusterAutoScalingConfigWithRetry function and all the calls to it.
464+
451465 Cluster updatedCluster =
452- client . updateClusterAutoscalingConfig (
466+ updateClusterAutoScalingConfigWithRetry (
453467 ClusterAutoscalingConfig .of (newInstanceId , clusterId ).setMaxNodes (3 ));
454468 assertThat (updatedCluster .getAutoscalingMinServeNodes ()).isEqualTo (1 );
455469 assertThat (updatedCluster .getAutoscalingMaxServeNodes ()).isEqualTo (3 );
@@ -463,7 +477,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
463477 assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
464478
465479 updatedCluster =
466- client . updateClusterAutoscalingConfig (
480+ updateClusterAutoScalingConfigWithRetry (
467481 ClusterAutoscalingConfig .of (newInstanceId , clusterId ).setMinNodes (2 ));
468482 assertThat (updatedCluster .getAutoscalingMinServeNodes ()).isEqualTo (2 );
469483 assertThat (updatedCluster .getAutoscalingMaxServeNodes ()).isEqualTo (3 );
@@ -477,7 +491,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
477491 assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
478492
479493 updatedCluster =
480- client . updateClusterAutoscalingConfig (
494+ updateClusterAutoScalingConfigWithRetry (
481495 ClusterAutoscalingConfig .of (newInstanceId , clusterId )
482496 .setCpuUtilizationTargetPercent (40 ));
483497 assertThat (updatedCluster .getAutoscalingMinServeNodes ()).isEqualTo (2 );
@@ -492,7 +506,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
492506 assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
493507
494508 updatedCluster =
495- client . updateClusterAutoscalingConfig (
509+ updateClusterAutoScalingConfigWithRetry (
496510 ClusterAutoscalingConfig .of (newInstanceId , clusterId )
497511 .setCpuUtilizationTargetPercent (45 )
498512 .setMaxNodes (5 ));
@@ -508,7 +522,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
508522 assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
509523
510524 updatedCluster =
511- client . updateClusterAutoscalingConfig (
525+ updateClusterAutoScalingConfigWithRetry (
512526 ClusterAutoscalingConfig .of (newInstanceId , clusterId )
513527 .setStorageUtilizationGibPerNode (2777 ));
514528 assertThat (updatedCluster .getAutoscalingMinServeNodes ()).isEqualTo (2 );
@@ -523,7 +537,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
523537 assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2777 );
524538
525539 updatedCluster =
526- client . updateClusterAutoscalingConfig (
540+ updateClusterAutoScalingConfigWithRetry (
527541 ClusterAutoscalingConfig .of (newInstanceId , clusterId )
528542 // testing default case
529543 .setStorageUtilizationGibPerNode (0 ));
@@ -614,4 +628,20 @@ private void basicClusterOperationTestHelper(String targetInstanceId, String tar
614628 assertThat (updatedCluster .getAutoscalingCpuPercentageTarget ()).isEqualTo (0 );
615629 assertThat (updatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (0 );
616630 }
631+
632+ private Cluster updateClusterAutoScalingConfigWithRetry (
633+ ClusterAutoscalingConfig clusterAutoscalingConfig ) throws Exception {
634+ int retryCount = 0 ;
635+ int maxRetries = 10 ;
636+ while (true ) {
637+ try {
638+ return client .updateClusterAutoscalingConfig (clusterAutoscalingConfig );
639+ } catch (FailedPreconditionException e ) {
640+ if (++retryCount == maxRetries ) throw e ;
641+ logger .log (
642+ Level .INFO , "Retrying updateClusterAutoscalingConfig, retryCount: " + retryCount );
643+ Thread .sleep (Duration .ofMinutes (1 ).toMillis ());
644+ }
645+ }
646+ }
617647}
0 commit comments