19
19
import static com .google .common .truth .Truth .assertWithMessage ;
20
20
import static com .google .common .truth .TruthJUnit .assume ;
21
21
22
+ import com .google .api .gax .rpc .FailedPreconditionException ;
22
23
import com .google .cloud .Policy ;
23
24
import com .google .cloud .bigtable .admin .v2 .BigtableInstanceAdminClient ;
24
25
import com .google .cloud .bigtable .admin .v2 .models .AppProfile ;
36
37
import com .google .cloud .bigtable .test_helpers .env .EmulatorEnv ;
37
38
import com .google .cloud .bigtable .test_helpers .env .PrefixGenerator ;
38
39
import com .google .cloud .bigtable .test_helpers .env .TestEnvRule ;
40
+ import java .time .Duration ;
39
41
import java .util .List ;
42
+ import java .util .logging .Level ;
43
+ import java .util .logging .Logger ;
40
44
import org .junit .Before ;
41
45
import org .junit .BeforeClass ;
42
46
import org .junit .ClassRule ;
49
53
public class BigtableInstanceAdminClientIT {
50
54
51
55
@ ClassRule public static TestEnvRule testEnvRule = new TestEnvRule ();
56
+ private static final Logger logger =
57
+ Logger .getLogger (BigtableInstanceAdminClientIT .class .getName ());
52
58
@ Rule public final PrefixGenerator prefixGenerator = new PrefixGenerator ();
53
59
54
60
private String instanceId = testEnvRule .env ().getInstanceId ();
@@ -410,7 +416,7 @@ public void createClusterWithAutoscalingTest() {
410
416
}
411
417
412
418
@ Test
413
- public void createClusterWithAutoscalingAndPartialUpdateTest () {
419
+ public void createClusterWithAutoscalingAndPartialUpdateTest () throws Exception {
414
420
String newInstanceId = prefixGenerator .newPrefix ();
415
421
String newClusterId = newInstanceId + "-c1" ;
416
422
@@ -448,8 +454,16 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
448
454
assertThat (retrievedCluster .getAutoscalingCpuPercentageTarget ()).isEqualTo (20 );
449
455
assertThat (retrievedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
450
456
457
+ // The test might trigger cluster autoscaling, which races against the update cluster calls in
458
+ // this test and causes the update cluster calls to fail with "FAILED_PRECONDITION: Cannot
459
+ // update cluster that is currently being modified" error.
460
+ // In order to avoid test flakiness due to this race condition, we wrap all the update cluster
461
+ // calls with a retry loop.
462
+ // TODO: After we have a proper fix for the issue, remove the
463
+ // updateClusterAutoScalingConfigWithRetry function and all the calls to it.
464
+
451
465
Cluster updatedCluster =
452
- client . updateClusterAutoscalingConfig (
466
+ updateClusterAutoScalingConfigWithRetry (
453
467
ClusterAutoscalingConfig .of (newInstanceId , clusterId ).setMaxNodes (3 ));
454
468
assertThat (updatedCluster .getAutoscalingMinServeNodes ()).isEqualTo (1 );
455
469
assertThat (updatedCluster .getAutoscalingMaxServeNodes ()).isEqualTo (3 );
@@ -463,7 +477,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
463
477
assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
464
478
465
479
updatedCluster =
466
- client . updateClusterAutoscalingConfig (
480
+ updateClusterAutoScalingConfigWithRetry (
467
481
ClusterAutoscalingConfig .of (newInstanceId , clusterId ).setMinNodes (2 ));
468
482
assertThat (updatedCluster .getAutoscalingMinServeNodes ()).isEqualTo (2 );
469
483
assertThat (updatedCluster .getAutoscalingMaxServeNodes ()).isEqualTo (3 );
@@ -477,7 +491,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
477
491
assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
478
492
479
493
updatedCluster =
480
- client . updateClusterAutoscalingConfig (
494
+ updateClusterAutoScalingConfigWithRetry (
481
495
ClusterAutoscalingConfig .of (newInstanceId , clusterId )
482
496
.setCpuUtilizationTargetPercent (40 ));
483
497
assertThat (updatedCluster .getAutoscalingMinServeNodes ()).isEqualTo (2 );
@@ -492,7 +506,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
492
506
assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
493
507
494
508
updatedCluster =
495
- client . updateClusterAutoscalingConfig (
509
+ updateClusterAutoScalingConfigWithRetry (
496
510
ClusterAutoscalingConfig .of (newInstanceId , clusterId )
497
511
.setCpuUtilizationTargetPercent (45 )
498
512
.setMaxNodes (5 ));
@@ -508,7 +522,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
508
522
assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2561 );
509
523
510
524
updatedCluster =
511
- client . updateClusterAutoscalingConfig (
525
+ updateClusterAutoScalingConfigWithRetry (
512
526
ClusterAutoscalingConfig .of (newInstanceId , clusterId )
513
527
.setStorageUtilizationGibPerNode (2777 ));
514
528
assertThat (updatedCluster .getAutoscalingMinServeNodes ()).isEqualTo (2 );
@@ -523,7 +537,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
523
537
assertThat (retrievedUpdatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (2777 );
524
538
525
539
updatedCluster =
526
- client . updateClusterAutoscalingConfig (
540
+ updateClusterAutoScalingConfigWithRetry (
527
541
ClusterAutoscalingConfig .of (newInstanceId , clusterId )
528
542
// testing default case
529
543
.setStorageUtilizationGibPerNode (0 ));
@@ -614,4 +628,20 @@ private void basicClusterOperationTestHelper(String targetInstanceId, String tar
614
628
assertThat (updatedCluster .getAutoscalingCpuPercentageTarget ()).isEqualTo (0 );
615
629
assertThat (updatedCluster .getStorageUtilizationGibPerNode ()).isEqualTo (0 );
616
630
}
631
+
632
+ private Cluster updateClusterAutoScalingConfigWithRetry (
633
+ ClusterAutoscalingConfig clusterAutoscalingConfig ) throws Exception {
634
+ int retryCount = 0 ;
635
+ int maxRetries = 10 ;
636
+ while (true ) {
637
+ try {
638
+ return client .updateClusterAutoscalingConfig (clusterAutoscalingConfig );
639
+ } catch (FailedPreconditionException e ) {
640
+ if (++retryCount == maxRetries ) throw e ;
641
+ logger .log (
642
+ Level .INFO , "Retrying updateClusterAutoscalingConfig, retryCount: " + retryCount );
643
+ Thread .sleep (Duration .ofMinutes (1 ).toMillis ());
644
+ }
645
+ }
646
+ }
617
647
}
0 commit comments