Skip to content

Commit 3156889

Browse files
authored
test: fix flaky BigtableInstanceAdminClientIT.createClusterWithAutoscalingAndPartialUpdateTest (#2432)
Attempt to fix a flaky java-bigtable Kokoro test with the approach proposed in b/369770575. Change-Id: Ia8bc9aa98922a226b84c19400dac91db05b0c6c8 Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/java-bigtable/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) - [ ] Rollback plan is reviewed and LGTMed - [ ] All new data plane features have a completed end to end testing plan Fixes #<issue_number_goes_here> ☕️ If you write sample code, please follow the [samples format]( https://togithub.com/GoogleCloudPlatform/java-docs-samples/blob/main/SAMPLE_FORMAT.md).
1 parent 687b6df commit 3156889

File tree

1 file changed

+37
-7
lines changed

1 file changed

+37
-7
lines changed

google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/admin/v2/it/BigtableInstanceAdminClientIT.java

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import static com.google.common.truth.Truth.assertWithMessage;
2020
import static com.google.common.truth.TruthJUnit.assume;
2121

22+
import com.google.api.gax.rpc.FailedPreconditionException;
2223
import com.google.cloud.Policy;
2324
import com.google.cloud.bigtable.admin.v2.BigtableInstanceAdminClient;
2425
import com.google.cloud.bigtable.admin.v2.models.AppProfile;
@@ -36,7 +37,10 @@
3637
import com.google.cloud.bigtable.test_helpers.env.EmulatorEnv;
3738
import com.google.cloud.bigtable.test_helpers.env.PrefixGenerator;
3839
import com.google.cloud.bigtable.test_helpers.env.TestEnvRule;
40+
import java.time.Duration;
3941
import java.util.List;
42+
import java.util.logging.Level;
43+
import java.util.logging.Logger;
4044
import org.junit.Before;
4145
import org.junit.BeforeClass;
4246
import org.junit.ClassRule;
@@ -49,6 +53,8 @@
4953
public class BigtableInstanceAdminClientIT {
5054

5155
@ClassRule public static TestEnvRule testEnvRule = new TestEnvRule();
56+
private static final Logger logger =
57+
Logger.getLogger(BigtableInstanceAdminClientIT.class.getName());
5258
@Rule public final PrefixGenerator prefixGenerator = new PrefixGenerator();
5359

5460
private String instanceId = testEnvRule.env().getInstanceId();
@@ -410,7 +416,7 @@ public void createClusterWithAutoscalingTest() {
410416
}
411417

412418
@Test
413-
public void createClusterWithAutoscalingAndPartialUpdateTest() {
419+
public void createClusterWithAutoscalingAndPartialUpdateTest() throws Exception {
414420
String newInstanceId = prefixGenerator.newPrefix();
415421
String newClusterId = newInstanceId + "-c1";
416422

@@ -448,8 +454,16 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
448454
assertThat(retrievedCluster.getAutoscalingCpuPercentageTarget()).isEqualTo(20);
449455
assertThat(retrievedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);
450456

457+
// The test might trigger cluster autoscaling, which races against the update cluster calls in
458+
// this test and causing the update cluster calls to fail with "FAILED_PRECONDITION: Cannot
459+
// update cluster that is currently being modified" error.
460+
// In order to avoid test flakiness due to this race condition, we wrap all the update cluster
461+
// call with a retry loop.
462+
// TODO: After we have a proper fix for the issue, remove the
463+
// updateClusterAutoScalingConfigWithRetry function and all the calls to it.
464+
451465
Cluster updatedCluster =
452-
client.updateClusterAutoscalingConfig(
466+
updateClusterAutoScalingConfigWithRetry(
453467
ClusterAutoscalingConfig.of(newInstanceId, clusterId).setMaxNodes(3));
454468
assertThat(updatedCluster.getAutoscalingMinServeNodes()).isEqualTo(1);
455469
assertThat(updatedCluster.getAutoscalingMaxServeNodes()).isEqualTo(3);
@@ -463,7 +477,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
463477
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);
464478

465479
updatedCluster =
466-
client.updateClusterAutoscalingConfig(
480+
updateClusterAutoScalingConfigWithRetry(
467481
ClusterAutoscalingConfig.of(newInstanceId, clusterId).setMinNodes(2));
468482
assertThat(updatedCluster.getAutoscalingMinServeNodes()).isEqualTo(2);
469483
assertThat(updatedCluster.getAutoscalingMaxServeNodes()).isEqualTo(3);
@@ -477,7 +491,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
477491
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);
478492

479493
updatedCluster =
480-
client.updateClusterAutoscalingConfig(
494+
updateClusterAutoScalingConfigWithRetry(
481495
ClusterAutoscalingConfig.of(newInstanceId, clusterId)
482496
.setCpuUtilizationTargetPercent(40));
483497
assertThat(updatedCluster.getAutoscalingMinServeNodes()).isEqualTo(2);
@@ -492,7 +506,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
492506
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);
493507

494508
updatedCluster =
495-
client.updateClusterAutoscalingConfig(
509+
updateClusterAutoScalingConfigWithRetry(
496510
ClusterAutoscalingConfig.of(newInstanceId, clusterId)
497511
.setCpuUtilizationTargetPercent(45)
498512
.setMaxNodes(5));
@@ -508,7 +522,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
508522
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2561);
509523

510524
updatedCluster =
511-
client.updateClusterAutoscalingConfig(
525+
updateClusterAutoScalingConfigWithRetry(
512526
ClusterAutoscalingConfig.of(newInstanceId, clusterId)
513527
.setStorageUtilizationGibPerNode(2777));
514528
assertThat(updatedCluster.getAutoscalingMinServeNodes()).isEqualTo(2);
@@ -523,7 +537,7 @@ public void createClusterWithAutoscalingAndPartialUpdateTest() {
523537
assertThat(retrievedUpdatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(2777);
524538

525539
updatedCluster =
526-
client.updateClusterAutoscalingConfig(
540+
updateClusterAutoScalingConfigWithRetry(
527541
ClusterAutoscalingConfig.of(newInstanceId, clusterId)
528542
// testing default case
529543
.setStorageUtilizationGibPerNode(0));
@@ -614,4 +628,20 @@ private void basicClusterOperationTestHelper(String targetInstanceId, String tar
614628
assertThat(updatedCluster.getAutoscalingCpuPercentageTarget()).isEqualTo(0);
615629
assertThat(updatedCluster.getStorageUtilizationGibPerNode()).isEqualTo(0);
616630
}
631+
632+
private Cluster updateClusterAutoScalingConfigWithRetry(
633+
ClusterAutoscalingConfig clusterAutoscalingConfig) throws Exception {
634+
int retryCount = 0;
635+
int maxRetries = 10;
636+
while (true) {
637+
try {
638+
return client.updateClusterAutoscalingConfig(clusterAutoscalingConfig);
639+
} catch (FailedPreconditionException e) {
640+
if (++retryCount == maxRetries) throw e;
641+
logger.log(
642+
Level.INFO, "Retrying updateClusterAutoscalingConfig, retryCount: " + retryCount);
643+
Thread.sleep(Duration.ofMinutes(1).toMillis());
644+
}
645+
}
646+
}
617647
}

0 commit comments

Comments
 (0)