Skip to content

Commit e0174d0

Browse files
authored
Retry in w.clusters().ensureClusterIsRunning(id) when cluster is simultaneously started by two different processes (#134)
This PR adds a retry for timing edge cases such as `INVALID_STATE: Cluster XXX is in unexpected state Pending.` Related PRs in the other SDKs: databricks/databricks-sdk-py#283 and databricks/databricks-sdk-go#580.
1 parent a2479d7 commit e0174d0

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

databricks-sdk-java/src/main/java/com/databricks/sdk/mixin/ClustersExt.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
public class ClustersExt extends ClustersAPI {
2020
private static final Logger LOG = LoggerFactory.getLogger(ClustersExt.class);
21+
private static final String INVALID_STATE = "INVALID_STATE";
2122

2223
public ClustersExt(ApiClient apiClient) {
2324
super(apiClient);
@@ -217,9 +218,11 @@ public void ensureClusterIsRunning(String clusterId) throws TimeoutException {
217218
// running, reconfiguring
218219
LOG.debug("Cluster is {}: {}", info.getState(), info.getStateMessage());
219220
return;
220-
} catch (IllegalStateException e) {
221-
LOG.debug("Cluster reached illegal state. Retrying startup", e);
222221
} catch (DatabricksError e) {
222+
if (e.getErrorCode().equals(INVALID_STATE)) {
223+
LOG.debug("Cluster was started by other process: {} Retrying.", e.getMessage());
224+
continue;
225+
}
223226
LOG.debug("Received {} error code", e.getErrorCode());
224227
throw e;
225228
}

0 commit comments

Comments
 (0)