diff --git a/rust/operator-binary/src/connect/controller.rs b/rust/operator-binary/src/connect/controller.rs
index 345bac6d..5544fb39 100644
--- a/rust/operator-binary/src/connect/controller.rs
+++ b/rust/operator-binary/src/connect/controller.rs
@@ -315,6 +315,12 @@ pub async fn reconcile(
     let cluster_operation_cond_builder =
         ClusterOperationsConditionBuilder::new(&scs.spec.cluster_operation);
 
+    // TODO: This StatefulSet only contains the driver. We should probably also
+    // consider the state of the executors to determine if the
+    // SparkConnectServer is ready. This depends on the availability and
+    // resilience properties of Spark and could e.g. be "driver and more than
+    // 75% of the executors ready". Special care needs to be taken about
+    // auto-scaling executors in this case (if/once supported).
     let status = SparkConnectServerStatus {
         conditions: compute_conditions(scs, &[&ss_cond_builder, &cluster_operation_cond_builder]),
     };
diff --git a/tests/templates/kuttl/spark-connect/10-assert.yaml b/tests/templates/kuttl/spark-connect/10-assert.yaml
index 15886034..6e2bd2a2 100644
--- a/tests/templates/kuttl/spark-connect/10-assert.yaml
+++ b/tests/templates/kuttl/spark-connect/10-assert.yaml
@@ -1,7 +1,7 @@
 ---
 apiVersion: kuttl.dev/v1beta1
 kind: TestAssert
-timeout: 300
+timeout: 600
 ---
 apiVersion: apps/v1
 kind: StatefulSet
@@ -23,20 +23,3 @@ metadata:
   name: spark-connect-server-headless
 spec:
   type: ClusterIP
----
-apiVersion: kuttl.dev/v1beta1
-kind: TestAssert
-timeout: 300
-commands:
-  # Test that spark connect executors are running.
-  # Sleep to prevent the following spark connect app from failing
-  # while the spark-connect server is busy setting up the executors.
-  - script: |
-      # wait for the spark-connect CR to become available
-      kubectl wait --for=condition=Available sparkconnectservers/spark-connect --namespace "$NAMESPACE" --timeout=3m
-
-      EXECUTOR_COUNT=$(kubectl get pods -n "$NAMESPACE" --selector 'spark-app-name=spark-connect-server' --field-selector='status.phase=Running' -o NAME|wc -l)
-      test 1 -eq "$EXECUTOR_COUNT"
-
-      # wait a little longer to increase the chance apps being able to connect
-      sleep 5
diff --git a/tests/templates/kuttl/spark-connect/11-assert.yaml b/tests/templates/kuttl/spark-connect/11-assert.yaml
new file mode 100644
index 00000000..2a0f4043
--- /dev/null
+++ b/tests/templates/kuttl/spark-connect/11-assert.yaml
@@ -0,0 +1,18 @@
+---
+apiVersion: kuttl.dev/v1beta1
+kind: TestAssert
+timeout: 600
+commands:
+  # Test that spark connect executors are running.
+  # Sleep to prevent the following spark connect app from failing
+  # while the spark-connect server is busy setting up the executors.
+  - script: |
+      # wait for the spark-connect CR to become available
+      kubectl wait --for=condition=Available sparkconnectservers/spark-connect --namespace "$NAMESPACE" --timeout=3m
+
+      # FIXME: As the status currently does not respect the executors state, we wait for them to be ready ourselves
+      # (see TODO comment in code):
+      kubectl wait --for=condition=Ready pod -l spark-app-name=spark-connect-server -n "$NAMESPACE" --timeout=10m
+
+      # wait a little longer to increase the chance apps being able to connect
+      sleep 10
diff --git a/tests/templates/kuttl/spark-connect/20-assert.yaml b/tests/templates/kuttl/spark-connect/20-assert.yaml
index 930e3383..c37bc28e 100644
--- a/tests/templates/kuttl/spark-connect/20-assert.yaml
+++ b/tests/templates/kuttl/spark-connect/20-assert.yaml
@@ -1,9 +1,7 @@
 ---
 apiVersion: kuttl.dev/v1beta1
 kind: TestAssert
-metadata:
-  name: simple-connect-app
-timeout: 300
+timeout: 600
 ---
 apiVersion: batch/v1
 kind: Job
diff --git a/tests/templates/kuttl/spark-connect/20-run-connect-client.yaml.j2 b/tests/templates/kuttl/spark-connect/20-run-connect-client.yaml.j2
index e6950b5a..85aff8e7 100644
--- a/tests/templates/kuttl/spark-connect/20-run-connect-client.yaml.j2
+++ b/tests/templates/kuttl/spark-connect/20-run-connect-client.yaml.j2
@@ -54,7 +54,7 @@ spec:
   template:
     spec:
       restartPolicy: OnFailure
-      activeDeadlineSeconds: 100
+      activeDeadlineSeconds: 600
       containers:
        - name: simple-connect-app
 {% if test_scenario['values']['spark-connect-client'].find(",") > 0 %}
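
Note on the TODO in controller.rs above: the readiness rule it suggests ("driver and more than 75% of the executors ready") could look roughly like the following sketch. This is only an illustration of the suggested condition, not part of the change; the function name, the plain count parameters and the zero-executor fallback are assumptions.

// Hypothetical sketch of the readiness rule from the TODO above:
// "driver and more than 75% of the executors ready". All names here are
// illustrative assumptions and do not exist in the operator code.
fn connect_server_ready(
    driver_ready: bool,
    ready_executors: usize,
    expected_executors: usize,
) -> bool {
    // With no executors expected (e.g. scaled down, or if auto-scaling is ever
    // supported and has not provisioned any executors yet), fall back to the
    // driver state alone.
    if expected_executors == 0 {
        return driver_ready;
    }
    // "More than 75%", expressed with integer arithmetic to avoid float comparisons.
    driver_ready && ready_executors * 4 > expected_executors * 3
}

fn main() {
    assert!(connect_server_ready(true, 4, 4)); // all executors ready
    assert!(!connect_server_ready(true, 3, 4)); // exactly 75% is not "more than 75%"
    assert!(!connect_server_ready(false, 4, 4)); // driver itself not ready
}

In the actual controller such a check would presumably feed into compute_conditions alongside the existing StatefulSet condition builder, but the TODO deliberately leaves the exact threshold and wiring open.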