Skip to content

Commit e85af07

Browse files
authored
[ML] Split ES integration tests into parallel steps (#2990)
Split each per-architecture Elasticsearch integration test step into two independent steps that run in parallel:

- Multi-Node Tests (javaRestTest) — ~27 min on x86_64, ~37 min on aarch64
- YAML REST Tests (yamlRestTest) — ~12 min on x86_64, ~15 min on aarch64

Previously these ran sequentially in a single step, making the total wall-clock time ~40 min (x86_64) / ~53 min (aarch64). Running them in parallel reduces the critical path to the duration of the slower suite, saving ~11-14 minutes per PR build.

The split also improves failure attribution (immediately visible which suite failed) and enables selective retry of just the failed suite.

A new ES_TEST_SUITE environment variable controls which Gradle command to run. When unset, both suites run sequentially for backward compatibility with local developer use.

Made-with: Cursor
1 parent b29f459 commit e85af07

File tree

3 files changed

+68
-11
lines changed

3 files changed

+68
-11
lines changed

.buildkite/pipelines/run_es_tests_aarch64.yml.sh

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
cat <<EOL
1212
steps:
13-
- label: "Java :java: Integration Tests for aarch64 :hammer:"
14-
key: "java_integration_tests_aarch64"
13+
- label: "Java :java: Multi-Node Tests for aarch64 :hammer:"
14+
key: "java_multinode_tests_aarch64"
1515
command:
1616
- 'sudo rpm --import https://yum.corretto.aws/corretto.key'
1717
- 'sudo curl -L -o /etc/yum.repos.d/corretto.repo https://yum.corretto.aws/corretto.repo'
@@ -28,7 +28,30 @@ steps:
2828
env:
2929
IVY_REPO: "../ivy"
3030
GRADLE_JVM_OPTS: "-Dorg.gradle.jvmargs=-Xmx16g"
31+
ES_TEST_SUITE: "javaRestTest"
3132
notify:
3233
- github_commit_status:
33-
context: "Java Integration Tests for aarch64"
34+
context: "Java Multi-Node Tests for aarch64"
35+
- label: "Java :java: YAML REST Tests for aarch64 :hammer:"
36+
key: "java_yaml_rest_tests_aarch64"
37+
command:
38+
- 'sudo rpm --import https://yum.corretto.aws/corretto.key'
39+
- 'sudo curl -L -o /etc/yum.repos.d/corretto.repo https://yum.corretto.aws/corretto.repo'
40+
- 'sudo dnf install -y java-21-amazon-corretto-devel'
41+
- 'buildkite-agent artifact download "build/*" . --step build_test_linux-aarch64-RelWithDebInfo'
42+
- '.buildkite/scripts/steps/run_es_tests.sh || (cd ../elasticsearch && find x-pack -name logs | xargs tar cvzf logs.tgz && buildkite-agent artifact upload logs.tgz && false)'
43+
depends_on: "build_test_linux-aarch64-RelWithDebInfo"
44+
agents:
45+
provider: aws
46+
instanceType: m6g.2xlarge
47+
imagePrefix: core-almalinux-8-aarch64
48+
diskSizeGb: 100
49+
diskName: '/dev/xvda'
50+
env:
51+
IVY_REPO: "../ivy"
52+
GRADLE_JVM_OPTS: "-Dorg.gradle.jvmargs=-Xmx16g"
53+
ES_TEST_SUITE: "yamlRestTest"
54+
notify:
55+
- github_commit_status:
56+
context: "Java YAML REST Tests for aarch64"
3457
EOL

.buildkite/pipelines/run_es_tests_x86_64.yml.sh

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
cat <<EOL
1212
steps:
13-
- label: "Java :java: Integration Tests for x86_64 :hammer:"
14-
key: "java_integration_tests_x86_64"
13+
- label: "Java :java: Multi-Node Tests for x86_64 :hammer:"
14+
key: "java_multinode_tests_x86_64"
1515
command:
1616
- 'sudo rpm --import https://yum.corretto.aws/corretto.key'
1717
- 'sudo curl -L -o /etc/yum.repos.d/corretto.repo https://yum.corretto.aws/corretto.repo'
@@ -28,7 +28,30 @@ steps:
2828
env:
2929
IVY_REPO: "../ivy"
3030
GRADLE_JVM_OPTS: "-Dorg.gradle.jvmargs=-Xmx16g"
31+
ES_TEST_SUITE: "javaRestTest"
3132
notify:
3233
- github_commit_status:
33-
context: "Java Integration Tests for x86_64"
34+
context: "Java Multi-Node Tests for x86_64"
35+
- label: "Java :java: YAML REST Tests for x86_64 :hammer:"
36+
key: "java_yaml_rest_tests_x86_64"
37+
command:
38+
- 'sudo rpm --import https://yum.corretto.aws/corretto.key'
39+
- 'sudo curl -L -o /etc/yum.repos.d/corretto.repo https://yum.corretto.aws/corretto.repo'
40+
- 'sudo dnf install -y java-21-amazon-corretto-devel'
41+
- 'buildkite-agent artifact download "build/*" . --step build_test_linux-x86_64-RelWithDebInfo'
42+
- '.buildkite/scripts/steps/run_es_tests.sh || (cd ../elasticsearch && find x-pack -name logs | xargs tar cvzf logs.tgz && buildkite-agent artifact upload logs.tgz && false)'
43+
depends_on: "build_test_linux-x86_64-RelWithDebInfo"
44+
agents:
45+
provider: aws
46+
instanceType: m6i.2xlarge
47+
imagePrefix: core-amazonlinux-2023
48+
diskSizeGb: 100
49+
diskName: '/dev/xvda'
50+
env:
51+
IVY_REPO: "../ivy"
52+
GRADLE_JVM_OPTS: "-Dorg.gradle.jvmargs=-Xmx16g"
53+
ES_TEST_SUITE: "yamlRestTest"
54+
notify:
55+
- github_commit_status:
56+
context: "Java YAML REST Tests for x86_64"
3457
EOL

dev-tools/run_es_tests.sh

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,6 @@ GCS_CACHE_PATH=""
131131
if [ -n "${GRADLE_BUILD_CACHE_GCS_BUCKET:-}" ] && [ -n "${GOOGLE_APPLICATION_CREDENTIALS:-}" ]; then
132132
GCS_CACHE_PATH="gs://${GRADLE_BUILD_CACHE_GCS_BUCKET}/${CACHE_KEY}.tar.gz"
133133
if command -v gsutil &>/dev/null; then
134-
# The gcloud SDK gsutil needs explicit service account activation;
135-
# GOOGLE_APPLICATION_CREDENTIALS alone is not sufficient.
136134
if command -v gcloud &>/dev/null; then
137135
gcloud auth activate-service-account --key-file="$GOOGLE_APPLICATION_CREDENTIALS" 2>/dev/null || true
138136
fi
@@ -152,8 +150,22 @@ if [ -n "${GRADLE_BUILD_CACHE_GCS_BUCKET:-}" ] && [ -n "${GOOGLE_APPLICATION_CRE
152150
fi
153151
fi
154152

155-
./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:ml:qa:native-multi-node-tests:javaRestTest $EXTRA_TEST_OPTS
156-
./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:yamlRestTest --tests "org.elasticsearch.xpack.test.rest.XPackRestIT.test {p0=ml/*}" $EXTRA_TEST_OPTS
153+
# ES_TEST_SUITE selects which test suite to run:
154+
# javaRestTest - native multi-node integration tests only
155+
# yamlRestTest - ML YAML REST tests only
156+
# (unset/empty, or any unrecognized value) - both suites sequentially (backward compatible)
157+
case "${ES_TEST_SUITE:-}" in
158+
javaRestTest)
159+
./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:ml:qa:native-multi-node-tests:javaRestTest $EXTRA_TEST_OPTS
160+
;;
161+
yamlRestTest)
162+
./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:yamlRestTest --tests "org.elasticsearch.xpack.test.rest.XPackRestIT.test {p0=ml/*}" $EXTRA_TEST_OPTS
163+
;;
164+
*)
165+
./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:ml:qa:native-multi-node-tests:javaRestTest $EXTRA_TEST_OPTS
166+
./gradlew $GRADLE_JVM_OPTS $CACHE_ARGS -Dbuild.ml_cpp.repo="$IVY_REPO_URL" :x-pack:plugin:yamlRestTest --tests "org.elasticsearch.xpack.test.rest.XPackRestIT.test {p0=ml/*}" $EXTRA_TEST_OPTS
167+
;;
168+
esac
157169

158170
# Upload Gradle build cache to GCS for future builds.
159171
if [ -n "$GCS_CACHE_PATH" ] && [ -d "$GRADLE_CACHE_DIR" ] && command -v gsutil &>/dev/null; then
@@ -169,4 +181,3 @@ if [ -n "$GCS_CACHE_PATH" ] && [ -d "$GRADLE_CACHE_DIR" ] && command -v gsutil &
169181
echo "Skipping cache upload (size=${CACHE_SIZE:-0}M, expected 1-4095M)"
170182
fi
171183
fi
172-

0 commit comments

Comments
 (0)