Skip to content

Commit e8f25f7

Browse files
CI: Fix NPROC handling (#1014)
* Pass NPROC var to Docker commands

  Signed-off-by: Alexey Rivkin <[email protected]>

* Fix POSIX shell compatibility in common.sh

  Replace bash [[ ]] with POSIX [ ] in container detection. Scripts using #!/bin/sh failed on the [[ syntax, causing NPROC to default to 256 CPUs instead of the memory-based limit, leading to OOM.

  Signed-off-by: Alexey Rivkin <[email protected]>

---------

Signed-off-by: Alexey Rivkin <[email protected]>
1 parent 6cf6818 commit e8f25f7

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

.ci/jenkins/lib/test-matrix.yaml

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ steps:
9898
parallel: false
9999
run: |
100100
set -ex
101-
docker exec -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c "UCX_VERSION=${ucx_version} .gitlab/build.sh ${INSTALL_DIR}"
101+
docker exec -e NPROC -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c "UCX_VERSION=${ucx_version} .gitlab/build.sh ${INSTALL_DIR}"
102102
103103
onfail: |
104104
docker rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
@@ -107,31 +107,31 @@ steps:
107107
- name: Test CPP
108108
parallel: false
109109
run: |
110-
timeout ${TEST_TIMEOUT}m docker exec -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_cpp.sh ${INSTALL_DIR}"
110+
timeout ${TEST_TIMEOUT}m docker exec -e NPROC -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_cpp.sh ${INSTALL_DIR}"
111111
onfail: |
112112
docker rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
113113
docker image rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
114114
115115
- name: Test Python
116116
parallel: false
117117
run: |
118-
timeout ${TEST_TIMEOUT}m docker exec -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_python.sh ${INSTALL_DIR}"
118+
timeout ${TEST_TIMEOUT}m docker exec -e NPROC -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_python.sh ${INSTALL_DIR}"
119119
onfail: |
120120
docker rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
121121
docker image rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
122122
123123
- name: Test Nixlbench
124124
parallel: false
125125
run: |
126-
timeout ${TEST_TIMEOUT}m docker exec -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_nixlbench.sh ${INSTALL_DIR}"
126+
timeout ${TEST_TIMEOUT}m docker exec -e NPROC -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_nixlbench.sh ${INSTALL_DIR}"
127127
onfail: |
128128
docker rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
129129
docker image rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
130130
131131
- name: Test Rust
132132
parallel: false
133133
run: |
134-
timeout ${TEST_TIMEOUT}m docker exec -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_rust.sh ${INSTALL_DIR}"
134+
timeout ${TEST_TIMEOUT}m docker exec -e NPROC -w ${CONTAINER_WORKSPACE} "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}" /bin/bash -c ".gitlab/test_rust.sh ${INSTALL_DIR}"
135135
always: |
136136
docker rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"
137137
docker image rm -f "${JOB_BASE_NAME}-${BUILD_ID}-${axis_index}"

.ci/scripts/common.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ export TEST_LIBFABRIC=${TEST_LIBFABRIC:-false}
9898
# Set default parallelism for make/ninja (can be overridden by NPROC env var)
9999
if [ -z "$NPROC" ]; then
100100
# In containers, calculate based on memory limits to avoid OOM
101-
if [[ -f /.dockerenv || -f /run/.containerenv || -n "${KUBERNETES_SERVICE_HOST}" ]]; then
101+
if [ -f /.dockerenv ] || [ -f /run/.containerenv ] || [ -n "${KUBERNETES_SERVICE_HOST}" ]; then
102102
if [ -f /sys/fs/cgroup/memory/memory.limit_in_bytes ]; then
103103
limit=$(cat /sys/fs/cgroup/memory/memory.limit_in_bytes)
104104
elif [ -f /sys/fs/cgroup/memory.max ]; then

0 commit comments

Comments
 (0)