Skip to content

Commit 288d621

Browse files
committed
Remove environment variable settings from the startup script
1 parent e9cfd24 commit 288d621

File tree

3 files changed

+5
-7
lines changed

3 files changed

+5
-7
lines changed

.github/actions/gke-xpk/action.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ runs:
129129
- name: Set workload commands
130130
shell: bash -x -e -u {0}
131131
run: |
132+
133+
# Install the dependencies needed to export artifacts from the container to the GCS bucket
132134
PRELUDE="
133135
apt install -y ripgrep > /dev/null;
134136
curl -LO https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz;
@@ -156,6 +158,7 @@ runs:
156158
env;
157159
"
158160
161+
# gsutil command to export the container's output directory (inputs.CONTAINER_OUTPUT_PATH, e.g. /opt/output) to the GCS bucket
159162
POSTLUDE="
160163
./google-cloud-sdk/bin/gsutil cp -r ${{ inputs.CONTAINER_OUTPUT_PATH }}/ ${GCS_ARTIFACT_PATH}/node-0\$NODE_RANK;
161164
${{ inputs.EXIT_COMMAND }}

.github/workflows/jax-vllm-offloading-gke-grpo.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ jobs:
6060
VLLM_LOAD_FORMAT=dummy
6161
NCCL_NET_PLUGIN=/opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
6262
NCCL_TUNER_PLUGIN=none
63-
MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
63+
MODEL_NAME=${{ matrix.model }}
6464
NCCL_CUMEM_ENABLE=0
6565
NCCL_BUFFSIZE=16777216
6666
XLA_FLAGS=--xla_gpu_enable_latency_hiding_scheduler=true --xla_gpu_enable_command_buffer=FUSION,CUBLAS,CUDNN,CUSTOM_CALL --xla_gpu_collective_permute_combine_threshold_bytes=8589934592 --xla_gpu_reduce_scatter_combine_threshold_bytes=8589934592 --xla_gpu_all_gather_combine_threshold_bytes=8589934592 --xla_gpu_all_reduce_combine_threshold_bytes=8589934592
@@ -74,9 +74,6 @@ jobs:
7474
COMMAND: |
7575
set -x;
7676
export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/cuda-12.9/compat/lib.real:/usr/local/nvidia/lib64;
77-
export MODEL_NAME=${{ matrix.model }}
78-
export JAX_COORDINATOR_ADDRESS=${JOBSET_NAME}-${REPLICATED_JOB_NAME}-0-0.${JOBSET_NAME}:${JAX_COORDINATOR_PORT}
79-
export GATEWAY_URL=${JOBSET_NAME}:${GATEWAY_PORT}
8077
env;
8178
8279
pip install jax[k8s];

.github/workflows/jax-vllm-offloading-gke-transfer.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ jobs:
5858
VLLM_DISTRIBUTED_BACKEND=mp
5959
VLLM_ATTENTION_BACKEND=TRITON_ATTN
6060
VLLM_LOAD_FORMAT=dummy
61+
MODEL_NAME=${{ matrix.model }}
6162
NCCL_NET_PLUGIN=/opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
6263
NCCL_TUNER_PLUGIN=none
6364
NCCL_CUMEM_ENABLE=0
@@ -72,9 +73,6 @@ jobs:
7273
COMMAND: |
7374
set -x;
7475
export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/cuda-12.9/compat/lib.real:/usr/local/nvidia/lib64;
75-
export MODEL_NAME=${{ matrix.model }}
76-
export JAX_COORDINATOR_ADDRESS=${JOBSET_NAME}-${REPLICATED_JOB_NAME}-0-0.${JOBSET_NAME}:${JAX_COORDINATOR_PORT}
77-
export GATEWAY_URL=${JOBSET_NAME}:${GATEWAY_PORT}
7876
env;
7977
8078
pip install jax[k8s];

0 commit comments

Comments
 (0)