Skip to content

Commit be0a53a

Browse files
committed
Add support for downloading test datasets from S3 (with HTTP fallback)
1 parent 22c3315 commit be0a53a

File tree

6 files changed

+274
-8
lines changed

6 files changed

+274
-8
lines changed

.github/workflows/pr.yaml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

44
name: pr
@@ -193,6 +193,10 @@ jobs:
193193
build_type: pull-request
194194
script: ci/test_cpp.sh
195195
matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }}
196+
env_variables: |
197+
CUOPT_DATASET_S3_URI=${{ vars.CUOPT_DATASET_S3_URI }}
198+
CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
199+
CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
196200
conda-python-build:
197201
needs: [conda-cpp-build, compute-matrix-filters]
198202
secrets: inherit
@@ -211,6 +215,10 @@ jobs:
211215
build_type: pull-request
212216
script: ci/test_python.sh
213217
matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }}
218+
env_variables: |
219+
CUOPT_DATASET_S3_URI=${{ vars.CUOPT_DATASET_S3_URI }}
220+
CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
221+
CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
214222
docs-build:
215223
needs: conda-python-build
216224
secrets: inherit
@@ -265,6 +273,10 @@ jobs:
265273
build_type: pull-request
266274
script: ci/test_wheel_cuopt.sh
267275
matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }}
276+
env_variables: |
277+
CUOPT_DATASET_S3_URI=${{ vars.CUOPT_DATASET_S3_URI }}
278+
CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
279+
CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
268280
wheel-build-cuopt-server:
269281
needs: [checks, compute-matrix-filters]
270282
secrets: inherit
@@ -299,6 +311,10 @@ jobs:
299311
build_type: pull-request
300312
script: ci/test_wheel_cuopt_server.sh
301313
matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }}
314+
env_variables: |
315+
CUOPT_DATASET_S3_URI=${{ vars.CUOPT_DATASET_S3_URI }}
316+
CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
317+
CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
302318
test-self-hosted-server:
303319
needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files]
304320
secrets: inherit

.github/workflows/test.yaml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

44
name: test
@@ -35,6 +35,10 @@ jobs:
3535
date: ${{ inputs.date }}
3636
sha: ${{ inputs.sha }}
3737
script: ci/test_cpp.sh
38+
env_variables: |
39+
CUOPT_DATASET_S3_URI=${{ vars.CUOPT_DATASET_S3_URI }}
40+
CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
41+
CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
3842
conda-python-tests:
3943
secrets: inherit
4044
uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main
@@ -45,6 +49,10 @@ jobs:
4549
date: ${{ inputs.date }}
4650
sha: ${{ inputs.sha }}
4751
script: ci/test_python.sh
52+
env_variables: |
53+
CUOPT_DATASET_S3_URI=${{ vars.CUOPT_DATASET_S3_URI }}
54+
CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
55+
CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
4856
wheel-tests-cuopt:
4957
secrets: inherit
5058
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main
@@ -54,6 +62,10 @@ jobs:
5462
date: ${{ inputs.date }}
5563
sha: ${{ inputs.sha }}
5664
script: ci/test_wheel_cuopt.sh
65+
env_variables: |
66+
CUOPT_DATASET_S3_URI=${{ vars.CUOPT_DATASET_S3_URI }}
67+
CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
68+
CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
5769
wheel-tests-cuopt-server:
5870
secrets: inherit
5971
uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main
@@ -63,6 +75,10 @@ jobs:
6375
date: ${{ inputs.date }}
6476
sha: ${{ inputs.sha }}
6577
script: ci/test_wheel_cuopt_server.sh
78+
env_variables: |
79+
CUOPT_DATASET_S3_URI=${{ vars.CUOPT_DATASET_S3_URI }}
80+
CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
81+
CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}
6682
conda-notebook-tests:
6783
secrets: inherit
6884
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main
@@ -75,3 +91,7 @@ jobs:
7591
arch: "amd64"
7692
container_image: "rapidsai/ci-conda:26.02-latest"
7793
script: ci/test_notebooks.sh
94+
env_variables: |
95+
CUOPT_DATASET_S3_URI=${{ vars.CUOPT_DATASET_S3_URI }}
96+
CUOPT_AWS_ACCESS_KEY_ID=${{ secrets.CUOPT_AWS_ACCESS_KEY_ID }}
97+
CUOPT_AWS_SECRET_ACCESS_KEY=${{ secrets.CUOPT_AWS_SECRET_ACCESS_KEY }}

benchmarks/linear_programming/utils/get_datasets.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33

44
import os
@@ -663,7 +663,7 @@ def extract(file, dir, type):
663663
raise Exception(f"Unknown file extension found for extraction {file}")
664664
# download emps and compile
665665
# Disable emps for now
666-
if type == "netlib" and False:
666+
if type == "netlib":
667667
url = MittelmannInstances["emps"]
668668
file = os.path.join(dir, "emps.c")
669669
download(url, file)

datasets/get_test_data.sh

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,96 @@
11
#!/bin/bash
2-
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
# SPDX-License-Identifier: Apache-2.0
44

55
set -e
66
set -o pipefail
77

8+
################################################################################
9+
# S3 Dataset Download Support
10+
################################################################################
11+
# Set CUOPT_DATASET_S3_URI to base S3 path
12+
# AWS credentials should be configured via:
13+
# - Environment variables (CUOPT_AWS_ACCESS_KEY_ID, CUOPT_AWS_SECRET_ACCESS_KEY)
14+
# - Standard AWS variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
15+
# - AWS CLI configuration (~/.aws/credentials)
16+
# - IAM role (for EC2 instances)
17+
18+
#######################################
# Attempt to sync datasets from ${CUOPT_DATASET_S3_URI}routing/ using the
# AWS CLI. Falls through (non-zero return) so the caller can use the HTTP
# download path instead.
# Globals:
#   CUOPT_DATASET_S3_URI (read) - base S3 URI, expected to end with '/'
#   CUOPT_AWS_* / AWS_* credential variables (read; AWS_* exported when the
#   CUOPT_* overrides are present, and stay exported for later aws calls)
# Arguments:
#   $@ - optional dataset subdirectories to sync; empty means "sync all"
# Outputs:
#   progress/diagnostic messages to stdout
# Returns:
#   0 if every requested sync succeeded, 1 otherwise
#######################################
try_download_from_s3() {
    local s3_dirs=("$@")  # Array of directories to sync from S3

    if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then
        echo "CUOPT_DATASET_S3_URI not set, skipping S3 download..."
        return 1
    fi

    if ! command -v aws &> /dev/null; then
        echo "AWS CLI not found, skipping S3 download..."
        return 1
    fi

    # Append routing subdirectory to base S3 URI
    local s3_uri="${CUOPT_DATASET_S3_URI}routing/"
    echo "Attempting to download datasets from S3: $s3_uri"

    # Support custom credential variable names to avoid conflicts
    # Priority: CUOPT_* variables > standard AWS_* variables > aws configure
    local access_key="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}"
    local secret_key="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}"
    local session_token="${CUOPT_AWS_SESSION_TOKEN:-${AWS_SESSION_TOKEN:-}}"
    local region="${CUOPT_AWS_REGION:-${AWS_DEFAULT_REGION:-us-east-1}}"

    # Export credentials for the AWS CLI when the CUOPT_* overrides are set.
    # NOTE: the exports persist for the rest of this script (later `aws s3 sync`
    # calls rely on them). Guard with ${VAR:-} so this is safe under `set -u`.
    if [ -n "${CUOPT_AWS_ACCESS_KEY_ID:-}" ]; then
        echo "Using custom CUOPT_AWS_ACCESS_KEY_ID credentials"
        export AWS_ACCESS_KEY_ID="$access_key"
        export AWS_SECRET_ACCESS_KEY="$secret_key"
        [ -n "$session_token" ] && export AWS_SESSION_TOKEN="$session_token"
        export AWS_DEFAULT_REGION="$region"
    elif [ -n "${AWS_ACCESS_KEY_ID:-}" ]; then
        echo "Using AWS_ACCESS_KEY_ID credentials from environment"
    else
        echo "Using AWS credentials from aws configure"
    fi

    # Fail fast if credentials are missing/invalid before attempting any sync
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "AWS credentials not configured or invalid, skipping S3 download..."
        echo "Set CUOPT_AWS_ACCESS_KEY_ID/CUOPT_AWS_SECRET_ACCESS_KEY or AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY"
        return 1
    fi

    # Try to sync from S3; remember partial failures so the caller can fall
    # back to HTTP for everything.
    local success=true
    if [ ${#s3_dirs[@]} -eq 0 ]; then
        # No specific directories - download everything
        echo "Downloading all datasets from S3..."
        if ! aws s3 sync "$s3_uri" . --exclude "tmp/*" --exclude "get_test_data.sh" --exclude "*.sh" --exclude "*.md"; then
            success=false
        fi
    else
        # Download specific directories only
        echo "Downloading selected datasets: ${s3_dirs[*]}"
        local dir
        for dir in "${s3_dirs[@]}"; do
            echo "Syncing ${dir}/..."
            if ! aws s3 sync "${s3_uri}${dir}/" "${dir}/" --exclude "*.sh" --exclude "*.md"; then
                echo "Warning: Failed to download ${dir}, will try HTTP fallback"
                success=false
            fi
        done
    fi

    if $success; then
        echo "Successfully downloaded datasets from S3!"
        return 0
    else
        echo "Failed to download from S3, falling back to HTTP download..."
        return 1
    fi
}
90+
91+
################################################################################
92+
# HTTP Dataset Download Configuration
93+
################################################################################
894
# Update this to add/remove/change a dataset, using the following format:
995
#
1096
# comment about the dataset
@@ -107,7 +193,13 @@ URLS=($(echo "$DATASET_DATA"|awk '{if (NR%4 == 3) print $0}')) # extract 3rd fi
107193
# shellcheck disable=SC2207
108194
DESTDIRS=($(echo "$DATASET_DATA"|awk '{if (NR%4 == 0) print $0}')) # extract 4th fields to a bash array
109195

110-
echo Downloading ...
196+
# Try S3 download first with selected directories
197+
if try_download_from_s3 "${DESTDIRS[@]}"; then
198+
echo "Datasets successfully retrieved from S3, skipping HTTP download."
199+
exit 0
200+
fi
201+
202+
echo "Downloading from HTTP sources..."
111203

112204
# Download all tarfiles to a tmp dir
113205
rm -rf tmp

datasets/linear_programming/download_pdlp_test_dataset.sh

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/bin/bash
2-
# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
# SPDX-License-Identifier: Apache-2.0
44

55
set -euo pipefail
@@ -20,6 +20,76 @@ datasets=(
2020
"square41"
2121
)
2222

23+
BASEDIR=$(dirname "$0")
24+
25+
################################################################################
26+
# S3 Download Support
27+
################################################################################
28+
# Set CUOPT_DATASET_S3_URI to base S3 path
29+
# Use CUOPT_AWS_ACCESS_KEY_ID and CUOPT_AWS_SECRET_ACCESS_KEY
30+
# or standard AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
31+
32+
#######################################
# Attempt to sync the PDLP benchmark datasets from
# ${CUOPT_DATASET_S3_URI}linear_programming/pdlp/ using the AWS CLI.
# On any failure returns non-zero so the caller falls back to the Python
# HTTP downloader.
# Globals:
#   CUOPT_DATASET_S3_URI (read) - base S3 URI, expected to end with '/'
#   CUOPT_AWS_* / AWS_* credential variables (read; AWS_* exported when the
#   CUOPT_* overrides are present)
#   BASEDIR (read) - directory of this script, destination for the datasets
#   datasets (read) - array of dataset names to sync
# Returns:
#   0 if every dataset synced, 1 otherwise
#
# BUGFIX: this script runs under `set -euo pipefail`; the credential check
# previously dereferenced $CUOPT_AWS_ACCESS_KEY_ID unguarded, aborting with
# "unbound variable" whenever the override was unset (e.g. when relying on
# `aws configure` or an IAM role) instead of proceeding/falling back.
#######################################
try_download_from_s3() {
    if [ -z "${CUOPT_DATASET_S3_URI:-}" ]; then
        return 1
    fi

    if ! command -v aws &> /dev/null; then
        echo "AWS CLI not found, skipping S3 download..."
        return 1
    fi

    # Append linear_programming/pdlp subdirectory to base S3 URI
    local s3_uri="${CUOPT_DATASET_S3_URI}linear_programming/pdlp/"
    echo "Attempting to download PDLP datasets from S3: $s3_uri"

    # Support custom credential variable names
    local access_key="${CUOPT_AWS_ACCESS_KEY_ID:-${AWS_ACCESS_KEY_ID:-}}"
    local secret_key="${CUOPT_AWS_SECRET_ACCESS_KEY:-${AWS_SECRET_ACCESS_KEY:-}}"
    local session_token="${CUOPT_AWS_SESSION_TOKEN:-${AWS_SESSION_TOKEN:-}}"
    local region="${CUOPT_AWS_REGION:-${AWS_DEFAULT_REGION:-us-east-1}}"

    # Export credentials for the AWS CLI when the CUOPT_* override is set.
    # ${VAR:-} guard is required: the script runs under `set -u`.
    if [ -n "${CUOPT_AWS_ACCESS_KEY_ID:-}" ]; then
        echo "Using custom CUOPT_AWS_ACCESS_KEY_ID credentials"
        export AWS_ACCESS_KEY_ID="$access_key"
        export AWS_SECRET_ACCESS_KEY="$secret_key"
        [ -n "$session_token" ] && export AWS_SESSION_TOKEN="$session_token"
        export AWS_DEFAULT_REGION="$region"
    fi

    # Fail fast if credentials are missing/invalid before attempting any sync
    if ! aws sts get-caller-identity &> /dev/null; then
        echo "AWS credentials not configured, skipping S3 download..."
        return 1
    fi

    # Try to sync each dataset from the pdlp/ subdirectory; remember partial
    # failures so the caller can fall back to HTTP for everything.
    local success=true
    local dataset
    for dataset in "${datasets[@]}"; do
        echo "Downloading ${dataset} from S3..."
        if ! aws s3 sync "${s3_uri}${dataset}/" "$BASEDIR/${dataset}/" --exclude "*.sh"; then
            echo "Warning: Failed to download ${dataset}"
            success=false
        fi
    done

    if $success; then
        echo "Successfully downloaded PDLP datasets from S3!"
        return 0
    else
        echo "Some downloads failed, falling back to HTTP download..."
        return 1
    fi
}
85+
86+
# Try S3 first
87+
if try_download_from_s3; then
88+
exit 0
89+
fi
90+
91+
# HTTP fallback using Python script
92+
echo "Downloading PDLP datasets using Python script..."
2393
for dataset in "${datasets[@]}"; do
2494
python benchmarks/linear_programming/utils/get_datasets.py -d "$dataset"
2595
done

0 commit comments

Comments (0)