Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 54 additions & 46 deletions .github/workflows/build-test-publish-wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,52 +28,60 @@ defaults:

jobs:
pre-flight:
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.64.2
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.70.1
with:
default_runner_prefix: ${{ vars.DEFAULT_RUNNER_PREFIX }}
non_nvidia_runner_prefix: ${{ vars.NON_NVIDIA_RUNNER_PREFIX }}
default_test_data_path: ${{ vars.DEFAULT_TEST_DATA_PATH }}
non_nvidia_test_data_path: ${{ vars.NON_NVIDIA_TEST_DATA_PATH }}
secrets:
NVIDIA_MANAGEMENT_ORG_PAT: ${{ secrets.NVIDIA_MANAGEMENT_ORG_PAT }}

# build-test-publish-wheel:
# needs: [pre-flight]
# if: |
# !(needs.pre-flight.outputs.docs_only == 'true'
# || needs.pre-flight.outputs.is_deployment_workflow == 'true')
# uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.48.0
# with:
# dry-run: true
# python-package: nemo_export_deploy_common
# python-version: "3.10"
# packaging: uv
# no-publish: ${{ !(github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) }}
# skip-test-wheel: true
# custom-container: nvcr.io/nvidia/pytorch:25.11-py3
# no-build-isolation: true
# runner: linux-amd64-cpu16
# secrets:
# TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
# TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
# SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
# SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
# GH_TOKEN: ${{ secrets.PAT }}
build-test-publish-wheel:
needs: [pre-flight]
if: |
!(needs.pre-flight.outputs.docs_only == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true')
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.70.1
with:
dry-run: true
python-package: nemo_export_deploy_common
python-version: "3.10"
packaging: uv
no-publish: ${{ !(github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) }}
skip-test-wheel: true
custom-container: nvcr.io/nvidia/pytorch:25.11-py3
no-build-isolation: true
runner: ${{ needs.pre-flight.outputs.runner_prefix }}-gpu-x2-container
container-options: "--gpus all --runtime=nvidia"
secrets:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
SLACK_WEBHOOK: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
GH_TOKEN: ${{ secrets.PAT }}

# build-test-publish-wheel-summary:
# needs: [pre-flight, build-test-publish-wheel]
# if: |
# (
# needs.pre-flight.outputs.docs_only == 'true'
# || needs.pre-flight.outputs.is_deployment_workflow == 'true'
# || always()
# )
# && !cancelled()
# runs-on: ubuntu-latest
# steps:
# - name: Result
# run: |
# FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0
build-test-publish-wheel-summary:
needs: [pre-flight, build-test-publish-wheel]
if: |
(
needs.pre-flight.outputs.docs_only == 'true'
|| needs.pre-flight.outputs.is_deployment_workflow == 'true'
|| always()
)
&& !cancelled()
runs-on: ubuntu-latest
steps:
- name: Result
run: |
FAILED_JOBS=$(gh run view $GITHUB_RUN_ID --json jobs --jq '[.jobs[] | select(.status == "completed" and .conclusion != "success")] | length') || echo 0

# if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
# echo "✅ All previous jobs completed successfully"
# exit 0
# else
# echo "❌ Found $FAILED_JOBS failed job(s)"
# # Show which jobs failed
# gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
# exit 1
# fi
if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
echo "✅ All previous jobs completed successfully"
exit 0
else
echo "❌ Found $FAILED_JOBS failed job(s)"
# Show which jobs failed
gh run view $GITHUB_RUN_ID --json jobs --jq '.jobs[] | select(.status == "completed" and .conclusion != "success") | .name'
exit 1
fi
2 changes: 1 addition & 1 deletion .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
skip-test-wheel: true
custom-container: nvcr.io/nvidia/pytorch:25.11-py3
no-build-isolation: true
runner: linux-amd64-cpu16
runner: nemo-ci-aws-gpu-x2-container
secrets:
TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# limitations under the License.

[build-system]
requires = ["setuptools>=42", "wheel", "cython>=3.0.0"]
requires = ["setuptools>=42", "wheel", "cython>=3.0.0", "torch"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
Expand Down Expand Up @@ -136,6 +136,9 @@ default-groups = ["linting", "build", "test"]
link-mode = "copy"
conflicts = [[{ extra = "trtllm" }, { extra = "vllm" }, { extra = "trt-onnx" }]]
override-dependencies = [
"torch; sys_platform == 'never'",
"torchvision; sys_platform == 'never'",
"triton; sys_platform == 'never'",
"urllib3>1.27.0",
"tiktoken>=0.9.0", # because nemo-toolkit and megatron-bridge disagree on tiktoken, we need to pin it here,
"fsspec[http]>=2023.1.0,<=2024.9.0",
Expand Down
Loading