Skip to content

add support for DCP and FSDP async save (#4027) #10210

add support for DCP and FSDP async save (#4027)

add support for DCP and FSDP async save (#4027) #10210

# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Build, test, and publish a PyPi wheel (to testpypi).

# Triggers: pushes to main and to branches matching the pull-request/<number>
# or deploy-release/* patterns, plus merge-queue check requests.
on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"
      - "deploy-release/*"
  merge_group:
    types: [checks_requested]

# Every `run` step uses bash with command tracing (-x), exit-on-error (-e),
# error-on-unset-variable (-u), and pipeline failure propagation (pipefail).
defaults:
  run:
    shell: bash -x -e -u -o pipefail {0}

# Token scopes granted to the jobs of this workflow.
permissions:
  id-token: write
  contents: read
jobs:
  # Shared pre-flight reusable workflow, pinned to a tagged release of
  # FW-CI-templates. Only runs in the NVIDIA/Megatron-LM repository.
  pre-flight:
    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cicd_preflight.yml@v0.73.2
    if: github.repository == 'NVIDIA/Megatron-LM'

  # Delegates the wheel build/test to the repository-local reusable workflow.
  # `no-publish: true` is passed unconditionally.
  # NOTE(review): presumably this disables the upload step inside the reusable
  # workflow - confirm against _build_test_publish_wheel.yml.
  build-test-publish-wheels:
    needs: [pre-flight]
    uses: ./.github/workflows/_build_test_publish_wheel.yml
    with:
      no-publish: true
    secrets:
      # SVC_PYPI_TOKEN on main or on refs starting with refs/heads/r,
      # SVC_PYPI_TEST_TOKEN everywhere else.
      TWINE_PASSWORD: ${{ (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/heads/r')) && secrets.SVC_PYPI_TOKEN || secrets.SVC_PYPI_TEST_TOKEN }}

  # Single aggregate status for the wheel jobs.
  build-test-publish-wheel-summary:
    needs: [pre-flight, build-test-publish-wheels]
    # Run whenever the workflow was not cancelled, regardless of how the needed
    # jobs finished. Using a status-check function here replaces the implicit
    # success() requirement. The pre-flight outputs are not consulted: an
    # `|| always()` alternative made those checks redundant, so they were
    # dropped without changing when this job runs.
    if: ${{ !cancelled() && github.repository == 'NVIDIA/Megatron-LM' }}
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): presumably needed so `gh` can infer the repository from
      # the git remote - confirm before removing.
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Result
        env:
          GH_TOKEN: ${{ github.token }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          # Quoted so the script's string comparison against "true" is
          # unambiguous.
          SKIPPING_IS_ALLOWED: "false"
        run: |
          # jq selector for jobs of this run that have completed with any
          # conclusion other than "success" and whose name matches
          # build-and-test-wheels.
          JOB_FILTER='.jobs[] | select(.status == "completed" and .conclusion != "success" and (.name | test("build-and-test-wheels")))'

          # If the query itself fails, fall back to 0 failed jobs (the
          # pre-existing outcome), but say so in the run annotations instead
          # of passing silently.
          if ! FAILED_JOBS=$(gh run view "$GITHUB_RUN_ID" --json jobs --jq "[${JOB_FILTER}] | length"); then
            echo "::warning::Could not query job results for run $GITHUB_RUN_ID; assuming 0 failed jobs"
            FAILED_JOBS=0
          fi

          if [ "${FAILED_JOBS:-0}" -eq 0 ] || [ "$SKIPPING_IS_ALLOWED" == "true" ]; then
            echo "✅ All build-and-test-wheels jobs completed successfully"
            exit 0
          else
            echo "❌ Found $FAILED_JOBS failed build-and-test-wheels job(s)"
            # Show which jobs failed
            gh run view "$GITHUB_RUN_ID" --json jobs --jq "${JOB_FILTER} | .name"
            exit 1
          fi