@@ -488,103 +488,19 @@ jobs:
488488 # ARTIFACTS: |
489489 # test-equinox.log
490490 # secrets: inherit
491- test-transformerengine-eks :
491+ test-transformerengine-h100 :
492492# needs: build-jax
493493 if : inputs.ARCHITECTURE == 'amd64'
494- runs-on : eks
495- env :
494+ # runs-on dropped: a job that calls a reusable workflow via `uses` must not set runs-on; the called workflow picks the runner
495+ uses : ./.github/workflows/transformer-engine/_unittests_eks.yaml # NOTE(review): reusable workflows are documented as living directly under .github/workflows - confirm this subdirectory path resolves
496+ with :
496497# JAX_DOCKER_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAG_FINAL }}
497498 JAX_DOCKER_IMAGE : ghcr.io/nvidia/jax-toolbox-internal:13652377029-jax-amd64
498499 JOB_NAME : transformerengine-${{ github.run_id }}
499500 S3_BUCKET : jax-toolbox-eks-output
500501 CI_NAME : transformer-engine
501- steps :
502- - name : Check out the repository
503- uses : actions/checkout@v4
504- - name : Login to GitHub Container Registry
505- uses : docker/login-action@v3
506- with :
507- registry : ghcr.io
508- username : ${{ github.repository_owner }}
509- password : ${{ secrets.GITHUB_TOKEN }}
510- - name : K8s GHCR store and delete token
511- id : store-token
512- uses : ./.github/actions/store-delete-k8s-ghcr
513- - name : Configure job manifest
514- run : |
515- cat .github/eks-workflow-files/transformer-engine/unit-tests.yml | \
516- sed s@JOB_NAME@${{ env.JOB_NAME }}@g | \
517- sed s@IMAGE_URI@${{ env.JAX_DOCKER_IMAGE }}@g | \
518- sed s@SECRETS_NAME@${{ steps.store-token.outputs.token-name }}@g | \
519- tee .github/eks-workflow-files/transformer-engine/unit-tests.yml
520- - name : Submit & delete transformer engine unit test job
521- uses : ./.github/actions/submit-delete-k8s-job
522- with :
523- job-config-file : .github/eks-workflow-files/transformer-engine/unit-tests.yml
524- job-name : ${{ env.JOB_NAME }}
525- - name : Download and process logs from S3
526- id : s3-logs-process
527- run : |
528- LOCAL_DIR=${{ env.CI_NAME }}-output
502+ secrets : inherit
529503
530- mkdir -p $LOCAL_DIR
531- # aws s3 cp s3://${{ env.S3_BUCKET }}/${{ env.CI_NAME }}/${{ env.JOB_NAME }}/summary.txt $LOCAL_DIR/
532- aws s3 cp s3://${{ env.S3_BUCKET }}/${{ env.CI_NAME }}/${{ env.JOB_NAME }}/ $LOCAL_DIR/ --recursive --exclude "*" --include "*.log"
533-
534- passed=$(cat $LOCAL_DIR/tests.log | grep 'PASSED opt/transformer' | wc -l || true)
535- failed=$(cat $LOCAL_DIR/tests.log | grep 'FAILED opt/transformer' | wc -l || true)
536-
537- total=$((failed + passed))
538- echo "Passed tests: $passed"
539- echo "Failed tests: $failed"
540- echo "Total tests: $total"
541- echo "PASSED_TESTS=$passed" >> $GITHUB_OUTPUT
542- echo "FAILED_TESTS=$failed" >> $GITHUB_OUTPUT
543- echo "TOTAL_TESTS=$total" >> $GITHUB_OUTPUT
544-
545- - name : Generate sitrep
546- id : sitrep
547- if : ${{ !cancelled() }}
548- shell : bash -x -e {0}
549- run : |
550- # bring in utility functions
551- source .github/workflows/scripts/to_json.sh
552-
553- badge_label='TransformerEngine EKS Unit'
554-
555- total_tests=${{ steps.s3-logs-process.outputs.TOTAL_TESTS }} \
556- failed_tests=${{ steps.s3-logs-process.outputs.FAILED_TESTS }} \
557- passed_tests=${{ steps.s3-logs-process.outputs.PASSED_TESTS }} \
558- errors="0" \
559- summary="All tests: $total_tests. Passed: $passed_tests. Failed: $failed_tests." \
560- badge_message="Passed $passed_tests out of $total_tests." \
561- badge_color="brightgreen"
562- if [ "$failed_tests" -gt 0 ]; then
563- badge_color="red"
564- fi \
565-
566- to_json \
567- summary \
568- errors total_tests passed_tests failed_tests \
569- badge_label badge_color badge_message \
570- > sitrep.json
571-
572- schemaVersion=1 \
573- label="${badge_label}" \
574- message="Passed $passed_tests out of $total_tests." \
575- color=$badge_color \
576- to_json schemaVersion label message color \
577- > badge-transformer-engine-test.json
578-
579- - name : Upload artifacts
580- if : ${{ !cancelled() }}
581- uses : actions/upload-artifact@v4
582- with :
583- name : " artifact-transformer-engine-test"
584- path : |
585- sitrep.json
586- badge-transformer-engine-test.json
587- trasformer-engine-output/*
588504
589505# te-unittests:
590506# secrets: inherit
@@ -753,4 +669,4 @@ jobs:
753669# uses: ./.github/workflows/_test_maxtext.yaml
754670# with:
755671# MAXTEXT_IMAGE: ${{ needs.build-maxtext.outputs.DOCKER_TAG_FINAL }}
756- # secrets: inherit
672+ # secrets: inherit
0 commit comments