Skip to content

(undone)safe_clip

(undone)safe_clip #228

Workflow file for this run

# CI validation pipeline executed on the self-hosted MUSA GPU runner.
name: CI Validation (MUSA GPU)

on:
  # Pull requests that target main or develop.
  pull_request:
    branches:
      - main
      - develop
    types:
      - opened
      - synchronize
      - reopened
  # Manual trigger from the Actions UI.
  workflow_dispatch:
    inputs:
      target_branch:
        description: 'Target branch to check rebase against (for manual run)'
        required: false
        default: 'main'
        type: string
  # Scheduled run: minute 0, hour 0, every day.
  schedule:
    - cron: '0 0 * * *'

# The workflow token only needs to read repository contents.
permissions:
  contents: read

jobs:
  validate:
    runs-on:
      - self-hosted
      - musa-gpu
    env:
      # PR head commit for pull_request events; otherwise the commit that triggered the run.
      COMMIT_ID: ${{ github.event.pull_request.head.sha || github.sha }}
      # Root directory on the runner under which per-run log directories are created.
      LOG_BASE: /home/runner/ci_logs
      # Checkout location on the runner; the later steps `cd` here inside their login shells.
      WORKSPACE: /home/runner/action-runner/_work/${{ github.event.repository.name }}/${{ github.event.repository.name }}
    steps:
# Diagnostic step: print the account and home directory the runner executes under.
- name: Check who am I
  run: |
    printf '%s\n' "我是用户: $(whoami)"
    printf '%s\n' "我当前所在的家目录是: $HOME"
# ==============================
# Checkout
# ==============================
- name: Checkout code
  uses: actions/checkout@v4
  with:
    # Check out exactly the commit this run validates (see COMMIT_ID in the job env).
    ref: ${{ env.COMMIT_ID }}
    # 0 = fetch the complete history; the format check later diffs against the PR base branch.
    fetch-depth: 0
# ==============================
# Create the log directory
# ==============================
- name: Prepare log directory
  run: |
    # Choose the log sub-directory from the triggering event:
    #   pull_request -> pr/<head sha>, schedule -> daily/<YYYY-MM-DD>, anything else -> manual/<run id>
    case "${{ github.event_name }}" in
      pull_request)
        run_kind="pr"
        run_key="${{ github.event.pull_request.head.sha }}"
        ;;
      schedule)
        run_kind="daily"
        run_key=$(date +%F)
        ;;
      *)
        run_kind="manual"
        run_key="${{ github.run_id }}"
        ;;
    esac

    log_dir="$LOG_BASE/$run_kind/$run_key"
    mkdir -p "$log_dir"

    # Export LOG_DIR and TYPE so that the following steps can read them.
    {
      echo "LOG_DIR=$log_dir"
      echo "TYPE=$run_kind"
    } >> $GITHUB_ENV
    echo "✅ Log dir: $log_dir"
# ==============================
# Rebase check (PR only) - currently disabled: the step below is commented out
# ==============================
# - name: Check Rebase Status
# if: github.event_name == 'pull_request'
# run: |
# TARGET_BRANCH="${{ github.base_ref }}"
# git fetch origin $TARGET_BRANCH
# if ! git merge-base --is-ancestor origin/$TARGET_BRANCH HEAD; then
# echo "❌ NOT rebased"
# exit 1
# fi
# ==============================
# Format
# ==============================
# Runs clang-format in dry-run mode and records the result in the `status` output.
# For pull requests only the C/C++ files changed relative to the base branch are checked;
# for every other trigger all C/C++ sources outside ./build and ./.git are checked.
- name: Check code formatting
  id: run-format
  # A formatting failure is recorded in the status output but does not stop the job.
  continue-on-error: true
  run: |
    set +e
    exit_code=0
    # Use bash -l -c to emulate a login shell so the Conda environment is activated automatically.
    # The script is double-quoted: $WORKSPACE and $LOG_DIR are expanded by this outer shell,
    # while the \$-escaped variables are expanded by the inner login shell.
    bash -l -c "
      set -o pipefail
      cd $WORKSPACE &&
      LOG_FILE='$LOG_DIR/format.log' &&
      FILE_LIST=\$(mktemp) &&
      # Remove the temporary file list on exit so it does not pile up on the self-hosted runner.
      trap 'rm -f \"\$FILE_LIST\"' EXIT &&
      if [ '${{ github.event_name }}' = 'pull_request' ]; then
        TARGET_BRANCH='${{ github.base_ref }}' &&
        git fetch origin \$TARGET_BRANCH &&
        git diff -z --diff-filter=ACMRTUXB --name-only origin/\$TARGET_BRANCH...HEAD -- \
          '*.c' '*.cc' '*.cpp' '*.cu' '*.h' '*.hpp' > \$FILE_LIST
      else
        find . \( -path ./build -o -path ./.git \) -prune -o \
          -regex '.*\.\(cc\|cpp\|hpp\|c\|h\|cu\)' -print0 > \$FILE_LIST
      fi &&
      if [ ! -s \$FILE_LIST ]; then
        echo 'No C/C++ files to format-check.' > \$LOG_FILE
      else
        # The file list is NUL-separated (git diff -z / find -print0), hence xargs -0.
        xargs -0 clang-format --Werror --dry-run < \$FILE_LIST > \$LOG_FILE 2>&1
      fi
    " || exit_code=$?
    if [ $exit_code -eq 0 ]; then
      echo "status=success" >> $GITHUB_OUTPUT
    else
      echo "status=failure" >> $GITHUB_OUTPUT
      exit 1
    fi
# ==============================
# Build
# ==============================
# Rebuilds the project from scratch inside a login shell; all build output goes to build.log.
- name: Build
  id: run-build
  # A failed build is recorded in the status output; later steps still run.
  continue-on-error: true
  run: |
    # Errexit is turned off so the build's exit code can be inspected below.
    set +e
    bash -l -c "
      export CPLUS_INCLUDE_PATH=/usr/include/c++/11:/usr/include/x86_64-linux-gnu/c++/11:\$CPLUS_INCLUDE_PATH &&
      cd $WORKSPACE &&
      cp -r /home/runner/tensorflow_musa_extension/CMakeLists.txt ./ &&
      LOG_FILE='$LOG_DIR/build.log' &&
      rm -rf ./build &&
      ./build.sh > \$LOG_FILE 2>&1
    "
    build_rc=$?
    if [ "$build_rc" -ne 0 ]; then
      echo "status=failure" >> $GITHUB_OUTPUT
      exit 1
    fi
    echo "status=success" >> $GITHUB_OUTPUT
# ==============================
# Test
# ==============================
# Runs test_runner.py --fusion from the test directory with a 30 minute limit; output goes to test.log.
- name: Integration Test
  id: run-integration
  # A failed test run is recorded in the status output; later steps still run.
  continue-on-error: true
  run: |
    # Errexit is turned off so the test runner's exit code can be inspected below.
    set +e
    bash -l -c "
      cd $WORKSPACE/test &&
      LOG_FILE='$LOG_DIR/test.log' &&
      timeout 30m python test_runner.py --fusion > \$LOG_FILE 2>&1
    "
    test_rc=$?
    if [ "$test_rc" -ne 0 ]; then
      echo "status=failure" >> $GITHUB_OUTPUT
      exit 1
    fi
    echo "status=success" >> $GITHUB_OUTPUT
# ==============================
# Whole-network performance test, part 1
# ==============================
# Runs the inference benchmark (1000 rounds, 20 minute limit) and fails when the measured
# average time is more than 5% above the hard-coded baseline.
- name: Check Performance part1
  id: run-performance-part1
  # A failure is recorded in the status output; later steps still run.
  continue-on-error: true
  run: |
    set -euo pipefail
    exit_code=0
    bash -l -c "
      cd /home/runner/tf_test_model/prunedGraph &&
      LOG_FILE='$LOG_DIR/performance-part1.log' &&
      timeout 20m python run_graph_tf_musa.py --inference-only --inference-rounds 1000 > \$LOG_FILE 2>&1
    " || exit_code=$?
    if [ $exit_code -ne 0 ]; then
      echo "status=failure" >> $GITHUB_OUTPUT
      exit 1
    fi

    # Take the third '|'-separated field of the last "Average Time (s)" line in the log.
    # The trailing `|| true` is required: with `set -e -o pipefail`, a grep that finds no
    # match would otherwise abort the script right here, before the status output is
    # written, and the failure would go unreported.
    CURRENT_TIME=$(grep "Average Time (s)" "$LOG_DIR/performance-part1.log" | tail -1 | awk -F'|' '{print $3}' | tr -d ' ' || true)
    if [ -z "$CURRENT_TIME" ]; then
      echo "❌ 无法解析性能结果"
      echo "status=failure" >> $GITHUB_OUTPUT
      exit 1
    fi

    # Baseline average time in seconds; the run may be at most 5% slower than this.
    BASELINE=0.038332
    THRESHOLD=$(awk -v base="$BASELINE" 'BEGIN {printf "%.6f", base * 1.05}')
    echo "Baseline : $BASELINE"
    echo "Threshold: $THRESHOLD"
    echo "Current : $CURRENT_TIME"

    # awk exits 0 (shell "true") exactly when the current time exceeds the threshold.
    if awk -v cur="$CURRENT_TIME" -v thr="$THRESHOLD" 'BEGIN {exit !(cur > thr)}'; then
      echo "❌ 性能退化"
      echo "status=failure" >> $GITHUB_OUTPUT
      exit 1
    else
      echo "✅ 性能正常"
      echo "status=success" >> $GITHUB_OUTPUT
    fi
# ==============================
# Whole-network performance test, part 2
# ==============================
# Runs the accuracy comparison (rtol/atol 1e-2, 20 minute limit) and passes only when the
# log contains the expected success message.
- name: Check Performance part2
  id: run-performance-part2
  # A failure is recorded in the status output; later steps still run.
  continue-on-error: true
  run: |
    set -euo pipefail

    # Guard 1: the comparison script itself must exit successfully.
    if ! bash -l -c "
      cd /home/runner/tf_test_model/prunedGraph &&
      LOG_FILE='$LOG_DIR/performance-part2.log' &&
      timeout 20m python run_graph_tf_musa.py --compare-accuracy --rtol 1e-2 --atol 1e-2 > \$LOG_FILE 2>&1
    "; then
      echo "❌ 精度测试脚本执行失败"
      echo "status=failure" >> $GITHUB_OUTPUT
      exit 1
    fi

    # Guard 2: the log must contain the success marker; otherwise show the end of the log.
    if ! grep -q "精度对比通过!CPU 与 MUSA 结果一致" "$LOG_DIR/performance-part2.log"; then
      echo "❌ 精度对比失败"
      echo "status=failure" >> $GITHUB_OUTPUT
      echo "===== 日志最后50行 ====="
      tail -50 "$LOG_DIR/performance-part2.log"
      exit 1
    fi

    echo "✅ 精度对比通过"
    echo "status=success" >> $GITHUB_OUTPUT
# ==============================
# Summary (written to the log directory)
# ==============================
# Writes summary.md with the status reported by each check into this run's log directory.
- name: Save Summary
  if: always()
  run: |
    # LOG_DIR is exported by the "Prepare log directory" step. Because this step runs under
    # always(), it can execute even when that step never ran; do not write to "/summary.md".
    if [ -z "${LOG_DIR:-}" ]; then
      echo "LOG_DIR is not set; skipping summary file."
      exit 0
    fi

    SUMMARY="$LOG_DIR/summary.md"
    {
      # Double quotes are required here so that $COMMIT_ID is expanded into the heading.
      echo "# CI Report - $COMMIT_ID"
      echo "- Format: ${{ steps.run-format.outputs.status || 'skipped' }}"
      echo "- Build: ${{ steps.run-build.outputs.status || 'skipped' }}"
      echo "- Test: ${{ steps.run-integration.outputs.status || 'skipped' }}"
      echo "- Performance: ${{ steps.run-performance-part1.outputs.status || 'skipped' }}"
      echo "- Performance: ${{ steps.run-performance-part2.outputs.status || 'skipped' }}"
    } > "$SUMMARY"
# ==============================
# CI Summary (GitHub UI)
# ==============================
# Publishes the result table to the run's summary page and fails the job when the build,
# integration test or either performance check failed.
- name: CI Summary
  if: always()
  run: |
    {
      echo "## CI Result"
      echo "Commit: $COMMIT_ID"
      echo ""
      echo "| Step | Result |"
      echo "|------|--------|"
      echo "| Format | ${{ steps.run-format.outputs.status || 'skipped' }} |"
      echo "| Build | ${{ steps.run-build.outputs.status || 'skipped' }} |"
      echo "| Test | ${{ steps.run-integration.outputs.status || 'skipped' }} |"
      echo "| Performance | ${{ steps.run-performance-part1.outputs.status || 'skipped' }} |"
      echo "| Performance | ${{ steps.run-performance-part2.outputs.status || 'skipped' }} |"
    } >> "$GITHUB_STEP_SUMMARY"

    # Gate. Every checked step uses continue-on-error, so the job only turns red here.
    # Both the self-reported `status` output and the step `outcome` (the result before
    # continue-on-error is applied) are checked, so a step that failed before it could
    # write its status output still fails the job.
    # NOTE(review): the format check is reported above but is not part of the gate,
    # as in the original condition - confirm this is intentional.
    for result in \
      "${{ steps.run-build.outputs.status }}" "${{ steps.run-build.outcome }}" \
      "${{ steps.run-integration.outputs.status }}" "${{ steps.run-integration.outcome }}" \
      "${{ steps.run-performance-part1.outputs.status }}" "${{ steps.run-performance-part1.outcome }}" \
      "${{ steps.run-performance-part2.outputs.status }}" "${{ steps.run-performance-part2.outcome }}"
    do
      if [ "$result" = "failure" ]; then
        exit 1
      fi
    done