(undone)safe_clip #228

Workflow file for this run

.github/workflows/pr-validation.yml at 8eccfa1

	# CI validation for the MUSA GPU build: format check, build, integration test,
	# whole-network latency check and CPU-vs-MUSA accuracy check, followed by two
	# summary steps.
	#
	# Every check step runs with `continue-on-error: true` and records
	# `status=success|failure` in its step outputs. The final "CI Summary" step
	# reads those outputs and decides whether the job fails.
	name: CI Validation (MUSA GPU)

	on:
	  pull_request:
	    branches: [main, develop]
	    types: [opened, synchronize, reopened]
	  workflow_dispatch:
	    inputs:
	      # NOTE(review): no enabled step reads this input; the rebase check its
	      # description refers to is commented out further down.
	      target_branch:
	        description: 'Target branch to check rebase against (for manual run)'
	        required: false
	        default: 'main'
	        type: string
	  schedule:
	    # Daily at 00:00 (GitHub evaluates cron schedules in UTC).
	    - cron: '0 0 * * *'

	permissions:
	  contents: read

	jobs:
	  validate:
	    runs-on: [self-hosted, musa-gpu]

	    env:
	      # PR head commit for pull_request events, otherwise the triggering commit.
	      COMMIT_ID: ${{ github.event.pull_request.head.sha || github.sha }}
	      # Root directory on the runner under which per-run logs are stored.
	      LOG_BASE: /home/runner/ci_logs
	      # NOTE(review): hard-coded runner work directory; presumably equals
	      # $GITHUB_WORKSPACE on this runner - confirm before changing either.
	      WORKSPACE: /home/runner/action-runner/_work/${{ github.event.repository.name }}/${{ github.event.repository.name }}

	    steps:
	      # Diagnostic only: print the runner user and its home directory.
	      - name: Check who am I
	        run: |
	          echo "我是用户: $(whoami)"
	          echo "我当前所在的家目录是: $HOME"

	      # ==============================
	      # Checkout
	      # ==============================
	      - name: Checkout code
	        uses: actions/checkout@v4
	        with:
	          # Full history, so the format step can diff against the base branch.
	          fetch-depth: 0
	          ref: ${{ env.COMMIT_ID }}

	      # ==============================
	      # Create the log directory
	      # ==============================
	      # Logs go to $LOG_BASE/<type>/<id>:
	      #   pull_request -> pr/<head sha>
	      #   schedule     -> daily/<YYYY-MM-DD>
	      #   anything else -> manual/<run id>
	      # LOG_DIR and TYPE are exported to later steps through $GITHUB_ENV.
	      - name: Prepare log directory
	        run: |
	          if [ "${{ github.event_name }}" = "pull_request" ]; then
	            TYPE="pr"
	            ID="${{ github.event.pull_request.head.sha }}"
	          elif [ "${{ github.event_name }}" = "schedule" ]; then
	            TYPE="daily"
	            ID=$(date +%F)
	          else
	            TYPE="manual"
	            ID="${{ github.run_id }}"
	          fi

	          LOG_DIR="$LOG_BASE/$TYPE/$ID"

	          mkdir -p "$LOG_DIR"

	          echo "LOG_DIR=$LOG_DIR" >> "$GITHUB_ENV"
	          echo "TYPE=$TYPE" >> "$GITHUB_ENV"

	          echo "✅ Log dir: $LOG_DIR"

	      # ==============================
	      # Rebase check (pull requests only) - currently disabled
	      # ==============================
	      # - name: Check Rebase Status
	      #   if: github.event_name == 'pull_request'
	      #   run: |
	      #     TARGET_BRANCH="${{ github.base_ref }}"
	      #     git fetch origin $TARGET_BRANCH
	      #     if ! git merge-base --is-ancestor origin/$TARGET_BRANCH HEAD; then
	      #       echo "❌ NOT rebased"
	      #       exit 1
	      #     fi

	      # ==============================
	      # Format
	      # ==============================
	      # Runs `clang-format --Werror --dry-run` and writes its output to
	      # $LOG_DIR/format.log. For pull requests only the C/C++/CUDA files changed
	      # relative to the base branch are checked; otherwise every matching file
	      # outside ./build and ./.git is checked.
	      - name: Check code formatting
	        id: run-format
	        run: |
	          set +e

	          exit_code=0
	          # Use `bash -l -c` to emulate a login shell so the Conda environment is
	          # activated automatically. The script is double-quoted: $WORKSPACE and
	          # $LOG_DIR are expanded by this outer shell, while \$-escaped variables
	          # are expanded by the inner shell. The EXIT trap removes the temporary
	          # file list so it does not accumulate on the self-hosted runner.
	          bash -l -c "
	            set -o pipefail
	            cd '$WORKSPACE' &&
	            LOG_FILE='$LOG_DIR/format.log' &&
	            FILE_LIST=\$(mktemp) &&
	            trap 'rm -f \$FILE_LIST' EXIT &&
	            if [ '${{ github.event_name }}' = 'pull_request' ]; then
	              TARGET_BRANCH='${{ github.base_ref }}' &&
	              git fetch origin \$TARGET_BRANCH &&
	              git diff -z --diff-filter=ACMRTUXB --name-only origin/\$TARGET_BRANCH...HEAD -- \
	                '*.c' '*.cc' '*.cpp' '*.cu' '*.h' '*.hpp' > \$FILE_LIST
	            else
	              find . \( -path ./build -o -path ./.git \) -prune -o \
	                -regex '.*\.\(cc\|cpp\|hpp\|c\|h\|cu\)' -print0 > \$FILE_LIST
	            fi &&
	            if [ ! -s \$FILE_LIST ]; then
	              echo 'No C/C++ files to format-check.' > \$LOG_FILE
	            else
	              xargs -0 clang-format --Werror --dry-run < \$FILE_LIST > \$LOG_FILE 2>&1
	            fi
	          " || exit_code=$?

	          if [ $exit_code -eq 0 ]; then
	            echo "status=success" >> "$GITHUB_OUTPUT"
	          else
	            echo "status=failure" >> "$GITHUB_OUTPUT"
	            exit 1
	          fi
	        continue-on-error: true

	      # ==============================
	      # Build
	      # ==============================
	      # Clean build through ./build.sh; output goes to $LOG_DIR/build.log.
	      - name: Build
	        id: run-build
	        run: |
	          set +e

	          exit_code=0
	          # NOTE(review): the checked-out CMakeLists.txt is overwritten with a copy
	          # kept on the runner - presumably a runner-specific configuration; confirm
	          # this is intended, since CMakeLists.txt changes in a PR are not built.
	          bash -l -c "
	            export CPLUS_INCLUDE_PATH=/usr/include/c++/11:/usr/include/x86_64-linux-gnu/c++/11:\$CPLUS_INCLUDE_PATH &&
	            cd '$WORKSPACE' &&
	            cp -r /home/runner/tensorflow_musa_extension/CMakeLists.txt ./ &&
	            LOG_FILE='$LOG_DIR/build.log' &&
	            rm -rf ./build &&
	            ./build.sh > \$LOG_FILE 2>&1
	          " || exit_code=$?

	          if [ $exit_code -eq 0 ]; then
	            echo "status=success" >> "$GITHUB_OUTPUT"
	          else
	            echo "status=failure" >> "$GITHUB_OUTPUT"
	            exit 1
	          fi
	        continue-on-error: true

	      # ==============================
	      # Test
	      # ==============================
	      # Runs test/test_runner.py --fusion with a 30 minute limit; output goes to
	      # $LOG_DIR/test.log.
	      - name: Integration Test
	        id: run-integration
	        run: |
	          set +e

	          exit_code=0
	          bash -l -c "
	            cd '$WORKSPACE/test' &&
	            LOG_FILE='$LOG_DIR/test.log' &&
	            timeout 30m python test_runner.py --fusion > \$LOG_FILE 2>&1
	          " || exit_code=$?

	          if [ $exit_code -eq 0 ]; then
	            echo "status=success" >> "$GITHUB_OUTPUT"
	          else
	            echo "status=failure" >> "$GITHUB_OUTPUT"
	            exit 1
	          fi
	        continue-on-error: true

	      # ==============================
	      # Whole-network performance test, part 1
	      # ==============================
	      # Runs 1000 inference rounds (20 minute limit), reads the last
	      # "Average Time (s)" row from the log and fails when it exceeds the
	      # baseline by more than 5%.
	      - name: Check Performance part1
	        id: run-performance-part1
	        run: |
	          set -euo pipefail

	          exit_code=0

	          bash -l -c "
	            cd /home/runner/tf_test_model/prunedGraph &&
	            LOG_FILE='$LOG_DIR/performance-part1.log' &&
	            timeout 20m python run_graph_tf_musa.py --inference-only --inference-rounds 1000 > \$LOG_FILE 2>&1
	          " || exit_code=$?

	          if [ $exit_code -ne 0 ]; then
	            echo "status=failure" >> "$GITHUB_OUTPUT"
	            exit 1
	          fi

	          # Third `|`-separated field of the last matching line, spaces removed.
	          # `|| true`: with `set -euo pipefail` a grep that matches nothing would
	          # abort the script right here, before any status is recorded; the
	          # empty-value check below reports that case as a failure instead.
	          CURRENT_TIME=$(grep "Average Time (s)" "$LOG_DIR/performance-part1.log" | tail -1 | awk -F'|' '{print $3}' | tr -d ' ' || true)

	          if [ -z "$CURRENT_TIME" ]; then
	            echo "❌ 无法解析性能结果"
	            echo "status=failure" >> "$GITHUB_OUTPUT"
	            exit 1
	          fi

	          # Allowed regression: 5% above the recorded baseline.
	          BASELINE=0.038332
	          THRESHOLD=$(awk -v base="$BASELINE" 'BEGIN {printf "%.6f", base * 1.05}')

	          echo "Baseline : $BASELINE"
	          echo "Threshold: $THRESHOLD"
	          echo "Current : $CURRENT_TIME"

	          # awk exits 0 (shell "true") exactly when cur > thr.
	          if awk -v cur="$CURRENT_TIME" -v thr="$THRESHOLD" 'BEGIN {exit !(cur > thr)}'; then
	            echo "❌ 性能退化"
	            echo "status=failure" >> "$GITHUB_OUTPUT"
	            exit 1
	          else
	            echo "✅ 性能正常"
	            echo "status=success" >> "$GITHUB_OUTPUT"
	          fi
	        continue-on-error: true

	      # ==============================
	      # Whole-network performance test, part 2
	      # ==============================
	      # Runs the CPU-vs-MUSA accuracy comparison (rtol/atol 1e-2, 20 minute limit)
	      # and succeeds only when the log contains the script's pass message.
	      - name: Check Performance part2
	        id: run-performance-part2
	        run: |
	          set -euo pipefail

	          exit_code=0

	          bash -l -c "
	            cd /home/runner/tf_test_model/prunedGraph &&
	            LOG_FILE='$LOG_DIR/performance-part2.log' &&
	            timeout 20m python run_graph_tf_musa.py --compare-accuracy --rtol 1e-2 --atol 1e-2 > \$LOG_FILE 2>&1
	          " || exit_code=$?

	          if [ $exit_code -ne 0 ]; then
	            echo "❌ 精度测试脚本执行失败"
	            echo "status=failure" >> "$GITHUB_OUTPUT"
	            exit 1
	          fi

	          if grep -q "精度对比通过！CPU 与 MUSA 结果一致" "$LOG_DIR/performance-part2.log"; then
	            echo "✅ 精度对比通过"
	            echo "status=success" >> "$GITHUB_OUTPUT"
	          else
	            echo "❌ 精度对比失败"
	            echo "status=failure" >> "$GITHUB_OUTPUT"

	            echo "===== 日志最后50行 ====="
	            tail -50 "$LOG_DIR/performance-part2.log"

	            exit 1
	          fi
	        continue-on-error: true

	      # ==============================
	      # Summary (written to the log directory)
	      # ==============================
	      - name: Save Summary
	        if: always()
	        run: |
	          # This step also runs after an earlier hard failure; without LOG_DIR the
	          # report path would collapse to /summary.md.
	          if [ -z "${LOG_DIR:-}" ]; then
	            echo "LOG_DIR is not set; skipping summary file"
	            exit 0
	          fi

	          SUMMARY="$LOG_DIR/summary.md"
	          {
	            echo "# CI Report - $COMMIT_ID"
	            echo "- Format: ${{ steps.run-format.outputs.status || 'skipped' }}"
	            echo "- Build: ${{ steps.run-build.outputs.status || 'skipped' }}"
	            echo "- Test: ${{ steps.run-integration.outputs.status || 'skipped' }}"
	            echo "- Performance (part1): ${{ steps.run-performance-part1.outputs.status || 'skipped' }}"
	            echo "- Performance (part2): ${{ steps.run-performance-part2.outputs.status || 'skipped' }}"
	          } > "$SUMMARY"

	      # ==============================
	      # CI Summary (GitHub UI)
	      # ==============================
	      # Writes the result table to the job summary and then fails the job unless
	      # every gated step reported success.
	      - name: CI Summary
	        if: always()
	        run: |
	          {
	            echo "## CI Result"
	            echo "Commit: $COMMIT_ID"
	            echo ""
	            echo "| Step | Result |"
	            echo "|------|--------|"
	            echo "| Format | ${{ steps.run-format.outputs.status || 'skipped' }} |"
	            echo "| Build | ${{ steps.run-build.outputs.status || 'skipped' }} |"
	            echo "| Test | ${{ steps.run-integration.outputs.status || 'skipped' }} |"
	            echo "| Performance (part1) | ${{ steps.run-performance-part1.outputs.status || 'skipped' }} |"
	            echo "| Performance (part2) | ${{ steps.run-performance-part2.outputs.status || 'skipped' }} |"
	          } >> "$GITHUB_STEP_SUMMARY"

	          # Gate on build, test and both performance steps. Anything other than
	          # "success" fails the job, so a step that aborted before recording a
	          # status cannot slip through. The format result is reported above but
	          # is not part of the gate.
	          failed=0
	          for result in \
	            "Build=${{ steps.run-build.outputs.status }}" \
	            "Test=${{ steps.run-integration.outputs.status }}" \
	            "Performance part1=${{ steps.run-performance-part1.outputs.status }}" \
	            "Performance part2=${{ steps.run-performance-part2.outputs.status }}"; do
	            if [ "${result#*=}" != "success" ]; then
	              echo "Gating step did not succeed: ${result%%=*} (status: '${result#*=}')"
	              failed=1
	            fi
	          done
	          exit "$failed"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

(undone)safe_clip #228

Workflow file

(undone)safe_clip #228

Uh oh!

Workflow file for this run