diff --git a/benchmarks/hardware/hygon/scripts/README.md b/benchmarks/hardware/hygon/scripts/README.md
new file mode 100644
index 0000000..7c1a139
--- /dev/null
+++ b/benchmarks/hardware/hygon/scripts/README.md
@@ -0,0 +1,23 @@
+# 海光平台通信与带宽测试脚本
+
+本目录包含基于 HYQual 和 rocm-bandwidth-test 的自动测试脚本,用于在海光平台评估:
+
+1. CPU 与 AI 芯片间通信带宽(H2D 和 D2H)
+2. AI 芯片间通信带宽
+3. CUDA 代码兼容性测试
+4. 内存带宽测试
+
+## 目录结构
+
+- `run_all_tests.sh`:一键执行所有测试
+- `run_hyqual_test.expect`:自动执行 HYQual PCIe 带宽测试(菜单项 4)
+- `run_hyqual_mem_test.expect`:自动执行 HYQual 内存带宽测试(菜单项 6)
+
+## 使用说明
+
+1. 拷贝 HYQual 工具包(https://download.sourcefind.cn:65024/directlink/5/%E5%9F%BA%E7%A1%80%E5%8E%8B%E5%8A%9B%E5%B7%A5%E5%85%B7/hyqual_v3.0.3.tar.gz)至某路径,如 `/home/xxx/hyqual_v3.0.3`
+2. 修改 `run_all_tests.sh` 中对应路径变量
+3. 运行测试脚本:
+   ```bash
+   sudo bash run_all_tests.sh
+   ```
diff --git a/benchmarks/hardware/hygon/scripts/run_all_tests.sh b/benchmarks/hardware/hygon/scripts/run_all_tests.sh
new file mode 100755
index 0000000..185716f
--- /dev/null
+++ b/benchmarks/hardware/hygon/scripts/run_all_tests.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+#
+# Runs the Hygon communication/bandwidth test suite, one log file per test:
+#   1. CPU <-> DCU bandwidth  (HYQual menu item 4, run on the host)
+#   2. DCU <-> DCU bandwidth  (rocm-bandwidth-test, run in the container)
+#   3. CUDA compatibility     (test1 / test2, run in the container)
+#   4. Memory bandwidth       (HYQual menu item 6, run in the container)
+#
+# Usage: sudo bash run_all_tests.sh
+# Each setting below may be overridden by an environment variable of the same
+# name. Tests 2-4 need the docker container named by CONTAINER to be running;
+# when it is not, they are skipped and the script exits with status 1.
+
+set -euo pipefail
+
+# Directory containing this script and its .expect helpers, independent of cwd.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# Path settings
+WORKDIR="$(pwd)"
+LOGDIR="${LOGDIR:-${WORKDIR}/logs}"
+HYQUAL_DIR="${HYQUAL_DIR:-${WORKDIR}/hyqual_v3.0.3}"
+DTK_BIN_DIR="${DTK_BIN_DIR:-/opt/dtk/bin}"
+CUDA_TEST_DIR="${CUDA_TEST_DIR:-/home/qiyuan}"
+MEM_BANDWIDTH_DIR="${MEM_BANDWIDTH_DIR:-/home/hyqual_v2.2.7}"
+CONTAINER="${CONTAINER:-bandwidth_test}"
+
+#######################################
+# Reports whether the test container is currently running.
+# Globals:   CONTAINER (read)
+# Returns:   0 if running, non-zero if it is stopped or does not exist
+#######################################
+container_running() {
+  local state
+  state="$(docker inspect -f '{{.State.Running}}' "${CONTAINER}" 2>/dev/null)" \
+    || return 1
+  [[ "${state}" == "true" ]]
+}
+
+#######################################
+# Runs a shell snippet inside the container after loading the DTK and CUDA
+# environment. Extra arguments reach the snippet as $1, $2, ... so paths are
+# passed as data instead of being re-parsed by the inner shell.
+# Globals:   CONTAINER (read)
+# Arguments: $1 - snippet to run; $2... - positional parameters for it
+# Returns:   exit status of docker exec
+#######################################
+run_in_dtk_env() {
+  local snippet=$1
+  shift
+  docker exec "${CONTAINER}" bash -c \
+    "cd /opt/dtk && source env.sh && source cuda/env.sh && ${snippet}" _ "$@"
+}
+
+# Create the log directory
+mkdir -p "${LOGDIR}"
+
+##############################
+# 1. CPU <-> AI chip bandwidth test
+##############################
+echo "[1/4] 进行 CPU 与 AI 芯片通信带宽测试..."
+cd "${HYQUAL_DIR}"
+chmod +x run
+expect "${SCRIPT_DIR}/run_hyqual_test.expect" "${HYQUAL_DIR}" \
+  | tee "${LOGDIR}/cpu_to_dcu_bandwidth.log"
+echo "[INFO] 已记录至 ${LOGDIR}/cpu_to_dcu_bandwidth.log"
+cd "${WORKDIR}"
+
+# Tests 2-4 all run inside the container, so check it once up front. Plain
+# `docker ps -a` would also match a stopped container, on which exec fails.
+if ! container_running; then
+  echo "[WARNING] ${CONTAINER} 容器未运行,请手动启动后再执行 rocm-bandwidth-test。" >&2
+  echo "[INFO] 启动命令:docker start ${CONTAINER}" >&2
+  echo "[WARNING] 已跳过测试 2-4,仅完成测试 1。日志保存在:${LOGDIR}" >&2
+  exit 1
+fi
+
+##############################
+# 2. AI chip to AI chip bandwidth test
+##############################
+echo "[2/4] 进行 AI 芯片间通信带宽测试..."
+echo "[INFO] 使用 ${CONTAINER} 容器执行 rocm-bandwidth-test..."
+docker exec "${CONTAINER}" bash -c 'cd "$1" && ./rocm-bandwidth-test' _ "${DTK_BIN_DIR}" \
+  | tee "${LOGDIR}/dcu_to_dcu_bandwidth.log"
+
+##############################
+# 3. CUDA code compatibility test
+##############################
+echo "[3/4] 进行 CUDA 代码兼容性测试..."
+run_in_dtk_env 'cd "$1" && ./test1' "${CUDA_TEST_DIR}" \
+  | tee "${LOGDIR}/cuda_test1.log"
+run_in_dtk_env 'cd "$1" && ./test2' "${CUDA_TEST_DIR}" \
+  | tee "${LOGDIR}/cuda_test2.log"
+
+##############################
+# 4. Memory bandwidth test
+##############################
+echo "[4/4] 进行内存带宽测试..."
+# The expect driver lives next to this script on the host; copy it into the
+# container so the /tmp path used below actually exists there.
+docker cp "${SCRIPT_DIR}/run_hyqual_mem_test.expect" \
+  "${CONTAINER}:/tmp/run_hyqual_mem_test.expect"
+run_in_dtk_env 'cd "$1" && expect /tmp/run_hyqual_mem_test.expect "$1"' \
+  "${MEM_BANDWIDTH_DIR}" | tee "${LOGDIR}/memory_bandwidth.log"
+
+##############################
+# Summary
+##############################
+printf '\n 所有测试完成。日志保存在:%s\n' "${LOGDIR}"
+
+printf '\n [1] H2D/D2H 带宽日志:%s\n' "${LOGDIR}/cpu_to_dcu_bandwidth.log"
+printf ' [2] DCU 间带宽日志:%s\n' "${LOGDIR}/dcu_to_dcu_bandwidth.log"
+printf ' [3] CUDA 测试日志:%s 和 cuda_test2.log\n' "${LOGDIR}/cuda_test1.log"
+printf ' [4] 内存带宽日志:%s\n' "${LOGDIR}/memory_bandwidth.log"
diff --git a/benchmarks/hardware/hygon/scripts/run_hyqual_mem_test.expect b/benchmarks/hardware/hygon/scripts/run_hyqual_mem_test.expect
new file mode 100755
index 0000000..a8d09c1
--- /dev/null
+++ b/benchmarks/hardware/hygon/scripts/run_hyqual_mem_test.expect
@@ -0,0 +1,59 @@
+#!/usr/bin/expect -f
+#
+# Drives the HYQual interactive menu to run the memory bandwidth test
+# (menu item 6) and then quits the tool.
+# Usage: expect run_hyqual_mem_test.expect <hyqual_dir>
+# Exit status: 0 when the run completed, 1 on usage error, when the menu never
+# appeared, or when the test timed out.
+
+if {[llength $argv] < 1} {
+    puts stderr "usage: run_hyqual_mem_test.expect <hyqual_dir>"
+    exit 1
+}
+
+set timeout 600 ;# wait at most 10 minutes for each prompt
+set hyqual_path [lindex $argv 0]
+
+spawn bash $hyqual_path/run
+expect {
+    "Key-in selection followed by :" {
+        send "6\r"
+    }
+    timeout {
+        puts "内存测试菜单未加载成功"
+        exit 1
+    }
+    eof {
+        # The tool exited before showing its menu; report that as a failure
+        # instead of falling through and exiting 0.
+        puts "内存测试菜单未加载成功"
+        exit 1
+    }
+}
+
+# Wait for the test to produce output, then leave the tool.
+expect {
+    -re "Bandwidth.*" {
+        # Output mentioning Bandwidth appeared; pause briefly, then send q.
+        sleep 2
+        send "q\r"
+    }
+    "Key-in selection followed by :" {
+        send "q\r"
+    }
+    timeout {
+        puts "内存带宽测试超时,强制退出"
+        send "\003" ;# Ctrl+C to force the tool to stop
+        set timeout 10
+        expect eof
+        exit 1
+    }
+    eof {
+        exit 0
+    }
+}
+
+# Keep reading until the tool exits (bounded) so trailing output reaches the
+# log; returning right after "q" would close the pty and could truncate it.
+set timeout 30
+expect eof
diff --git a/benchmarks/hardware/hygon/scripts/run_hyqual_test.expect b/benchmarks/hardware/hygon/scripts/run_hyqual_test.expect
new file mode 100755
index 0000000..6d76d3c
--- /dev/null
+++ b/benchmarks/hardware/hygon/scripts/run_hyqual_test.expect
@@ -0,0 +1,59 @@
+#!/usr/bin/expect -f
+#
+# Drives the HYQual interactive menu to run the PCIe bandwidth test
+# (menu item 4) and then quits the tool.
+# Usage: expect run_hyqual_test.expect <hyqual_dir>
+# Exit status: 0 when the run completed, 1 on usage error, when the menu never
+# appeared, or when the test timed out.
+
+if {[llength $argv] < 1} {
+    puts stderr "usage: run_hyqual_test.expect <hyqual_dir>"
+    exit 1
+}
+
+set timeout 600 ;# wait at most 10 minutes for each prompt
+set hyqual_path [lindex $argv 0]
+
+spawn $hyqual_path/run
+expect {
+    "Key-in selection followed by :" {
+        send "4\r"
+    }
+    timeout {
+        puts "启动失败,未出现菜单提示"
+        exit 1
+    }
+    eof {
+        # The tool exited before showing its menu; report that as a failure
+        # instead of falling through and exiting 0.
+        puts "启动失败,未出现菜单提示"
+        exit 1
+    }
+}
+
+# Wait for the test to produce output, then leave the tool.
+expect {
+    -re "Bandwidth.*" {
+        # Output mentioning Bandwidth appeared; pause briefly, then send q.
+        sleep 2
+        send "q\r"
+    }
+    "Key-in selection followed by :" {
+        send "q\r"
+    }
+    timeout {
+        puts "测试超时,请检查 hyqual 是否卡住"
+        send "\003" ;# Ctrl+C to force the tool to stop
+        set timeout 10
+        expect eof
+        exit 1
+    }
+    eof {
+        exit 0
+    }
+}
+
+# Keep reading until the tool exits (bounded) so trailing output reaches the
+# log; returning right after "q" would close the pty and could truncate it.
+set timeout 30
+expect eof