#!/bin/bash
# Smoke test for the shared-memory setup between the host and two VMs.
#
# Checks, in order:
#   1. the host backing file exists
#   2. which processes hold the backing file open
#   3. both VMs answer a ping
#   4. the DAX character device exists inside each VM
#   5. bytes written on Node 0 can be read back on Node 1
#   6. bytes written on Node 1 can be read back on Node 0
#
# Optional environment overrides (defaults are the values this script
# originally hard-coded):
#   SHM_FILE   host backing file        (/dev/shm/cxlmemsim_shared)
#   NODE0_IP   address of Node 0        (192.168.100.10)
#   NODE1_IP   address of Node 1        (192.168.100.11)
#   DAX_DEV    device path in the VMs   (/dev/dax0.0)
#
# Exit status: 0 if both transfer directions succeed, 1 otherwise.

set -u

readonly SHM_FILE="${SHM_FILE:-/dev/shm/cxlmemsim_shared}"
readonly NODE0_IP="${NODE0_IP:-192.168.100.10}"
readonly NODE1_IP="${NODE1_IP:-192.168.100.11}"
readonly DAX_DEV="${DAX_DEV:-/dev/dax0.0}"

#######################################
# Run a command as root on a VM.
# Arguments: $1 - VM address; remaining args - remote command
# Returns:   ssh's exit status (the remote command's status, or 255 on
#            connection failure)
#######################################
vm_ssh() {
  local ip=$1
  shift
  # ConnectTimeout keeps an unresponsive sshd from hanging the whole test.
  ssh -o ConnectTimeout=5 "root@${ip}" "$@"
}

#######################################
# Write a payload into the DAX device on one VM and read the same byte
# range back on another VM.
# Globals:   DAX_DEV (read)
# Arguments: $1 - writer address, $2 - reader address,
#            $3 - byte offset, $4 - payload (must not contain single quotes)
# Outputs:   the bytes read back, on stdout; a diagnostic on stderr when
#            the remote write fails
# Returns:   non-zero if the remote write or read command failed
#######################################
dax_roundtrip() {
  local writer=$1 reader=$2 offset=$3 payload=$4

  # printf instead of `echo -n`: the latter is not portable across remote
  # shells. conv=notrunc makes sure dd never tries to truncate the target.
  if ! vm_ssh "$writer" \
    "printf '%s' '$payload' | dd of='$DAX_DEV' bs=1 seek=$offset conv=notrunc 2>/dev/null"; then
    echo " ✗ 错误: 在 $writer 上写入 $DAX_DEV 失败" >&2
    return 1
  fi
  # Same one-second settle delay the test has always used between the
  # write and the read.
  sleep 1

  vm_ssh "$reader" \
    "dd if='$DAX_DEV' bs=1 skip=$offset count=${#payload} 2>/dev/null"
}

#######################################
# Print the outcome of one transfer direction.
# Arguments: $1 - writer node index, $2 - reader node index,
#            $3 - string sent, $4 - string received,
#            $5 - message shown on mismatch
# Returns:   0 when the received string equals the sent one, 1 otherwise
#######################################
report_roundtrip() {
  local writer_idx=$1 reader_idx=$2 sent=$3 received=$4 fail_msg=$5
  local status=0

  if [[ "$received" == "$sent" ]]; then
    echo " ✓ 成功!Node $reader_idx 读取到Node $writer_idx写入的数据"
  else
    echo " ✗ 失败!$fail_msg"
    status=1
  fi
  echo " 写入: $sent"
  echo " 读取: $received"
  echo ""
  return "$status"
}

main() {
  local -a node_ips=("$NODE0_IP" "$NODE1_IP")
  local idx ip

  echo "========================================="
  echo "测试共享内存配置"
  echo "========================================="
  echo ""

  # 1. Host backing file must exist.
  echo "[1/6] 检查host共享内存文件..."
  if [[ ! -f "$SHM_FILE" ]]; then
    echo " ✗ 错误: $SHM_FILE 不存在"
    exit 1
  fi
  # ls -lh is kept only for its human-readable size column.
  echo " ✓ 文件存在: $(ls -lh "$SHM_FILE" | awk '{print $5}')"

  # 2. Which processes have the backing file open, and how many times.
  #    One lsof pass; awk counts exact matches on the COMMAND column, so a
  #    command name that happens to be a substring of another field (for
  #    example the file path) is not over-counted.
  echo "[2/6] 检查使用共享内存的进程..."
  local holders proc count
  holders=$(lsof "$SHM_FILE" 2>/dev/null \
    | awk 'NR > 1 { n[$1]++ } END { for (c in n) print c, n[c] }' \
    | sort -k1,1)
  if [[ -z "$holders" ]]; then
    echo " ✗ 警告: 没有进程在使用共享内存"
  else
    echo " ✓ 使用中的进程:"
    while read -r proc count; do
      echo " - $proc (${count}个实例)"
    done <<<"$holders"
  fi
  echo ""

  # 3. Both VMs must answer a single ping within two seconds.
  echo "[3/6] 检查VM网络连接..."
  local unreachable=0
  for idx in 0 1; do
    ip=${node_ips[idx]}
    if ping -c 1 -W 2 "$ip" >/dev/null 2>&1; then
      echo " ✓ Node $idx ($ip) 可达"
    else
      echo " ✗ Node $idx ($ip) 不可达"
      unreachable=1
    fi
  done
  echo ""

  if ((unreachable)); then
    echo "错误: 一个或多个VM不可达,无法继续测试"
    exit 1
  fi

  # 4. The DAX path must be a character device in each VM.
  echo "[4/6] 检查VM内DAX设备..."
  local missing_dax=0
  for idx in 0 1; do
    if vm_ssh "${node_ips[idx]}" "test -c '$DAX_DEV'"; then
      echo " ✓ Node $idx: $DAX_DEV 存在"
    else
      echo " ✗ Node $idx: $DAX_DEV 不存在"
      missing_dax=1
    fi
  done
  echo ""

  # Stop here when a device is missing: running dd against a non-existent
  # path would create a regular file with that name inside the VM.
  if ((missing_dax)); then
    echo "错误: 一个或多个VM缺少 $DAX_DEV,无法继续测试"
    exit 1
  fi

  # 5. Node 0 writes at offset 1024, Node 1 reads it back.
  echo "[5/6] 测试共享内存读写..."
  local TEST_STRING RESULT SHARED_WORKS=0
  TEST_STRING="SHARED_MEMORY_TEST_$(date +%s)"
  echo " 写入测试字符串到Node 0: $TEST_STRING"
  RESULT=$(dax_roundtrip "$NODE0_IP" "$NODE1_IP" 1024 "$TEST_STRING")
  if report_roundtrip 0 1 "$TEST_STRING" "$RESULT" "共享内存不工作"; then
    SHARED_WORKS=1
  fi

  # 6. Reverse direction: Node 1 writes at offset 2048, Node 0 reads.
  echo "[6/6] 测试反向读写..."
  local TEST_STRING2 RESULT2 REVERSE_WORKS=0
  TEST_STRING2="REVERSE_TEST_$(date +%s)"
  echo " 写入测试字符串到Node 1: $TEST_STRING2"
  RESULT2=$(dax_roundtrip "$NODE1_IP" "$NODE0_IP" 2048 "$TEST_STRING2")
  if report_roundtrip 1 0 "$TEST_STRING2" "$RESULT2" "反向共享不工作"; then
    REVERSE_WORKS=1
  fi

  # Summary
  echo "========================================="
  echo "测试总结"
  echo "========================================="
  if ((SHARED_WORKS && REVERSE_WORKS)); then
    echo "✓ 共享内存配置正确!"
    echo ""
    echo "可以运行Tigon多节点测试了:"
    echo " cd /home/yhgan913/CXLMemSim/workloads/tigon"
    echo " export CXL_BACKEND=dax"
    echo " export CXL_MEMORY_RESOURCE=$DAX_DEV"
    echo " ./scripts/run.sh TPCC TwoPLPasha 2 3 mixed 10 15 1 0 1 Clock OnDemand 200000000 1 WriteThrough None 30 10 BLACKHOLE 20000 0 0"
    exit 0
  else
    echo "✗ 共享内存配置有问题"
    echo ""
    echo "故障排查:"
    echo " 1. 确认两个VM都使用 $SHM_FILE"
    echo " 2. 检查 lsof $SHM_FILE"
    echo " 3. 查看 /tmp/qemu0.log 和 /tmp/qemu1.log"
    echo " 4. 重启VM: ./restart_vms_shared.sh"
    exit 1
  fi
}

main "$@"
0 commit comments