|
25 | 25 | ############################################################################### |
26 | 26 | import datetime |
27 | 27 |
|
| 28 | +from nodescraper.enums.eventpriority import EventPriority |
28 | 29 | from nodescraper.enums.executionstatus import ExecutionStatus |
29 | 30 | from nodescraper.plugins.inband.dmesg.analyzer_args import DmesgAnalyzerArgs |
30 | 31 | from nodescraper.plugins.inband.dmesg.dmesg_analyzer import DmesgAnalyzer |
@@ -150,3 +151,56 @@ def test_exclude_category(system_info): |
150 | 151 | assert len(res.events) == 4 |
151 | 152 | for event in res.events: |
152 | 153 | assert event.category != "RAS" |
| 154 | + |
| 155 | + |
def test_page_fault(system_info):
    """Check that amdgpu page-fault lines in dmesg are reported as ERROR events.

    The sample log holds four lines that mention a page fault: three are
    followed by register/detail lines and one has a garbled, unterminated
    header. The analyzer, run with default arguments, is expected to finish
    with ERROR status and produce exactly four events, each with ERROR
    priority and the description "amdgpu Page Fault".
    """
    raw_log = (
        # Fault 1: [mmhub0] no-retry page fault on 0000:03:00.0, with detail lines.
        "kern :err : 2025-01-01T00:00:00,000000+00:00 amdgpu 0000:03:00.0: amdgpu: [mmhub0] no-retry page fault (src_id:0 ring:0 vmid:0 pasid:0, for process pid 0 thread pid 0)\n"
        "kern :err : 2025-01-01T00:00:01,000000+00:00 amdgpu 0000:03:00.0: amdgpu: test example 123\n"
        "kern :err : 2025-01-01T00:00:02,000000+00:00 amdgpu 0000:03:00.0: amdgpu: test example 123\n"
        "kern :err : 2025-01-01T00:00:03,000000+00:00 amdgpu 0000:03:00.0: amdgpu: VM_L2_PROTECTION_FAULT_STATUS:0x00000000\n"
        "kern :err : 2025-01-01T00:00:04,000000+00:00 amdgpu 0000:03:00.0: amdgpu: Faulty UTCL2 client ID: ABC123 (0x000)\n"
        "kern :err : 2025-01-01T00:00:05,000000+00:00 amdgpu 0000:03:00.0: amdgpu: MORE_FAULTS: 0x0\n"
        "kern :err : 2025-01-01T00:00:06,000000+00:00 amdgpu 0000:03:00.0: amdgpu: WALKER_ERROR: 0x0\n"
        "kern :err : 2025-01-01T00:00:07,000000+00:00 amdgpu 0000:03:00.0: amdgpu: PERMISSION_FAULTS: 0x0\n"
        "kern :err : 2025-01-01T00:00:08,000000+00:00 amdgpu 0000:03:00.0: amdgpu: MAPPING_ERROR: 0x0\n"
        "kern :err : 2025-01-01T00:00:09,000000+00:00 amdgpu 0000:03:00.0: amdgpu: RW: 0x0\n"
        # Info-level line separating the fault records.
        "kern :info : 2025-01-01T00:00:10,000000+00:00 TEST TEST\n"
        # Fault 2: [gfxhub0] retry page fault on 0000:03:00.0, with detail lines.
        "kern :err : 2025-01-01T00:00:11,000000+00:00 amdgpu 0000:03:00.0: amdgpu: [gfxhub0] retry page fault (src_id:0 ring:0 vmid:0 pasid:0, for process pid 0 thread pid 0)\n"
        "kern :err : 2025-01-01T00:00:12,000000+00:00 amdgpu 0000:03:00.0: amdgpu: test example 123\n"
        "kern :err : 2025-01-01T00:00:13,000000+00:00 amdgpu 0000:03:00.0: amdgpu: test example 123\n"
        "kern :err : 2025-01-01T00:00:14,000000+00:00 amdgpu 0000:03:00.0: amdgpu: VM_L2_PROTECTION_FAULT_STATUS:0x00000000\n"
        "kern :err : 2025-01-01T00:00:15,000000+00:00 amdgpu 0000:03:00.0: amdgpu: Faulty UTCL2 client ID: ABC123 (0x000)\n"
        "kern :err : 2025-01-01T00:00:16,000000+00:00 amdgpu 0000:03:00.0: amdgpu: MORE_FAULTS: 0x0\n"
        "kern :err : 2025-01-01T00:00:17,000000+00:00 amdgpu 0000:03:00.0: amdgpu: WALKER_ERROR: 0x0\n"
        "kern :err : 2025-01-01T00:00:18,000000+00:00 amdgpu 0000:03:00.0: amdgpu: PERMISSION_FAULTS: 0x0\n"
        "kern :err : 2025-01-01T00:00:19,000000+00:00 amdgpu 0000:03:00.0: amdgpu: MAPPING_ERROR: 0x0\n"
        "kern :err : 2025-01-01T00:00:20,000000+00:00 amdgpu 0000:03:00.0: amdgpu: RW: 0x0\n"
        "kern :info : 2025-01-01T00:00:21,000000+00:00 TEST TEST\n"
        # Fault 3: header on 0003:02:00.0 with garbled, unterminated details
        # and no follow-up detail lines.
        "kern :err : 2025-01-01T00:00:22,000000+00:00 amdgpu 0003:02:00.0: amdgpu: [gfxhub0] retry page fault (swpekfwpo\n"
        "kern :info : 2025-01-01T00:00:23,000000+00:00 TEST TEST\n"
        # Fault 4: [mmhub0] no-retry page fault on 0000:f5:00.0, with detail lines.
        "kern :err : 2025-01-01T00:00:24,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: [mmhub0] no-retry page fault (src_id:0 ring:0 vmid:0 pasid:0, for process pid 0 thread pid 0)\n"
        "kern :err : 2025-01-01T00:00:25,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: test example 123\n"
        "kern :err : 2025-01-01T00:00:26,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: test example 123\n"
        "kern :err : 2025-01-01T00:00:27,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: test example 123\n"
        "kern :err : 2025-01-01T00:00:28,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: VM_L2_PROTECTION_FAULT_STATUS:0x00000000\n"
        "kern :err : 2025-01-01T00:00:29,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: Faulty UTCL2 client ID: ABC123 (0x000)\n"
        "kern :err : 2025-01-01T00:00:30,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: MORE_FAULTS: 0x0\n"
        "kern :err : 2025-01-01T00:00:31,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: WALKER_ERROR: 0x0\n"
        "kern :err : 2025-01-01T00:00:32,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: PERMISSION_FAULTS: 0x0\n"
        "kern :err : 2025-01-01T00:00:33,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: MAPPING_ERROR: 0x0\n"
        "kern :err : 2025-01-01T00:00:34,000000+00:00 amdgpu 0000:f5:00.0: amdgpu: RW: 0x0\n"
    )
    fault_log = DmesgData(dmesg_content=raw_log)

    # No analyzer args are supplied, so the analyzer runs with its defaults.
    outcome = DmesgAnalyzer(system_info=system_info).analyze_data(fault_log)

    assert outcome.status == ExecutionStatus.ERROR
    assert len(outcome.events) == 4
    for fault_event in outcome.events:
        assert fault_event.priority == EventPriority.ERROR
        assert fault_event.description == "amdgpu Page Fault"
0 commit comments