|
23 | 23 |
|
24 | 24 | #include "amdgpu_ras.h"
|
25 | 25 |
|
26 |
| -static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, |
27 |
| - void *ras_error_status, |
28 |
| - struct amdgpu_iv_entry *entry) |
29 |
| -{ |
30 |
| - return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); |
31 |
| -} |
32 |
| - |
33 |
| -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) |
34 |
| -{ |
35 |
| - int r; |
36 |
| - struct ras_fs_if fs_info = { |
37 |
| - .sysfs_name = "umc_err_count", |
38 |
| - }; |
39 |
| - struct ras_ih_if ih_info = { |
40 |
| - .cb = amdgpu_umc_process_ras_data_cb, |
41 |
| - }; |
42 |
| - |
43 |
| - if (!adev->umc.ras_if) { |
44 |
| - adev->umc.ras_if = |
45 |
| - kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); |
46 |
| - if (!adev->umc.ras_if) |
47 |
| - return -ENOMEM; |
48 |
| - adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; |
49 |
| - adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; |
50 |
| - adev->umc.ras_if->sub_block_index = 0; |
51 |
| - } |
52 |
| - ih_info.head = fs_info.head = *adev->umc.ras_if; |
53 |
| - |
54 |
| - r = amdgpu_ras_late_init(adev, adev->umc.ras_if, |
55 |
| - &fs_info, &ih_info); |
56 |
| - if (r) |
57 |
| - goto free; |
58 |
| - |
59 |
| - if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { |
60 |
| - r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); |
61 |
| - if (r) |
62 |
| - goto late_fini; |
63 |
| - } else { |
64 |
| - r = 0; |
65 |
| - goto free; |
66 |
| - } |
67 |
| - |
68 |
| - /* ras init of specific umc version */ |
69 |
| - if (adev->umc.ras_funcs && |
70 |
| - adev->umc.ras_funcs->err_cnt_init) |
71 |
| - adev->umc.ras_funcs->err_cnt_init(adev); |
72 |
| - |
73 |
| - return 0; |
74 |
| - |
75 |
| -late_fini: |
76 |
| - amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); |
77 |
| -free: |
78 |
| - kfree(adev->umc.ras_if); |
79 |
| - adev->umc.ras_if = NULL; |
80 |
| - return r; |
81 |
| -} |
82 |
| - |
83 |
| -void amdgpu_umc_ras_fini(struct amdgpu_device *adev) |
84 |
| -{ |
85 |
| - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && |
86 |
| - adev->umc.ras_if) { |
87 |
| - struct ras_common_if *ras_if = adev->umc.ras_if; |
88 |
| - struct ras_ih_if ih_info = { |
89 |
| - .head = *ras_if, |
90 |
| - .cb = amdgpu_umc_process_ras_data_cb, |
91 |
| - }; |
92 |
| - |
93 |
| - amdgpu_ras_late_fini(adev, ras_if, &ih_info); |
94 |
| - kfree(ras_if); |
95 |
| - } |
96 |
| -} |
97 |
| - |
98 |
| -int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, |
| 26 | +static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, |
99 | 27 | void *ras_error_status,
|
100 | 28 | struct amdgpu_iv_entry *entry,
|
101 | 29 | bool reset)
|
@@ -180,6 +108,100 @@ int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
|
180 | 108 | return AMDGPU_RAS_SUCCESS;
|
181 | 109 | }
|
182 | 110 |
|
| 111 | +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, |
| 112 | + void *ras_error_status, |
| 113 | + bool reset) |
| 114 | +{ |
| 115 | + int ret; |
| 116 | + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; |
| 117 | + struct ras_common_if head = { |
| 118 | + .block = AMDGPU_RAS_BLOCK__UMC, |
| 119 | + }; |
| 120 | + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); |
| 121 | + |
| 122 | + ret = |
| 123 | + amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset); |
| 124 | + |
| 125 | + if (ret == AMDGPU_RAS_SUCCESS && obj) { |
| 126 | + obj->err_data.ue_count += err_data->ue_count; |
| 127 | + obj->err_data.ce_count += err_data->ce_count; |
| 128 | + } |
| 129 | + |
| 130 | + return ret; |
| 131 | +} |
| 132 | + |
| 133 | +static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, |
| 134 | + void *ras_error_status, |
| 135 | + struct amdgpu_iv_entry *entry) |
| 136 | +{ |
| 137 | + return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); |
| 138 | +} |
| 139 | + |
| 140 | +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) |
| 141 | +{ |
| 142 | + int r; |
| 143 | + struct ras_fs_if fs_info = { |
| 144 | + .sysfs_name = "umc_err_count", |
| 145 | + }; |
| 146 | + struct ras_ih_if ih_info = { |
| 147 | + .cb = amdgpu_umc_process_ras_data_cb, |
| 148 | + }; |
| 149 | + |
| 150 | + if (!adev->umc.ras_if) { |
| 151 | + adev->umc.ras_if = |
| 152 | + kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); |
| 153 | + if (!adev->umc.ras_if) |
| 154 | + return -ENOMEM; |
| 155 | + adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; |
| 156 | + adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; |
| 157 | + adev->umc.ras_if->sub_block_index = 0; |
| 158 | + } |
| 159 | + ih_info.head = fs_info.head = *adev->umc.ras_if; |
| 160 | + |
| 161 | + r = amdgpu_ras_late_init(adev, adev->umc.ras_if, |
| 162 | + &fs_info, &ih_info); |
| 163 | + if (r) |
| 164 | + goto free; |
| 165 | + |
| 166 | + if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { |
| 167 | + r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); |
| 168 | + if (r) |
| 169 | + goto late_fini; |
| 170 | + } else { |
| 171 | + r = 0; |
| 172 | + goto free; |
| 173 | + } |
| 174 | + |
| 175 | + /* ras init of specific umc version */ |
| 176 | + if (adev->umc.ras_funcs && |
| 177 | + adev->umc.ras_funcs->err_cnt_init) |
| 178 | + adev->umc.ras_funcs->err_cnt_init(adev); |
| 179 | + |
| 180 | + return 0; |
| 181 | + |
| 182 | +late_fini: |
| 183 | + amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); |
| 184 | +free: |
| 185 | + kfree(adev->umc.ras_if); |
| 186 | + adev->umc.ras_if = NULL; |
| 187 | + return r; |
| 188 | +} |
| 189 | + |
| 190 | +void amdgpu_umc_ras_fini(struct amdgpu_device *adev) |
| 191 | +{ |
| 192 | + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && |
| 193 | + adev->umc.ras_if) { |
| 194 | + struct ras_common_if *ras_if = adev->umc.ras_if; |
| 195 | + struct ras_ih_if ih_info = { |
| 196 | + .head = *ras_if, |
| 197 | + .cb = amdgpu_umc_process_ras_data_cb, |
| 198 | + }; |
| 199 | + |
| 200 | + amdgpu_ras_late_fini(adev, ras_if, &ih_info); |
| 201 | + kfree(ras_if); |
| 202 | + } |
| 203 | +} |
| 204 | + |
183 | 205 | int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
|
184 | 206 | struct amdgpu_irq_src *source,
|
185 | 207 | struct amdgpu_iv_entry *entry)
|
|
0 commit comments