Skip to content

Commit 59f488b

Browse files
Yang Wang authored and alexdeucher committed
drm/amdgpu: add ras event state device attribute support
Add amdgpu RAS 'event_state' sysfs device attribute support.

Signed-off-by: Yang Wang <[email protected]>
Reviewed-by: Tao Zhou <[email protected]>
Reviewed-by: Hawking Zhang <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
1 parent 1dd3409 commit 59f488b

File tree

2 files changed

+59
-5
lines changed

2 files changed

+59
-5
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,39 @@ static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
17311731
return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
17321732
}
17331733

1734+
static struct {
1735+
enum ras_event_type type;
1736+
const char *name;
1737+
} dump_event[] = {
1738+
{RAS_EVENT_TYPE_FATAL, "Fatal Error"},
1739+
{RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"},
1740+
{RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"},
1741+
};
1742+
1743+
static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev,
1744+
struct device_attribute *attr, char *buf)
1745+
{
1746+
struct amdgpu_ras *con =
1747+
container_of(attr, struct amdgpu_ras, event_state_attr);
1748+
struct ras_event_manager *event_mgr = con->event_mgr;
1749+
struct ras_event_state *event_state;
1750+
int i, size = 0;
1751+
1752+
if (!event_mgr)
1753+
return -EINVAL;
1754+
1755+
size += sysfs_emit_at(buf, size, "current seqno: %llu\n", atomic64_read(&event_mgr->seqno));
1756+
for (i = 0; i < ARRAY_SIZE(dump_event); i++) {
1757+
event_state = &event_mgr->event_state[dump_event[i].type];
1758+
size += sysfs_emit_at(buf, size, "%s: count:%llu, last_seqno:%llu\n",
1759+
dump_event[i].name,
1760+
atomic64_read(&event_state->count),
1761+
event_state->last_seqno);
1762+
}
1763+
1764+
return (ssize_t)size;
1765+
}
1766+
17341767
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
17351768
{
17361769
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1748,6 +1781,7 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
17481781
&con->features_attr.attr,
17491782
&con->version_attr.attr,
17501783
&con->schema_attr.attr,
1784+
&con->event_state_attr.attr,
17511785
NULL
17521786
};
17531787
struct attribute_group group = {
@@ -1980,6 +2014,8 @@ static DEVICE_ATTR(version, 0444,
19802014
amdgpu_ras_sysfs_version_show, NULL);
19812015
static DEVICE_ATTR(schema, 0444,
19822016
amdgpu_ras_sysfs_schema_show, NULL);
2017+
static DEVICE_ATTR(event_state, 0444,
2018+
amdgpu_ras_sysfs_event_state_show, NULL);
19832019
static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
19842020
{
19852021
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1990,6 +2026,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
19902026
&con->features_attr.attr,
19912027
&con->version_attr.attr,
19922028
&con->schema_attr.attr,
2029+
&con->event_state_attr.attr,
19932030
NULL
19942031
};
19952032
struct bin_attribute *bin_attrs[] = {
@@ -2012,6 +2049,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
20122049
con->schema_attr = dev_attr_schema;
20132050
sysfs_attr_init(attrs[2]);
20142051

2052+
/* add event_state entry */
2053+
con->event_state_attr = dev_attr_event_state;
2054+
sysfs_attr_init(attrs[3]);
2055+
20152056
if (amdgpu_bad_page_threshold != 0) {
20162057
/* add bad_page_features entry */
20172058
bin_attr_gpu_vram_bad_pages.private = NULL;
@@ -3440,13 +3481,17 @@ static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
34403481

34413482
static void ras_event_mgr_init(struct ras_event_manager *mgr)
34423483
{
3484+
struct ras_event_state *event_state;
34433485
int i;
34443486

34453487
memset(mgr, 0, sizeof(*mgr));
34463488
atomic64_set(&mgr->seqno, 0);
34473489

3448-
for (i = 0; i < ARRAY_SIZE(mgr->last_seqno); i++)
3449-
mgr->last_seqno[i] = RAS_EVENT_INVALID_ID;
3490+
for (i = 0; i < ARRAY_SIZE(mgr->event_state); i++) {
3491+
event_state = &mgr->event_state[i];
3492+
event_state->last_seqno = RAS_EVENT_INVALID_ID;
3493+
atomic64_set(&event_state->count, 0);
3494+
}
34503495
}
34513496

34523497
static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
@@ -3961,6 +4006,7 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_
39614006
const void *caller)
39624007
{
39634008
struct ras_event_manager *event_mgr;
4009+
struct ras_event_state *event_state;
39644010
int ret = 0;
39654011

39664012
if (type >= RAS_EVENT_TYPE_COUNT) {
@@ -3974,7 +4020,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_
39744020
goto out;
39754021
}
39764022

3977-
event_mgr->last_seqno[type] = atomic64_inc_return(&event_mgr->seqno);
4023+
event_state = &event_mgr->event_state[type];
4024+
event_state->last_seqno = atomic64_inc_return(&event_mgr->seqno);
4025+
atomic64_inc(&event_state->count);
39784026

39794027
out:
39804028
if (ret && caller)
@@ -4000,7 +4048,7 @@ u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type
40004048
if (!event_mgr)
40014049
return RAS_EVENT_INVALID_ID;
40024050

4003-
id = event_mgr->last_seqno[type];
4051+
id = event_mgr->event_state[type].last_seqno;
40044052
break;
40054053
case RAS_EVENT_TYPE_INVALID:
40064054
default:

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,9 +440,14 @@ enum ras_event_type {
440440
RAS_EVENT_TYPE_COUNT,
441441
};
442442

443+
/* Per-event-type RAS bookkeeping; ras_event_manager holds one per ras_event_type. */
struct ras_event_state {
444+
u64 last_seqno; /* seqno taken when this type was last marked; RAS_EVENT_INVALID_ID after init */
445+
atomic64_t count; /* incremented each time an event of this type is marked; 0 after init */
446+
};
447+
443448
struct ras_event_manager {
444449
atomic64_t seqno;
445-
u64 last_seqno[RAS_EVENT_TYPE_COUNT];
450+
struct ras_event_state event_state[RAS_EVENT_TYPE_COUNT];
446451
};
447452

448453
struct ras_event_id {
@@ -496,6 +501,7 @@ struct amdgpu_ras {
496501
struct device_attribute features_attr;
497502
struct device_attribute version_attr;
498503
struct device_attribute schema_attr;
504+
struct device_attribute event_state_attr;
499505
struct bin_attribute badpages_attr;
500506
struct dentry *de_ras_eeprom_table;
501507
/* block array */

0 commit comments

Comments
 (0)