@@ -1731,6 +1731,39 @@ static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
 	return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
 }
 
+static struct {
+	enum ras_event_type type;
+	const char *name;
+} dump_event[] = {
+	{RAS_EVENT_TYPE_FATAL, "Fatal Error"},
+	{RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"},
+	{RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"},
+};
+
+static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev,
+						 struct device_attribute *attr, char *buf)
+{
+	struct amdgpu_ras *con =
+		container_of(attr, struct amdgpu_ras, event_state_attr);
+	struct ras_event_manager *event_mgr = con->event_mgr;
+	struct ras_event_state *event_state;
+	int i, size = 0;
+
+	if (!event_mgr)
+		return -EINVAL;
+
+	size += sysfs_emit_at(buf, size, "current seqno: %llu\n", atomic64_read(&event_mgr->seqno));
+	for (i = 0; i < ARRAY_SIZE(dump_event); i++) {
+		event_state = &event_mgr->event_state[dump_event[i].type];
+		size += sysfs_emit_at(buf, size, "%s: count:%llu, last_seqno:%llu\n",
+				      dump_event[i].name,
+				      atomic64_read(&event_state->count),
+				      event_state->last_seqno);
+	}
+
+	return (ssize_t)size;
+}
+
 static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1748,6 +1781,7 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
 		&con->features_attr.attr,
 		&con->version_attr.attr,
 		&con->schema_attr.attr,
+		&con->event_state_attr.attr,
 		NULL
 	};
 	struct attribute_group group = {
@@ -1980,6 +2014,8 @@ static DEVICE_ATTR(version, 0444,
 		amdgpu_ras_sysfs_version_show, NULL);
 static DEVICE_ATTR(schema, 0444,
 		amdgpu_ras_sysfs_schema_show, NULL);
+static DEVICE_ATTR(event_state, 0444,
+		amdgpu_ras_sysfs_event_state_show, NULL);
 static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -1990,6 +2026,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
 		&con->features_attr.attr,
 		&con->version_attr.attr,
 		&con->schema_attr.attr,
+		&con->event_state_attr.attr,
 		NULL
 	};
 	struct bin_attribute *bin_attrs[] = {
@@ -2012,6 +2049,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
 	con->schema_attr = dev_attr_schema;
 	sysfs_attr_init(attrs[2]);
 
+	/* add event_state entry */
+	con->event_state_attr = dev_attr_event_state;
+	sysfs_attr_init(attrs[3]);
+
 	if (amdgpu_bad_page_threshold != 0) {
 		/* add bad_page_features entry */
 		bin_attr_gpu_vram_bad_pages.private = NULL;
@@ -3440,13 +3481,17 @@ static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
 
 static void ras_event_mgr_init(struct ras_event_manager *mgr)
 {
+	struct ras_event_state *event_state;
 	int i;
 
 	memset(mgr, 0, sizeof(*mgr));
 	atomic64_set(&mgr->seqno, 0);
 
-	for (i = 0; i < ARRAY_SIZE(mgr->last_seqno); i++)
-		mgr->last_seqno[i] = RAS_EVENT_INVALID_ID;
+	for (i = 0; i < ARRAY_SIZE(mgr->event_state); i++) {
+		event_state = &mgr->event_state[i];
+		event_state->last_seqno = RAS_EVENT_INVALID_ID;
+		atomic64_set(&event_state->count, 0);
+	}
 }
 
 static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
@@ -3961,6 +4006,7 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_
 				      const void *caller)
 {
 	struct ras_event_manager *event_mgr;
+	struct ras_event_state *event_state;
 	int ret = 0;
 
 	if (type >= RAS_EVENT_TYPE_COUNT) {
@@ -3974,7 +4020,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_
 		goto out;
 	}
 
-	event_mgr->last_seqno[type] = atomic64_inc_return(&event_mgr->seqno);
+	event_state = &event_mgr->event_state[type];
+	event_state->last_seqno = atomic64_inc_return(&event_mgr->seqno);
+	atomic64_inc(&event_state->count);
 
 out:
 	if (ret && caller)
@@ -4000,7 +4048,7 @@ u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type
 	if (!event_mgr)
 		return RAS_EVENT_INVALID_ID;
 
-	id = event_mgr->last_seqno[type];
+	id = event_mgr->event_state[type].last_seqno;
 	break;
 	case RAS_EVENT_TYPE_INVALID:
 	default:
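
For reference, the new read-only event_state attribute emits the global sequence counter followed by one line per tracked event type, using the format strings in amdgpu_ras_sysfs_event_state_show() above. A read might look roughly like the sketch below; the values are hypothetical, and the exact path (the attribute is registered in the driver's RAS sysfs group alongside features, version and schema) is an assumption, not taken from this diff:

    $ cat /sys/class/drm/card0/device/ras/event_state    # hypothetical path
    current seqno: 12
    Fatal Error: count:1, last_seqno:3
    Poison Creation: count:5, last_seqno:11
    Poison Consumption: count:6, last_seqno:12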