@@ -1537,7 +1537,6 @@ static int kfd_dev_create_p2p_links(void)
 /* Helper function. See kfd_fill_gpu_cache_info for parameter description */
 static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
 				struct kfd_gpu_cache_info *pcache_info,
-				struct kfd_cu_info *cu_info,
 				int cu_bitmask,
 				int cache_type, unsigned int cu_processor_id,
 				int cu_block)
@@ -1599,7 +1598,8 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
 /* Helper function. See kfd_fill_gpu_cache_info for parameter description */
 static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
 				struct kfd_gpu_cache_info *pcache_info,
-				struct kfd_cu_info *cu_info,
+				struct amdgpu_cu_info *cu_info,
+				struct amdgpu_gfx_config *gfx_info,
 				int cache_type, unsigned int cu_processor_id,
 				struct kfd_node *knode)
 {
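Taken together, the two signature changes drop the intermediate struct kfd_cu_info: fill_in_l1_pcache() now receives only the CU bitmap word it needs, while fill_in_l2_l3_pcache() is handed the driver-owned amdgpu_cu_info and amdgpu_gfx_config directly. The sketch below is a minimal stand-in for the fields this patch actually touches; the member names come from the hunks themselves, but the array bounds and everything else are illustrative assumptions, not the real kernel definitions.

```c
#include <stdint.h>

/* Illustrative stand-ins only -- the real amdgpu structures are larger.
 * Field names match what the hunks below dereference; bounds are assumed.
 */
#define MAX_XCC	8	/* assumed number of XCC instances */
#define MAX_SA	16	/* assumed shader-array slots per XCC */

struct amdgpu_cu_info {
	uint32_t number;			/* active CU count (was kfd_cu_info.cu_active_number) */
	uint32_t simd_per_cu;			/* SIMDs per CU */
	uint32_t bitmap[MAX_XCC][4][MAX_SA];	/* per-XCC CU bitmaps (was kfd_cu_info.cu_bitmap) */
};

struct amdgpu_gfx_config {
	uint32_t max_shader_engines;	/* was kfd_cu_info.num_shader_engines */
	uint32_t max_sh_per_se;		/* was kfd_cu_info.num_shader_arrays_per_engine */
	uint32_t max_cu_per_sh;		/* was kfd_cu_info.num_cu_per_sh */
};
```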
@@ -1610,7 +1610,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
 
 	start = ffs(knode->xcc_mask) - 1;
 	end = start + NUM_XCC(knode->xcc_mask);
-	cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
+	cu_sibling_map_mask = cu_info->bitmap[start][0][0];
 	cu_sibling_map_mask &=
 		((1 << pcache_info[cache_type].num_cu_shared) - 1);
 	first_active_cu = ffs(cu_sibling_map_mask);
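The lines above seed cu_sibling_map_mask from the first XCC's first CU bitmap word, restrict it to the CUs that can share this cache instance, and use ffs() to locate the first active CU (1-based, 0 if none). A standalone sketch of that masking step, with made-up values:

```c
#include <stdio.h>
#include <strings.h>	/* ffs() */

int main(void)
{
	unsigned int bitmap_word = 0xF0;	/* CUs 4-7 present, CUs 0-3 fused off */
	unsigned int num_cu_shared = 6;		/* this cache is shared by 6 CUs */

	/* Keep only the CUs that can share this cache instance. */
	unsigned int mask = bitmap_word & ((1u << num_cu_shared) - 1);

	/* ffs() returns the 1-based index of the lowest set bit, 0 if none. */
	int first_active_cu = ffs((int)mask);

	printf("mask=0x%x first_active_cu=%d\n", mask, first_active_cu);
	/* prints: mask=0x30 first_active_cu=5 */
	return 0;
}
```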
@@ -1646,15 +1646,15 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
 		k = 0;
 
 		for (xcc = start; xcc < end; xcc++) {
-			for (i = 0; i < cu_info->num_shader_engines; i++) {
-				for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+			for (i = 0; i < gfx_info->max_shader_engines; i++) {
+				for (j = 0; j < gfx_info->max_sh_per_se; j++) {
 					pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
 					pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
 					pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
 					pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
 					k += 4;
 
-					cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
+					cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4];
 					cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
 				}
 			}
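Apart from swapping cu_info->cu_bitmap for cu_info->bitmap and taking the engine/array counts from gfx_info, the loop body is unchanged: each 32-bit sibling mask is split into four consecutive bytes of pcache->sibling_map. A self-contained sketch of that byte-splitting step (buffer and function names are hypothetical):

```c
#include <stdint.h>
#include <stdio.h>

/* Unpack one 32-bit CU sibling mask into four bytes, mirroring the
 * sibling_map[k]..sibling_map[k+3] stores in the hunk above.
 */
static void pack_sibling_mask(uint8_t *sibling_map, int k, uint32_t mask)
{
	sibling_map[k]     = (uint8_t)(mask & 0xFF);
	sibling_map[k + 1] = (uint8_t)((mask >> 8) & 0xFF);
	sibling_map[k + 2] = (uint8_t)((mask >> 16) & 0xFF);
	sibling_map[k + 3] = (uint8_t)((mask >> 24) & 0xFF);
}

int main(void)
{
	uint8_t map[4] = { 0 };

	/* e.g. CUs 0-3 and 16-17 share this cache */
	pack_sibling_mask(map, 0, 0x0003000Fu);
	printf("%02x %02x %02x %02x\n", map[0], map[1], map[2], map[3]);
	/* prints: 0f 00 03 00 */
	return 0;
}
```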
@@ -1679,16 +1679,14 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
 	unsigned int cu_processor_id;
 	int ret;
 	unsigned int num_cu_shared;
-	struct kfd_cu_info cu_info;
-	struct kfd_cu_info *pcu_info;
+	struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
+	struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
 	int gpu_processor_id;
 	struct kfd_cache_properties *props_ext;
 	int num_of_entries = 0;
 	int num_of_cache_types = 0;
 	struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
 
-	amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
-	pcu_info = &cu_info;
 
 	gpu_processor_id = dev->node_props.simd_id_base;
 
@@ -1715,12 +1713,12 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
 		cu_processor_id = gpu_processor_id;
 		if (pcache_info[ct].cache_level == 1) {
 			for (xcc = start; xcc < end; xcc++) {
-				for (i = 0; i < pcu_info->num_shader_engines; i++) {
-					for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
-						for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+				for (i = 0; i < gfx_info->max_shader_engines; i++) {
+					for (j = 0; j < gfx_info->max_sh_per_se; j++) {
+						for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
 
-							ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
-										pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
+							ret = fill_in_l1_pcache(&props_ext, pcache_info,
+										cu_info->bitmap[xcc][i % 4][j + i / 4], ct,
 										cu_processor_id, k);
 
 							if (ret < 0)
@@ -1733,17 +1731,17 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
 
 							/* Move to next CU block */
 							num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
-								pcu_info->num_cu_per_sh) ?
+								gfx_info->max_cu_per_sh) ?
 								pcache_info[ct].num_cu_shared :
-								(pcu_info->num_cu_per_sh - k);
+								(gfx_info->max_cu_per_sh - k);
 							cu_processor_id += num_cu_shared;
 						}
 					}
 				}
 			}
 		} else {
 			ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
-					pcu_info, ct, cu_processor_id, kdev);
+					cu_info, gfx_info, ct, cu_processor_id, kdev);
 
 			if (ret < 0)
 				break;
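In the L1 branch above, cu_processor_id advances by the number of CUs the just-filled cache block actually covers, so the last block in a shader array is clamped to whatever CUs remain; the only change is that the limit now comes from gfx_info->max_cu_per_sh. A standalone sketch of that clamp, with the limit passed as a plain parameter:

```c
#include <stdio.h>

/* Number of CUs covered by the cache block starting at CU index k:
 * a full num_cu_shared block, unless the shader array ends first.
 */
static unsigned int cu_block_span(unsigned int k, unsigned int num_cu_shared,
				  unsigned int max_cu_per_sh)
{
	return (k + num_cu_shared <= max_cu_per_sh) ? num_cu_shared
						    : max_cu_per_sh - k;
}

int main(void)
{
	/* 10 CUs per SH, caches shared by 4 CUs: blocks span 4, 4, then 2 */
	for (unsigned int k = 0; k < 10; k += 4)
		printf("k=%u span=%u\n", k, cu_block_span(k, 4, 10));
	return 0;
}
```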
@@ -1922,10 +1920,11 @@ int kfd_topology_add_device(struct kfd_node *gpu)
 {
 	uint32_t gpu_id;
 	struct kfd_topology_device *dev;
-	struct kfd_cu_info *cu_info;
 	int res = 0;
 	int i;
 	const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
+	struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
+	struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
 
 	gpu_id = kfd_generate_gpu_id(gpu);
 	if (gpu->xcp && !gpu->xcp->ddev) {
@@ -1963,12 +1962,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
 	/* Fill-in additional information that is not available in CRAT but
 	 * needed for the topology
 	 */
-	cu_info = kzalloc(sizeof(struct kfd_cu_info), GFP_KERNEL);
-	if (!cu_info)
-		return -ENOMEM;
-
-	amdgpu_amdkfd_get_cu_info(dev->gpu->adev, cu_info);
-
 	for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE - 1; i++) {
 		dev->node_props.name[i] = __tolower(asic_name[i]);
 		if (asic_name[i] == '\0')
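Because cu_info is now initialised to &gpu->adev->gfx.cu_info in the declarations added earlier, the temporary kzalloc() + amdgpu_amdkfd_get_cu_info() snapshot can go away entirely, as can the matching kfree() removed in the last hunk. A hedged sketch of the two ownership patterns with stand-in types; neither function is from the kernel:

```c
#include <stdlib.h>
#include <string.h>

/* Stand-in types; the real kfd/amdgpu structures are much richer. */
struct cu_state { unsigned int number; unsigned int simd_per_cu; };
struct device_state { struct cu_state cu_info; };

/* Old pattern (removed here): snapshot device data into a heap copy that
 * every exit path must remember to free.
 */
static struct cu_state *snapshot_cu_info(const struct device_state *dev)
{
	struct cu_state *copy = calloc(1, sizeof(*copy));

	if (!copy)
		return NULL;
	memcpy(copy, &dev->cu_info, sizeof(*copy));
	return copy;			/* caller must free() */
}

/* New pattern: borrow a pointer to device-owned state; nothing to free. */
static const struct cu_state *borrow_cu_info(const struct device_state *dev)
{
	return &dev->cu_info;
}

int main(void)
{
	struct device_state dev = { .cu_info = { .number = 64, .simd_per_cu = 4 } };
	struct cu_state *old_way = snapshot_cu_info(&dev);
	const struct cu_state *new_way = borrow_cu_info(&dev);

	free(old_way);			/* explicit cleanup needed before */
	(void)new_way;			/* no cleanup needed now */
	return 0;
}
```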
@@ -1977,7 +1970,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
 	dev->node_props.name[i] = '\0';
 
 	dev->node_props.simd_arrays_per_engine =
-		cu_info->num_shader_arrays_per_engine;
+		gfx_info->max_sh_per_se;
 
 	dev->node_props.gfx_target_version =
 		gpu->kfd->device_info.gfx_target_version;
@@ -2058,7 +2051,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
 	 */
 	if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
 		dev->node_props.simd_count =
-			cu_info->simd_per_cu * cu_info->cu_active_number;
+			cu_info->simd_per_cu * cu_info->number;
 		dev->node_props.max_waves_per_simd = 10;
 	}
 
@@ -2085,8 +2078,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
 
 	kfd_notify_gpu_change(gpu_id, 1);
 
-	kfree(cu_info);
-
 	return 0;
 }
 