Skip to content

Commit 7449c7f

Browse files
authored
Add gfx1150 and complete gfx1151 support (#2154)
1 parent be49885 commit 7449c7f

File tree

101 files changed

+170
-100
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

101 files changed

+170
-100
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
- venv for Tensile create on Linux
1010
- Flag to keep build_tmp when running Tensile
1111
- Generalized profiling scripts
12-
- GFX1151 support
12+
- GFX1151 and GFX1150 support
1313
- Single-threaded support in TensileCreateLibrary
1414
- Logic to remove temporary build artifacts
1515

Tensile/AsmCaps.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,50 @@
771771
'v_mov_b64': False,
772772
'v_pk_fma_f16': True,
773773
'v_pk_fmac_f16': False},
774+
(11, 5, 0): {'HasAddLshl': True,
775+
'HasAtomicAdd': True,
776+
'HasDirectToLdsDest': False,
777+
'HasDirectToLdsNoDest': False,
778+
'HasExplicitCO': True,
779+
'HasExplicitNC': True,
780+
'HasGLCModifier': True,
781+
'HasNTModifier': False,
782+
'HasLshlOr': True,
783+
'HasMFMA': False,
784+
'HasMFMA_b8': False,
785+
'HasMFMA_bf16_1k': False,
786+
'HasMFMA_bf16_original': False,
787+
'HasMFMA_constSrc': False,
788+
'HasMFMA_f64': False,
789+
'HasMFMA_f8': False,
790+
'HasMFMA_i8_908': False,
791+
'HasMFMA_i8_940': False,
792+
'HasMFMA_vgpr': False,
793+
'HasMFMA_xf32': False,
794+
'HasSMulHi': True,
795+
'HasWMMA': True,
796+
'KernargPreloading': False,
797+
'MaxLgkmcnt': 15,
798+
'MaxVmcnt': 63,
799+
'SupportedISA': True,
800+
'SupportedSource': True,
801+
'VOP3v_dot4_i32_i8': False,
802+
'v_dot2_f32_f16': True,
803+
'v_dot2c_f32_f16': True,
804+
'v_dot4_i32_i8': False,
805+
'v_dot4c_i32_i8': False,
806+
'v_fma_f16': True,
807+
'v_fma_f32': True,
808+
'v_fma_f64': True,
809+
'v_fma_mix_f32': True,
810+
'v_fmac_f16': False,
811+
'v_fmac_f32': True,
812+
'v_mac_f16': False,
813+
'v_mac_f32': False,
814+
'v_mad_mix_f32': False,
815+
'v_mov_b64': False,
816+
'v_pk_fma_f16': True,
817+
'v_pk_fmac_f16': False},
774818
(11, 5, 1): {'HasAddLshl': True,
775819
'HasAtomicAdd': True,
776820
'HasDirectToLdsDest': False,

Tensile/Common.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ class DeveloperWarning(Warning):
247247
(9,4,0), (9,4,1), (9,4,2),
248248
(10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1),
249249
(11,0,0), (11,0,1), (11,0,2),
250-
(11,5,1),
250+
(11,5,0), (11,5,1),
251251
(12,0,0), (12,0,1)] # assembly kernels writer supports these architectures
252252

253253
globalParameters["KeepBuildTmp"] = True # Do not remove build artifacts during the build process or build_tmp after build completes
@@ -324,7 +324,7 @@ class DeveloperWarning(Warning):
324324
'gfx1010':'navi10', 'gfx1011':'navi12', 'gfx1012':'navi14',
325325
'gfx1030':'navi21', 'gfx1031':'navi22', 'gfx1032':'navi23', 'gfx1034':'navi24', 'gfx1035':'rembrandt',
326326
'gfx1100':'navi31', 'gfx1101':'navi32', 'gfx1102':'navi33',
327-
'gfx1151':'gfx1151',
327+
'gfx1150':'strixpoint', 'gfx1151':'strixhalo',
328328
'gfx1200':'gfx1200',
329329
'gfx1201':'gfx1201'
330330
}
@@ -2466,7 +2466,7 @@ def assignGlobalParameters( config, capabilitiesCache: Optional[dict] = None ):
24662466
if os.name == "nt":
24672467
globalParameters["CurrentISA"] = (9,0,6)
24682468
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
2469-
isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (12,0,0), (12,0,1))
2469+
isasWithDisabledHWMonitor = ((9,4,1), (9,4,2), (11,0,0), (11,0,1), (11,0,2), (11,5,0), (11,5,1), (12,0,0), (12,0,1))
24702470
if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
24712471
isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
24722472
printWarning(f"HardwareMonitor currently disabled for {isaString}")

Tensile/Source/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ if(CMAKE_CXX_COMPILER MATCHES ".*/hipcc$" OR CMAKE_CXX_COMPILER MATCHES ".*clang
5151
endif()
5252

5353
if(CMAKE_CXX_COMPILER STREQUAL "hipcc")
54-
set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
54+
set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906:xnack- gfx908:xnack- gfx90a:xnack- gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1150 gfx1151 CACHE STRING "GPU architectures")
5555
else()
56-
set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "GPU architectures")
56+
set(TENSILE_GPU_ARCHS gfx803 gfx900 gfx906 gfx908 gfx90a gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 gfx1150 gfx1151 CACHE STRING "GPU architectures")
5757
endif()
5858

5959
include(CMakeDependentOption)

Tensile/Source/lib/include/Tensile/AMDGPU.hpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,9 @@ namespace Tensile
7373
gfx1035 = 1035,
7474
gfx1100 = 1100,
7575
gfx1101 = 1101,
76-
gfx1102 = 1102
76+
gfx1102 = 1102,
77+
gfx1150 = 1150,
78+
gfx1151 = 1151
7779
};
7880

7981
static std::string toString(Processor p)
@@ -118,6 +120,10 @@ namespace Tensile
118120
return "gfx1101";
119121
case AMDGPU::Processor::gfx1102:
120122
return "gfx1102";
123+
case AMDGPU::Processor::gfx1150:
124+
return "gfx1150";
125+
case AMDGPU::Processor::gfx1151:
126+
return "gfx1151";
121127
}
122128
return "";
123129
}
@@ -184,6 +190,14 @@ namespace Tensile
184190
{
185191
return AMDGPU::Processor::gfx1102;
186192
}
193+
else if(deviceString.find("gfx1150") != std::string::npos)
194+
{
195+
return AMDGPU::Processor::gfx1150;
196+
}
197+
else if(deviceString.find("gfx1151") != std::string::npos)
198+
{
199+
return AMDGPU::Processor::gfx1151;
200+
}
187201
else
188202
{
189203
return static_cast<AMDGPU::Processor>(0);

Tensile/Source/lib/include/Tensile/PlaceholderLibrary.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ namespace Tensile
5858
gfx1100,
5959
gfx1101,
6060
gfx1102,
61+
gfx1150,
62+
gfx1151,
6163
All
6264
};
6365

@@ -106,6 +108,10 @@ namespace Tensile
106108
return "TensileLibrary_*_gfx1101";
107109
case LazyLoadingInit::gfx1102:
108110
return "TensileLibrary_*_gfx1102";
111+
case LazyLoadingInit::gfx1150:
112+
return "TensileLibrary_*_gfx1150";
113+
case LazyLoadingInit::gfx1151:
114+
return "TensileLibrary_*_gfx1151";
109115
case LazyLoadingInit::None:
110116
return "";
111117
}

Tensile/Source/lib/include/Tensile/Serialization/Predicates.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ namespace Tensile
232232
iot::enumCase(io, value, "gfx1100", AMDGPU::Processor::gfx1100);
233233
iot::enumCase(io, value, "gfx1101", AMDGPU::Processor::gfx1101);
234234
iot::enumCase(io, value, "gfx1102", AMDGPU::Processor::gfx1102);
235+
iot::enumCase(io, value, "gfx1150", AMDGPU::Processor::gfx1150);
236+
iot::enumCase(io, value, "gfx1151", AMDGPU::Processor::gfx1151);
235237
}
236238
};
237239

Tensile/Tests/disabled/direct_to_lds/dtl_dgemm.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
TestParameters:
2-
marks: [xfail, skip-gfx900, skip-gfx906, skip-gfx908, skip-gfx1010, skip-gfx1011, skip-gfx1012, skip-gfx1030, skip-gfx1100, skip-gfx1101, skip-gfx1102, skip-gfx1151] # not supported by arch
2+
marks: [xfail, skip-gfx900, skip-gfx906, skip-gfx908, skip-gfx1010, skip-gfx1011, skip-gfx1012, skip-gfx1030, skip-gfx1100, skip-gfx1101, skip-gfx1102, skip-gfx1150, skip-gfx1151] # not supported by arch
33
# Failing on latest ROCm build, re-enable when passing
44

55
GlobalParameters:

Tensile/Tests/disabled/direct_to_lds/dtl_dgemm_lite.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
TestParameters:
2-
marks: [xfail, skip-gfx900, skip-gfx906, skip-gfx908, skip-gfx1010, skip-gfx1011, skip-gfx1012, skip-gfx1030, skip-gfx1100, skip-gfx1101, skip-gfx1102, skip-gfx1151] # not supported by arch
2+
marks: [xfail, skip-gfx900, skip-gfx906, skip-gfx908, skip-gfx1010, skip-gfx1011, skip-gfx1012, skip-gfx1030, skip-gfx1100, skip-gfx1101, skip-gfx1102, skip-gfx1150, skip-gfx1151] # not supported by arch
33
# Failing on latest ROCm build, re-enable when passing
44

55
GlobalParameters:

Tensile/Tests/disabled/direct_to_lds/dtl_tsgr_dgemm.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
TestParameters:
2-
marks: [xfail, skip-gfx900, skip-gfx906, skip-gfx908, skip-gfx1010, skip-gfx1011, skip-gfx1012, skip-gfx1030, skip-gfx1100, skip-gfx1101, skip-gfx1102, skip-gfx1151] # not supported by arch
2+
marks: [xfail, skip-gfx900, skip-gfx906, skip-gfx908, skip-gfx1010, skip-gfx1011, skip-gfx1012, skip-gfx1030, skip-gfx1100, skip-gfx1101, skip-gfx1102, skip-gfx1150, skip-gfx1151] # not supported by arch
33
# Failing on latest ROCm build, re-enable when passing
44

55
GlobalParameters:

0 commit comments

Comments
 (0)