-
Notifications
You must be signed in to change notification settings - Fork 15.1k
AMDGPU: Implement s_wait_asynccnt and s_wait_tensorcnt for gfx1250 #148292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Co-Authored-by: Stanislav Mekhanoshin <[email protected]>
Co-Authored-by: Vang Thao <[email protected]>
|
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Changpeng Fang (changpeng) Changes: Full diff: https://github.com/llvm/llvm-project/pull/148292.diff 8 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index a5ee8013adff6..4d371a9f7d6db 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -665,6 +665,9 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64, "LiLi*3Li", "n
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")
+
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 421099d3876e3..a1b91d0cc38dc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -24,6 +24,24 @@ void test_s_monitor_sleep() {
__builtin_amdgcn_s_monitor_sleep(10);
}
+// CHECK-LABEL: @test_s_wait_asynccnt(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.amdgcn.s.wait.asynccnt(i16 0)
+// CHECK-NEXT: ret void
+//
+void test_s_wait_asynccnt() {
+ __builtin_amdgcn_s_wait_asynccnt(0);
+}
+
+// CHECK-LABEL: @test_s_wait_tensorcnt(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.amdgcn.s.wait.tensorcnt(i16 0)
+// CHECK-NEXT: ret void
+//
+void test_s_wait_tensorcnt() {
+ __builtin_amdgcn_s_wait_tensorcnt(0);
+}
+
// CHECK-LABEL: @test_cvt_f16_fp8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 7494c4f984353..9711b3bdded6b 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -12,6 +12,14 @@ void test_s_monitor_sleep(short a) {
__builtin_amdgcn_s_monitor_sleep(a); // expected-error {{'__builtin_amdgcn_s_monitor_sleep' must be a constant integer}}
}
+void test_s_wait_asynccnt(short a) {
+ __builtin_amdgcn_s_wait_asynccnt(a); // expected-error {{'__builtin_amdgcn_s_wait_asynccnt' must be a constant integer}}
+}
+
+void test_s_wait_tensorcnt(short a) {
+ __builtin_amdgcn_s_wait_tensorcnt(a); // expected-error {{'__builtin_amdgcn_s_wait_tensorcnt' must be a constant integer}}
+}
+
void test__builtin_amdgcn_cvt_f16_fp8(int a, int b) {
__builtin_amdgcn_cvt_f16_fp8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_fp8' must be a constant integer}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 16885f331e9dd..8016757cf0f3c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3510,6 +3510,18 @@ def int_amdgcn_ashr_pk_u8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_u8_i32">,
// gfx1250 intrinsics
// ===----------------------------------------------------------------------===//
+// Async waits decrement ASYNCcnt and tensor waits decrement TENSORcnt which is
+// modeled as InaccessibleMem.
+class AMDGPUWaitAsyncIntrinsic :
+ Intrinsic<[], [llvm_i16_ty],
+ [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>, IntrWillReturn, IntrNoCallback,
+ IntrNoFree]>;
+
+def int_amdgcn_s_wait_asynccnt :
+ ClangBuiltin<"__builtin_amdgcn_s_wait_asynccnt">, AMDGPUWaitAsyncIntrinsic;
+def int_amdgcn_s_wait_tensorcnt :
+ ClangBuiltin<"__builtin_amdgcn_s_wait_tensorcnt">, AMDGPUWaitAsyncIntrinsic;
+
def int_amdgcn_ds_atomic_async_barrier_arrive_b64 :
ClangBuiltin<"__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64">,
Intrinsic<[], [local_ptr_ty],
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index c7c4276e0e252..2472b76fcf02c 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1764,6 +1764,27 @@ let OtherPredicates = [HasExportInsts] in
[(int_amdgcn_s_wait_kmcnt timm:$simm16)]>;
} // End SubtargetPredicate = isGFX12Plus, hasSideEffects = 1
+let SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1 in {
+ def S_WAIT_ASYNCCNT :
+ SOPP_Pseudo<"s_wait_asynccnt", (ins s16imm:$simm16), "$simm16",
+ [(int_amdgcn_s_wait_asynccnt timm:$simm16)]> {
+ let mayLoad = 1;
+ let mayStore = 1;
+ let maybeAtomic = 0;
+ let Uses = [ASYNCcnt];
+ let Defs = [ASYNCcnt];
+ }
+ def S_WAIT_TENSORCNT :
+ SOPP_Pseudo<"s_wait_tensorcnt", (ins s16imm:$simm16), "$simm16",
+ [(int_amdgcn_s_wait_tensorcnt timm:$simm16)]> {
+ let mayLoad = 1;
+ let mayStore = 1;
+ let maybeAtomic = 0;
+ let Uses = [TENSORcnt];
+ let Defs = [TENSORcnt];
+ }
+} // End SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1
+
let SubtargetPredicate = HasWaitXcnt, hasSideEffects = 1 in {
def S_WAIT_XCNT :
SOPP_Pseudo<"s_wait_xcnt", (ins s16imm:$simm16), "$simm16">;
@@ -2609,6 +2630,8 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>;
//===----------------------------------------------------------------------===//
defm S_SETPRIO_INC_WG : SOPP_Real_32_gfx12<0x03e>;
defm S_WAIT_XCNT : SOPP_Real_32_gfx12<0x045>;
+defm S_WAIT_ASYNCCNT : SOPP_Real_32_gfx12<0x04a>;
+defm S_WAIT_TENSORCNT : SOPP_Real_32_gfx12<0x04b>;
//===----------------------------------------------------------------------===//
// SOPP - GFX11, GFX12.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll
new file mode 100644
index 0000000000000..2173d07baa57e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12
+
+define amdgpu_ps void @test_asynccnt() {
+; GFX12-LABEL: test_asynccnt:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_asynccnt 0x0
+; GFX12-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.wait.asynccnt(i16 0)
+ ret void
+}
+
+define amdgpu_ps void @test_tensorcnt() {
+; GFX12-LABEL: test_tensorcnt:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_tensorcnt 0x0
+; GFX12-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.wait.tensorcnt(i16 0)
+ ret void
+}
+
+declare void @llvm.amdgcn.s.wait.asynccnt(i16)
+declare void @llvm.amdgcn.s.wait.tensorcnt(i16)
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
index 6ebc17468eed6..234c2ed0de793 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
@@ -1,6 +1,26 @@
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR --implicit-check-not=error: -strict-whitespace %s
+s_wait_asynccnt 0x1234
+// GFX1250: [0x34,0x12,0xca,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_wait_asynccnt 0xc1d1
+// GFX1250: [0xd1,0xc1,0xca,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_wait_tensorcnt 0x0
+// GFX1250: [0x00,0x00,0xcb,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_wait_tensorcnt 0x1
+// GFX1250: [0x01,0x00,0xcb,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_wait_tensorcnt 0x3
+// GFX1250: [0x03,0x00,0xcb,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
s_wait_xcnt 0x0
// GFX1250: [0x00,0x00,0xc5,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
index 220f9e5084f0e..e7026df3c0e2b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
@@ -1,5 +1,20 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s
+# GFX1250: s_wait_asynccnt 0x1234 ; encoding: [0x34,0x12,0xca,0xbf]
+0x34,0x12,0xca,0xbf
+
+# GFX1250: s_wait_asynccnt 0xc1d1 ; encoding: [0xd1,0xc1,0xca,0xbf]
+0xd1,0xc1,0xca,0xbf
+
+# GFX1250: s_wait_tensorcnt 0x0 ; encoding: [0x00,0x00,0xcb,0xbf]
+0x00,0x00,0xcb,0xbf
+
+# GFX1250: s_wait_tensorcnt 0x1 ; encoding: [0x01,0x00,0xcb,0xbf]
+0x01,0x00,0xcb,0xbf
+
+# GFX1250: s_wait_tensorcnt 0x3 ; encoding: [0x03,0x00,0xcb,0xbf]
+0x03,0x00,0xcb,0xbf
+
# GFX1250: s_wait_xcnt 0x0 ; encoding: [0x00,0x00,0xc5,0xbf]
0x00,0x00,0xc5,0xbf
|
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/18891. Here is the relevant piece of the build log for reference. |
No description provided.