Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,9 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64, "LiLi*3Li", "n
TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts")

// Counter-wait builtins. The "vIUs" signature means: returns void and takes one
// unsigned short argument that must be an integer constant (the "I" prefix).
// Both are gated on the "gfx1250-insts" target feature.
TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts")

TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
Expand Down
18 changes: 18 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,24 @@ void test_s_monitor_sleep() {
__builtin_amdgcn_s_monitor_sleep(10);
}

// The builtin should lower to a direct call of llvm.amdgcn.s.wait.asynccnt,
// with the literal argument passed through as an i16 immediate.
// CHECK-LABEL: @test_s_wait_asynccnt(
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @llvm.amdgcn.s.wait.asynccnt(i16 0)
// CHECK-NEXT: ret void
//
void test_s_wait_asynccnt() {
__builtin_amdgcn_s_wait_asynccnt(0);
}

// The builtin should lower to a direct call of llvm.amdgcn.s.wait.tensorcnt,
// with the literal argument passed through as an i16 immediate.
// CHECK-LABEL: @test_s_wait_tensorcnt(
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @llvm.amdgcn.s.wait.tensorcnt(i16 0)
// CHECK-NEXT: ret void
//
void test_s_wait_tensorcnt() {
__builtin_amdgcn_s_wait_tensorcnt(0);
}

// CHECK-LABEL: @test_cvt_f16_fp8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
Expand Down
8 changes: 8 additions & 0 deletions clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ void test_s_monitor_sleep(short a) {
__builtin_amdgcn_s_monitor_sleep(a); // expected-error {{'__builtin_amdgcn_s_monitor_sleep' must be a constant integer}}
}

// Negative test: passing a runtime value must be diagnosed, because the
// builtin only accepts a constant integer argument.
void test_s_wait_asynccnt(short a) {
__builtin_amdgcn_s_wait_asynccnt(a); // expected-error {{'__builtin_amdgcn_s_wait_asynccnt' must be a constant integer}}
}

// Negative test: passing a runtime value must be diagnosed, because the
// builtin only accepts a constant integer argument.
void test_s_wait_tensorcnt(short a) {
__builtin_amdgcn_s_wait_tensorcnt(a); // expected-error {{'__builtin_amdgcn_s_wait_tensorcnt' must be a constant integer}}
}

// Negative test: both arguments are runtime values here, and the call must be
// diagnosed because the builtin requires a constant integer argument.
void test__builtin_amdgcn_cvt_f16_fp8(int a, int b) {
__builtin_amdgcn_cvt_f16_fp8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_fp8' must be a constant integer}}
}
Expand Down
12 changes: 12 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -3510,6 +3510,18 @@ def int_amdgcn_ashr_pk_u8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_u8_i32">,
// gfx1250 intrinsics
// ===----------------------------------------------------------------------===//

// Common shape of the gfx1250 counter-wait intrinsics: no results and a single
// i16 operand that must be an immediate (ImmArg on argument 0).
// Async waits decrement ASYNCcnt and tensor waits decrement TENSORcnt; both
// counters are modeled as InaccessibleMem.
class AMDGPUWaitAsyncIntrinsic :
Intrinsic<[], [llvm_i16_ty],
[IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>, IntrWillReturn, IntrNoCallback,
IntrNoFree]>;

// One intrinsic per counter. Each is bound to the Clang builtin named in its
// ClangBuiltin<> base and shares the AMDGPUWaitAsyncIntrinsic signature.
def int_amdgcn_s_wait_asynccnt :
ClangBuiltin<"__builtin_amdgcn_s_wait_asynccnt">, AMDGPUWaitAsyncIntrinsic;
def int_amdgcn_s_wait_tensorcnt :
ClangBuiltin<"__builtin_amdgcn_s_wait_tensorcnt">, AMDGPUWaitAsyncIntrinsic;

def int_amdgcn_ds_atomic_async_barrier_arrive_b64 :
ClangBuiltin<"__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64">,
Intrinsic<[], [local_ptr_ty],
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1764,6 +1764,27 @@ let OtherPredicates = [HasExportInsts] in
[(int_amdgcn_s_wait_kmcnt timm:$simm16)]>;
} // End SubtargetPredicate = isGFX12Plus, hasSideEffects = 1

// Counter-wait pseudos for ASYNCcnt and TENSORcnt. Each takes a 16-bit
// immediate, is selected directly from its intrinsic, and both reads and
// writes the counter register it waits on. The properties shared by the two
// instructions are set once on the enclosing let.
let SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1,
    mayLoad = 1, mayStore = 1, maybeAtomic = 0 in {
  let Uses = [ASYNCcnt], Defs = [ASYNCcnt] in
  def S_WAIT_ASYNCCNT :
    SOPP_Pseudo<"s_wait_asynccnt", (ins s16imm:$simm16), "$simm16",
                [(int_amdgcn_s_wait_asynccnt timm:$simm16)]>;

  let Uses = [TENSORcnt], Defs = [TENSORcnt] in
  def S_WAIT_TENSORCNT :
    SOPP_Pseudo<"s_wait_tensorcnt", (ins s16imm:$simm16), "$simm16",
                [(int_amdgcn_s_wait_tensorcnt timm:$simm16)]>;
} // End SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1,
  //     mayLoad = 1, mayStore = 1, maybeAtomic = 0

let SubtargetPredicate = HasWaitXcnt, hasSideEffects = 1 in {
def S_WAIT_XCNT :
SOPP_Pseudo<"s_wait_xcnt", (ins s16imm:$simm16), "$simm16">;
Expand Down Expand Up @@ -2609,6 +2630,8 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>;
//===----------------------------------------------------------------------===//
defm S_SETPRIO_INC_WG : SOPP_Real_32_gfx12<0x03e>;
defm S_WAIT_XCNT : SOPP_Real_32_gfx12<0x045>;
// Real encodings for the ASYNCcnt / TENSORcnt wait pseudos (SOPP opcodes 0x04a
// and 0x04b).
defm S_WAIT_ASYNCCNT : SOPP_Real_32_gfx12<0x04a>;
defm S_WAIT_TENSORCNT : SOPP_Real_32_gfx12<0x04b>;

//===----------------------------------------------------------------------===//
// SOPP - GFX11, GFX12.
Expand Down
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12

; The asynccnt wait intrinsic with an immediate of 0 should be selected to a
; single s_wait_asynccnt instruction, followed only by the program end.
define amdgpu_ps void @test_asynccnt() {
; GFX12-LABEL: test_asynccnt:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_asynccnt 0x0
; GFX12-NEXT: s_endpgm
call void @llvm.amdgcn.s.wait.asynccnt(i16 0)
ret void
}

; The tensorcnt wait intrinsic with an immediate of 0 should be selected to a
; single s_wait_tensorcnt instruction, followed only by the program end.
define amdgpu_ps void @test_tensorcnt() {
; GFX12-LABEL: test_tensorcnt:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_tensorcnt 0x0
; GFX12-NEXT: s_endpgm
call void @llvm.amdgcn.s.wait.tensorcnt(i16 0)
ret void
}

declare void @llvm.amdgcn.s.wait.asynccnt(i16)
declare void @llvm.amdgcn.s.wait.tensorcnt(i16)
20 changes: 20 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
Original file line number Diff line number Diff line change
@@ -1,6 +1,26 @@
// RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s
// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR --implicit-check-not=error: -strict-whitespace %s

// s_wait_asynccnt / s_wait_tensorcnt: the 16-bit immediate occupies the first
// two encoding bytes, least-significant byte first. Both mnemonics must be
// rejected when assembling for gfx1200.
s_wait_asynccnt 0x1234
// GFX1250: [0x34,0x12,0xca,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

s_wait_asynccnt 0xc1d1
// GFX1250: [0xd1,0xc1,0xca,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

s_wait_tensorcnt 0x0
// GFX1250: [0x00,0x00,0xcb,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

s_wait_tensorcnt 0x1
// GFX1250: [0x01,0x00,0xcb,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

s_wait_tensorcnt 0x3
// GFX1250: [0x03,0x00,0xcb,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

s_wait_xcnt 0x0
// GFX1250: [0x00,0x00,0xc5,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s

# Decoding each s_wait_asynccnt / s_wait_tensorcnt byte sequence below must
# print the matching mnemonic and immediate, and re-encode to the same bytes.
# GFX1250: s_wait_asynccnt 0x1234 ; encoding: [0x34,0x12,0xca,0xbf]
0x34,0x12,0xca,0xbf

# GFX1250: s_wait_asynccnt 0xc1d1 ; encoding: [0xd1,0xc1,0xca,0xbf]
0xd1,0xc1,0xca,0xbf

# GFX1250: s_wait_tensorcnt 0x0 ; encoding: [0x00,0x00,0xcb,0xbf]
0x00,0x00,0xcb,0xbf

# GFX1250: s_wait_tensorcnt 0x1 ; encoding: [0x01,0x00,0xcb,0xbf]
0x01,0x00,0xcb,0xbf

# GFX1250: s_wait_tensorcnt 0x3 ; encoding: [0x03,0x00,0xcb,0xbf]
0x03,0x00,0xcb,0xbf

# GFX1250: s_wait_xcnt 0x0 ; encoding: [0x00,0x00,0xc5,0xbf]
0x00,0x00,0xc5,0xbf

Expand Down
Loading