Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,13 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
case AMDGPU::V_CVT_PK_F32_BF8_sdwa:
// Does not support input modifiers: noabs, noneg, nosext.
return false;
case AMDGPU::V_CNDMASK_B32_sdwa:
// FIXME SISrcMods uses the same bitmask for SEXT and NEG
// modifiers and hence each instruction can only support one type
// of modifier; SEXT gets turned into NEG for this instruction.
if (Sext)
return false;
break;
}

// Find operand in instruction that matches source operand and replace it with
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/AMDGPU/sdwa-peephole-cndmask-sext.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck %s
; XFAIL: *

; FIXME The sext modifier is turned into a neg modifier in the asm output

define void @widget(ptr addrspace(7) %arg, <1 x i1> %arg1, <1 x i1> %arg2) #0 {
; CHECK-LABEL: widget:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s0, 0
; CHECK-NEXT: s_mov_b32 s1, s0
; CHECK-NEXT: s_mov_b32 s2, s0
; CHECK-NEXT: s_mov_b32 s3, s0
; CHECK-NEXT: buffer_load_ubyte v0, off, s[0:3], 0
; CHECK-NEXT: v_and_b32_e32 v1, 1, v5
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], vcc
; CHECK-NEXT: ; %bb.1: ; %cond.load
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: ds_read_b32 v1, v1
; CHECK-NEXT: ; %bb.2: ; %else
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], vcc
; CHECK-NEXT: s_cbranch_execz .LBB0_4
; CHECK-NEXT: ; %bb.3: ; %cond.store
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_cndmask_b32_sdwa v0, v2, sext(v0), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_or_b32_e32 v0, v0, v1
; CHECK-NEXT: ds_write_b32 v2, v0
; CHECK-NEXT: .LBB0_4: ; %else1
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
%load = load <1 x i8>, ptr addrspace(7) null, align 1
%sext = sext <1 x i8> %load to <1 x i32>
%select = select <1 x i1> %arg1, <1 x i32> %sext, <1 x i32> zeroinitializer
%call = tail call <1 x i32> @llvm.masked.load.v1i32.p3(ptr addrspace(3) null, i32 1, <1 x i1> %arg1, <1 x i32> zeroinitializer)
%or = or <1 x i32> %select, %call
tail call void @llvm.masked.store.v1i32.p3(<1 x i32> %or, ptr addrspace(3) null, i32 1, <1 x i1> %arg1)
tail call void @llvm.amdgcn.s.waitcnt(i32 0)
ret void
}
Loading