Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2822,9 +2822,13 @@ SDValue SITargetLowering::LowerFormalArguments(
const Function &Fn = MF.getFunction();
FunctionType *FType = MF.getFunction().getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
bool IsUnsupportedHsa = false;

if (Subtarget->isAmdHsaOS() && AMDGPU::isGraphics(CallConv)) {
report_fatal_error("unsupported non-compute shaders with HSA");
DiagnosticInfoUnsupported NoGraphicsHSA(
Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
DAG.getContext()->diagnose(NoGraphicsHSA);
IsUnsupportedHsa = true;
}

SmallVector<ISD::InputArg, 16> Splits;
Expand Down Expand Up @@ -2933,7 +2937,7 @@ SDValue SITargetLowering::LowerFormalArguments(

for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
if (Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) {
if ((Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) || IsUnsupportedHsa) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to keep this as it is, with the separate handling down here, I'd go with a broader name such as IsError.
However, I think it would be cleaner to just have a separate loop populating InVals in the error case with the undefs

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, I think it would be cleaner to just have a separate loop populating InVals in the error case with the undefs

When I do this:

  if (Subtarget->isAmdHsaOS() && AMDGPU::isGraphics(CallConv)) {
    DiagnosticInfoUnsupported NoGraphicsHSA(
        Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
    DAG.getContext()->diagnose(NoGraphicsHSA);
    for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
      const ISD::InputArg &Arg = Ins[i];
      InVals.push_back(DAG.getUNDEF(Arg.VT));
    }
    return Chain;
  }

I get the following error later:
llc: ~/git/trunk21.0/llvm-project/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h:945: void llvm::SIMachineFunctionInfo::setScratchRSrcReg(llvm::Register): Assertion `Reg != 0 && "Should never be unset"' failed.
I suspect the problem is that the other initialization stuff isn't done because of the early return.

Or are you thinking about something else?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right, the other special input setup stuff still needs to happen. I suppose you can rename the variable

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although in that case we should stop requiring the CC lowering to set up the scratch resource. It's really a fixed constant

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right, the other special input setup stuff still needs to happen. I suppose you can rename the variable

Already done :)

Although in that case we should stop requiring the CC lowering to set up the scratch resource. It's really a fixed constant

Hm, the function seems to do a lot of things (which I don't fully understand yet) in the case of IsGraphics and IsEntryFunc. Is it possible to pull all this into the error handling without too much code duplication?

InVals.push_back(DAG.getUNDEF(Arg.VT));
continue;
}
Expand Down
174 changes: 174 additions & 0 deletions llvm/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --tool not --version 5
; RUN: not llc -mtriple=amdgcn-unknown-amdhsa -O0 -stop-after=amdgpu-isel -o - < %s 2>&1 | FileCheck %s

@I = global i32 42
@P = global ptr @I

; CHECK: error: <unknown>:0:0: in function pixel_shader_zero_args void (): unsupported non-compute shaders with HSA
; CHECK: error: <unknown>:0:0: in function pixel_shader_one_arg void (ptr): unsupported non-compute shaders with HSA
; CHECK: error: <unknown>:0:0: in function pixel_shader_two_args void (ptr, i32): unsupported non-compute shaders with HSA
; CHECK: error: <unknown>:0:0: in function vertex_shader_zero_args void (): unsupported non-compute shaders with HSA
; CHECK: error: <unknown>:0:0: in function vertex_shader_one_arg void (ptr): unsupported non-compute shaders with HSA
; CHECK: error: <unknown>:0:0: in function vertex_shader_two_args void (ptr, i32): unsupported non-compute shaders with HSA
; CHECK: error: <unknown>:0:0: in function geometry_shader_zero_args void (): unsupported non-compute shaders with HSA
; CHECK: error: <unknown>:0:0: in function geometry_shader_one_arg void (ptr): unsupported non-compute shaders with HSA
; CHECK: error: <unknown>:0:0: in function geometry_shader_two_args void (ptr, i32): unsupported non-compute shaders with HSA

; CHECK-LABEL: name: pixel_shader_zero_args
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
; CHECK-NEXT: %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %2, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: %5:vreg_64 = COPY %3
; CHECK-NEXT: %4:vgpr_32 = FLAT_LOAD_DWORD killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
; CHECK-NEXT: %6:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @P, target-flags(amdgpu-gotprel32-hi) @P, implicit-def dead $scc
; CHECK-NEXT: %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %6, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: %8:vreg_64 = COPY %7
; CHECK-NEXT: FLAT_STORE_DWORD killed %8, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into @P)
; CHECK-NEXT: S_ENDPGM 0
; Test case: amdgpu_ps function with no arguments, compiled for an amdhsa
; triple. The body copies the i32 held in @I into @P, so the function has
; observable memory traffic even though it takes no inputs. The autogenerated
; assertions above expect the "unsupported non-compute shaders with HSA"
; diagnostic for this function and still list the load/store in the MIR.
define amdgpu_ps void @pixel_shader_zero_args() {
%i = load i32, ptr @I
store i32 %i, ptr @P
ret void
}

; CHECK-LABEL: name: pixel_shader_one_arg
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: %4:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %5:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %6:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %7:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %3:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %6, %subreg.sub1
; CHECK-NEXT: %2:vreg_64 = COPY %3
; CHECK-NEXT: %8:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
; CHECK-NEXT: %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %8, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: %11:vreg_64 = COPY %9
; CHECK-NEXT: %10:vgpr_32 = FLAT_LOAD_DWORD killed %11, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
; CHECK-NEXT: %12:vreg_64 = COPY %3
; CHECK-NEXT: FLAT_STORE_DWORD %12, killed %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
; CHECK-NEXT: S_ENDPGM 0
; Test case: amdgpu_ps function with a single pointer argument, compiled for an
; amdhsa triple. Loads the i32 held in @I and stores it through %p. In the
; autogenerated assertions above, the stored-to address is assembled from
; IMPLICIT_DEF values rather than from a live-in register.
define amdgpu_ps void @pixel_shader_one_arg(ptr %p) {
%i = load i32, ptr @I
store i32 %i, ptr %p
ret void
}

; CHECK-LABEL: name: pixel_shader_two_args
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: %5:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %6:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %7:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %8:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %4:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
; CHECK-NEXT: %3:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: %2:vreg_64 = COPY %4
; CHECK-NEXT: S_ENDPGM 0
; Test case: amdgpu_ps function with a pointer and an i32 argument, compiled
; for an amdhsa triple. Stores %i through %p, so both operands of the store
; are formal arguments. The autogenerated assertions above contain no store
; instruction for this function -- presumably because both operands are undef
; after the diagnostic is emitted (NOTE(review): confirm).
define amdgpu_ps void @pixel_shader_two_args(ptr %p, i32 %i) {
store i32 %i, ptr %p
ret void
}

; CHECK-LABEL: name: vertex_shader_zero_args
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
; CHECK-NEXT: %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %2, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: %5:vreg_64 = COPY %3
; CHECK-NEXT: %4:vgpr_32 = FLAT_LOAD_DWORD killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
; CHECK-NEXT: %6:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @P, target-flags(amdgpu-gotprel32-hi) @P, implicit-def dead $scc
; CHECK-NEXT: %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %6, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: %8:vreg_64 = COPY %7
; CHECK-NEXT: FLAT_STORE_DWORD killed %8, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into @P)
; CHECK-NEXT: S_ENDPGM 0
; Test case: amdgpu_vs function with no arguments, compiled for an amdhsa
; triple. The body copies the i32 held in @I into @P. The autogenerated
; assertions above expect the "unsupported non-compute shaders with HSA"
; diagnostic for this function and still list the load/store in the MIR.
define amdgpu_vs void @vertex_shader_zero_args() {
%i = load i32, ptr @I
store i32 %i, ptr @P
ret void
}

; CHECK-LABEL: name: vertex_shader_one_arg
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: %4:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %5:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %6:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %7:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %3:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %6, %subreg.sub1
; CHECK-NEXT: %2:vreg_64 = COPY %3
; CHECK-NEXT: %8:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
; CHECK-NEXT: %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %8, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: %11:vreg_64 = COPY %9
; CHECK-NEXT: %10:vgpr_32 = FLAT_LOAD_DWORD killed %11, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
; CHECK-NEXT: %12:vreg_64 = COPY %3
; CHECK-NEXT: FLAT_STORE_DWORD %12, killed %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
; CHECK-NEXT: S_ENDPGM 0
; Test case: amdgpu_vs function with a single pointer argument, compiled for an
; amdhsa triple. Loads the i32 held in @I and stores it through %p. In the
; autogenerated assertions above, the stored-to address is assembled from
; IMPLICIT_DEF values rather than from a live-in register.
define amdgpu_vs void @vertex_shader_one_arg(ptr %p) {
%i = load i32, ptr @I
store i32 %i, ptr %p
ret void
}

; CHECK-LABEL: name: vertex_shader_two_args
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: %5:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %6:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %7:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %8:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %4:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
; CHECK-NEXT: %3:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: %2:vreg_64 = COPY %4
; CHECK-NEXT: S_ENDPGM 0
; Test case: amdgpu_vs function with a pointer and an i32 argument, compiled
; for an amdhsa triple. Stores %i through %p, so both operands of the store
; are formal arguments. The autogenerated assertions above contain no store
; instruction for this function -- presumably because both operands are undef
; after the diagnostic is emitted (NOTE(review): confirm).
define amdgpu_vs void @vertex_shader_two_args(ptr %p, i32 %i) {
store i32 %i, ptr %p
ret void
}

; CHECK-LABEL: name: geometry_shader_zero_args
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
; CHECK-NEXT: %3:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %2, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: %5:vreg_64 = COPY %3
; CHECK-NEXT: %4:vgpr_32 = FLAT_LOAD_DWORD killed %5, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
; CHECK-NEXT: %6:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @P, target-flags(amdgpu-gotprel32-hi) @P, implicit-def dead $scc
; CHECK-NEXT: %7:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %6, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: %8:vreg_64 = COPY %7
; CHECK-NEXT: FLAT_STORE_DWORD killed %8, killed %4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into @P)
; CHECK-NEXT: S_ENDPGM 0
; Test case: amdgpu_gs function with no arguments, compiled for an amdhsa
; triple. The body copies the i32 held in @I into @P. The autogenerated
; assertions above expect the "unsupported non-compute shaders with HSA"
; diagnostic for this function and still list the load/store in the MIR.
define amdgpu_gs void @geometry_shader_zero_args() {
%i = load i32, ptr @I
store i32 %i, ptr @P
ret void
}

; CHECK-LABEL: name: geometry_shader_one_arg
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: %4:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %5:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %6:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %7:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %3:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %6, %subreg.sub1
; CHECK-NEXT: %2:vreg_64 = COPY %3
; CHECK-NEXT: %8:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @I, target-flags(amdgpu-gotprel32-hi) @I, implicit-def dead $scc
; CHECK-NEXT: %9:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed %8, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: %11:vreg_64 = COPY %9
; CHECK-NEXT: %10:vgpr_32 = FLAT_LOAD_DWORD killed %11, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @I)
; CHECK-NEXT: %12:vreg_64 = COPY %3
; CHECK-NEXT: FLAT_STORE_DWORD %12, killed %10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.p)
; CHECK-NEXT: S_ENDPGM 0
; Test case: amdgpu_gs function with a single pointer argument, compiled for an
; amdhsa triple. Loads the i32 held in @I and stores it through %p. In the
; autogenerated assertions above, the stored-to address is assembled from
; IMPLICIT_DEF values rather than from a live-in register.
define amdgpu_gs void @geometry_shader_one_arg(ptr %p) {
%i = load i32, ptr @I
store i32 %i, ptr %p
ret void
}

; CHECK-LABEL: name: geometry_shader_two_args
; CHECK: bb.0 (%ir-block.0):
; CHECK-NEXT: %5:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %6:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %7:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %8:sreg_32 = IMPLICIT_DEF
; CHECK-NEXT: %4:sreg_64 = REG_SEQUENCE %5, %subreg.sub0, %7, %subreg.sub1
; CHECK-NEXT: %3:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: %2:vreg_64 = COPY %4
; CHECK-NEXT: S_ENDPGM 0
; Test case: amdgpu_gs function with a pointer and an i32 argument, compiled
; for an amdhsa triple. Stores %i through %p, so both operands of the store
; are formal arguments. The autogenerated assertions above contain no store
; instruction for this function -- presumably because both operands are undef
; after the diagnostic is emitted (NOTE(review): confirm).
define amdgpu_gs void @geometry_shader_two_args(ptr %p, i32 %i) {
store i32 %i, ptr %p
ret void
}
Loading