Skip to content

Commit 22968f5

Browse files
authored
[DAG] Add strictfp implicit def reg after metadata. (#168282)
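In strictfp functions, the rounding control registers are now added to inline asm as implicit defs after the srcloc metadata operand instead of before it. This prevents the machine verifier error "Expected implicit register after groups". Fixes #158661.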
1 parent 19c1381 commit 22968f5

File tree

6 files changed

+82
-19
lines changed

6 files changed

+82
-19
lines changed

llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -565,13 +565,6 @@ bool InlineAsmLowering::lowerInlineAsm(
565565
}
566566
}
567567

568-
// Add rounding control registers as implicit def for inline asm.
569-
if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
570-
ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
571-
for (MCPhysReg Reg : RCRegs)
572-
Inst.addReg(Reg, RegState::ImplicitDefine);
573-
}
574-
575568
if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) {
576569
auto *Token = Bundle->Inputs[0].get();
577570
ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token);
@@ -583,6 +576,13 @@ bool InlineAsmLowering::lowerInlineAsm(
583576
if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
584577
Inst.addMetadata(SrcLoc);
585578

579+
// Add rounding control registers as implicit def for inline asm.
580+
if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
581+
ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
582+
for (MCPhysReg Reg : RCRegs)
583+
Inst.addReg(Reg, RegState::ImplicitDefine);
584+
}
585+
586586
// All inputs are handled, insert the instruction now
587587
MIRBuilder.insertInstr(Inst);
588588

llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,13 +1416,6 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
14161416
}
14171417
}
14181418

1419-
// Add rounding control registers as implicit def for inline asm.
1420-
if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
1421-
ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
1422-
for (MCPhysReg Reg : RCRegs)
1423-
MIB.addReg(Reg, RegState::ImplicitDefine);
1424-
}
1425-
14261419
// GCC inline assembly allows input operands to also be early-clobber
14271420
// output operands (so long as the operand is written only after it's
14281421
// used), but this does not match the semantics of our early-clobber flag.
@@ -1443,6 +1436,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
14431436
if (MD)
14441437
MIB.addMetadata(MD);
14451438

1439+
// Add rounding control registers as implicit def for inline asm.
1440+
if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
1441+
ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
1442+
for (MCPhysReg Reg : RCRegs)
1443+
MIB.addReg(Reg, RegState::ImplicitDefine);
1444+
}
1445+
14461446
MBB->insert(InsertPos, MIB);
14471447
break;
14481448
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs < %s | FileCheck %s
3+
4+
define i32 @foo() strictfp {
5+
; CHECK-LABEL: foo:
6+
; CHECK: // %bb.0: // %entry
7+
; CHECK-NEXT: mov w0, #-1 // =0xffffffff
8+
; CHECK-NEXT: mov w8, #1 // =0x1
9+
; CHECK-NEXT: //APP
10+
; CHECK-NEXT: //NO_APP
11+
; CHECK-NEXT: ret
12+
entry:
13+
tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
14+
ret i32 -1
15+
}
16+
17+
!0 = !{i64 87}

llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=SDAG %s
3-
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=GISEL %s
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG %s
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL %s
44

55
; Check that call / asm get an implicit-def $mode added to them in
66
; strictfp functions.
@@ -67,7 +67,7 @@ define float @asm_changes_mode(float %x, float %y) #0 {
6767
; SDAG-NEXT: {{ $}}
6868
; SDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
6969
; SDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
70-
; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
70+
; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
7171
; SDAG-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
7272
; SDAG-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
7373
; SDAG-NEXT: SI_RETURN implicit $vgpr0
@@ -78,15 +78,17 @@ define float @asm_changes_mode(float %x, float %y) #0 {
7878
; GISEL-NEXT: {{ $}}
7979
; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
8080
; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
81-
; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
81+
; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
8282
; GISEL-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
8383
; GISEL-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
8484
; GISEL-NEXT: SI_RETURN implicit $vgpr0
85-
call void asm sideeffect "; maybe defs mode", ""()
85+
call void asm sideeffect "; maybe defs mode", ""(), !srcloc !0
8686
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
8787
ret float %val
8888
}
8989

9090
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
9191

9292
attributes #0 = { strictfp "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
93+
94+
!0 = !{i64 87}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=armv7-none-eabi -verify-machineinstrs < %s | FileCheck %s
3+
4+
define i32 @foo() strictfp {
5+
; CHECK-LABEL: foo:
6+
; CHECK: @ %bb.0: @ %entry
7+
; CHECK-NEXT: mov r0, #1
8+
; CHECK-NEXT: @APP
9+
; CHECK-NEXT: @NO_APP
10+
; CHECK-NEXT: mvn r0, #0
11+
; CHECK-NEXT: bx lr
12+
entry:
13+
tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
14+
ret i32 -1
15+
}
16+
17+
!0 = !{i64 87}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X86
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X64
4+
5+
define i32 @foo() strictfp {
6+
; X86-LABEL: foo:
7+
; X86: # %bb.0: # %entry
8+
; X86-NEXT: movl $1, %eax
9+
; X86-NEXT: #APP
10+
; X86-NEXT: #NO_APP
11+
; X86-NEXT: movl $-1, %eax
12+
; X86-NEXT: retl
13+
;
14+
; X64-LABEL: foo:
15+
; X64: # %bb.0: # %entry
16+
; X64-NEXT: movl $1, %eax
17+
; X64-NEXT: #APP
18+
; X64-NEXT: #NO_APP
19+
; X64-NEXT: movl $-1, %eax
20+
; X64-NEXT: retq
21+
entry:
22+
tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
23+
ret i32 -1
24+
}
25+
26+
27+
!0 = !{i64 87}

0 commit comments

Comments
 (0)