-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[DAG] Add strictfp implicit def reg after metadata. #168282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This prevents a machine verifier error, where it reports "Expected implicit register after groups". Fixes llvm#158661
|
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) ChangesThis prevents a machine verifier error, where it reports "Expected implicit register after groups". Fixes #158661 Full diff: https://github.com/llvm/llvm-project/pull/168282.diff 6 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index a8661ce629a4f..9837c0ca12990 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -565,13 +565,6 @@ bool InlineAsmLowering::lowerInlineAsm(
}
}
- // Add rounding control registers as implicit def for inline asm.
- if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
- ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
- for (MCPhysReg Reg : RCRegs)
- Inst.addReg(Reg, RegState::ImplicitDefine);
- }
-
if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token);
@@ -583,6 +576,13 @@ bool InlineAsmLowering::lowerInlineAsm(
if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
Inst.addMetadata(SrcLoc);
+ // Add rounding control registers as implicit def for inline asm.
+ if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ Inst.addReg(Reg, RegState::ImplicitDefine);
+ }
+
// All inputs are handled, insert the instruction now
MIRBuilder.insertInstr(Inst);
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 72d0c44889048..52e8449fe510c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1416,13 +1416,6 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
- // Add rounding control registers as implicit def for inline asm.
- if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
- ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
- for (MCPhysReg Reg : RCRegs)
- MIB.addReg(Reg, RegState::ImplicitDefine);
- }
-
// GCC inline assembly allows input operands to also be early-clobber
// output operands (so long as the operand is written only after it's
// used), but this does not match the semantics of our early-clobber flag.
@@ -1443,6 +1436,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
if (MD)
MIB.addMetadata(MD);
+ // Add rounding control registers as implicit def for inline asm.
+ if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+
MBB->insert(InsertPos, MIB);
break;
}
diff --git a/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..0bbf31c5c0d73
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, #-1 // =0xffffffff
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+entry:
+ tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+ ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
index 4b5a49fc0c2e9..562296fce4957 100644
--- a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
@@ -67,7 +67,7 @@ define float @asm_changes_mode(float %x, float %y) #0 {
; SDAG-NEXT: {{ $}}
; SDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; SDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+ ; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
; SDAG-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; SDAG-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
; SDAG-NEXT: SI_RETURN implicit $vgpr0
@@ -78,11 +78,11 @@ define float @asm_changes_mode(float %x, float %y) #0 {
; GISEL-NEXT: {{ $}}
; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+ ; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
; GISEL-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GISEL-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
; GISEL-NEXT: SI_RETURN implicit $vgpr0
- call void asm sideeffect "; maybe defs mode", ""()
+ call void asm sideeffect "; maybe defs mode", ""(), !srcloc !0
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret float %val
}
@@ -90,3 +90,5 @@ define float @asm_changes_mode(float %x, float %y) #0 {
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
attributes #0 = { strictfp "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..2d898a87b978d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv7-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: mov r0, #1
+; CHECK-NEXT: @APP
+; CHECK-NEXT: @NO_APP
+; CHECK-NEXT: mvn r0, #0
+; CHECK-NEXT: bx lr
+entry:
+ tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+ ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/X86/strictfp-inlineasm.ll b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..674c12a7e9bf3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X64
+
+define i32 @foo() strictfp {
+; X86-LABEL: foo:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: #APP
+; X86-NEXT: #NO_APP
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: foo:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: #APP
+; X64-NEXT: #NO_APP
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+ ret i32 -1
+}
+
+
+!0 = !{i64 87}
|
|
@llvm/pr-subscribers-backend-amdgpu Author: David Green (davemgreen) ChangesThis prevents a machine verifier error, where it reports "Expected implicit register after groups". Fixes #158661 Full diff: https://github.com/llvm/llvm-project/pull/168282.diff 6 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index a8661ce629a4f..9837c0ca12990 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -565,13 +565,6 @@ bool InlineAsmLowering::lowerInlineAsm(
}
}
- // Add rounding control registers as implicit def for inline asm.
- if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
- ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
- for (MCPhysReg Reg : RCRegs)
- Inst.addReg(Reg, RegState::ImplicitDefine);
- }
-
if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) {
auto *Token = Bundle->Inputs[0].get();
ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token);
@@ -583,6 +576,13 @@ bool InlineAsmLowering::lowerInlineAsm(
if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
Inst.addMetadata(SrcLoc);
+ // Add rounding control registers as implicit def for inline asm.
+ if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ Inst.addReg(Reg, RegState::ImplicitDefine);
+ }
+
// All inputs are handled, insert the instruction now
MIRBuilder.insertInstr(Inst);
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 72d0c44889048..52e8449fe510c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1416,13 +1416,6 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
- // Add rounding control registers as implicit def for inline asm.
- if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
- ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
- for (MCPhysReg Reg : RCRegs)
- MIB.addReg(Reg, RegState::ImplicitDefine);
- }
-
// GCC inline assembly allows input operands to also be early-clobber
// output operands (so long as the operand is written only after it's
// used), but this does not match the semantics of our early-clobber flag.
@@ -1443,6 +1436,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
if (MD)
MIB.addMetadata(MD);
+ // Add rounding control registers as implicit def for inline asm.
+ if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ MIB.addReg(Reg, RegState::ImplicitDefine);
+ }
+
MBB->insert(InsertPos, MIB);
break;
}
diff --git a/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..0bbf31c5c0d73
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w0, #-1 // =0xffffffff
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ret
+entry:
+ tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+ ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
index 4b5a49fc0c2e9..562296fce4957 100644
--- a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
@@ -67,7 +67,7 @@ define float @asm_changes_mode(float %x, float %y) #0 {
; SDAG-NEXT: {{ $}}
; SDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; SDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+ ; SDAG-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
; SDAG-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; SDAG-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
; SDAG-NEXT: SI_RETURN implicit $vgpr0
@@ -78,11 +78,11 @@ define float @asm_changes_mode(float %x, float %y) #0 {
; GISEL-NEXT: {{ $}}
; GISEL-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GISEL-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+ ; GISEL-NEXT: INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
; GISEL-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GISEL-NEXT: $vgpr0 = COPY [[V_ADD_F32_e64_]]
; GISEL-NEXT: SI_RETURN implicit $vgpr0
- call void asm sideeffect "; maybe defs mode", ""()
+ call void asm sideeffect "; maybe defs mode", ""(), !srcloc !0
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret float %val
}
@@ -90,3 +90,5 @@ define float @asm_changes_mode(float %x, float %y) #0 {
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
attributes #0 = { strictfp "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..2d898a87b978d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv7-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: mov r0, #1
+; CHECK-NEXT: @APP
+; CHECK-NEXT: @NO_APP
+; CHECK-NEXT: mvn r0, #0
+; CHECK-NEXT: bx lr
+entry:
+ tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+ ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/X86/strictfp-inlineasm.ll b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..674c12a7e9bf3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X64
+
+define i32 @foo() strictfp {
+; X86-LABEL: foo:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl $1, %eax
+; X86-NEXT: #APP
+; X86-NEXT: #NO_APP
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: foo:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: #APP
+; X64-NEXT: #NO_APP
+; X64-NEXT: movl $-1, %eax
+; X64-NEXT: retq
+entry:
+ tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+ ret i32 -1
+}
+
+
+!0 = !{i64 87}
|
26199db to
70541af
Compare
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a really hacky system. We should just have the registers on the instructions and then treat them as invariant in non-strict functions
This prevents a machine verifier error, where it reports "Expected implicit register after groups".
Fixes #158661