From 70541afc4890ccdb97ef72fa9845bf8064281afc Mon Sep 17 00:00:00 2001
From: David Green
Date: Sun, 16 Nov 2025 18:45:40 +0000
Subject: [PATCH] [DAG] Add strictfp implicit def reg after metadata.

This prevents a machine verifier error, "Expected implicit register after
groups", by adding the rounding-control implicit defs after the !srcloc
metadata operand rather than before it.

Fixes #158661
---
 .../CodeGen/GlobalISel/InlineAsmLowering.cpp  | 14 +++++-----
 .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 14 +++++-----
 .../CodeGen/AArch64/strictfp-inlineasm.ll     | 17 ++++++++++++
 .../CodeGen/AMDGPU/call-defs-mode-register.ll | 12 +++++----
 llvm/test/CodeGen/ARM/strictfp-inlineasm.ll   | 17 ++++++++++++
 llvm/test/CodeGen/X86/strictfp-inlineasm.ll   | 27 +++++++++++++++++++
 6 files changed, 82 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
 create mode 100644 llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
 create mode 100644 llvm/test/CodeGen/X86/strictfp-inlineasm.ll

diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index a8661ce629a4f..9837c0ca12990 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -565,13 +565,6 @@ bool InlineAsmLowering::lowerInlineAsm(
     }
   }
 
-  // Add rounding control registers as implicit def for inline asm.
-  if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
-    ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
-    for (MCPhysReg Reg : RCRegs)
-      Inst.addReg(Reg, RegState::ImplicitDefine);
-  }
-
   if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) {
     auto *Token = Bundle->Inputs[0].get();
     ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token);
@@ -583,6 +576,13 @@ bool InlineAsmLowering::lowerInlineAsm(
   if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
     Inst.addMetadata(SrcLoc);
 
+  // Add rounding control registers as implicit def for inline asm.
+  if (MF.getFunction().hasFnAttribute(Attribute::StrictFP)) {
+    ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+    for (MCPhysReg Reg : RCRegs)
+      Inst.addReg(Reg, RegState::ImplicitDefine);
+  }
+
   // All inputs are handled, insert the instruction now
   MIRBuilder.insertInstr(Inst);
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 72d0c44889048..52e8449fe510c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1416,13 +1416,6 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
       }
     }
 
-    // Add rounding control registers as implicit def for inline asm.
-    if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
-      ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
-      for (MCPhysReg Reg : RCRegs)
-        MIB.addReg(Reg, RegState::ImplicitDefine);
-    }
-
     // GCC inline assembly allows input operands to also be early-clobber
     // output operands (so long as the operand is written only after it's
     // used), but this does not match the semantics of our early-clobber flag.
@@ -1443,6 +1436,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
     if (MD)
      MIB.addMetadata(MD);
 
+    // Add rounding control registers as implicit def for inline asm.
+    if (MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+      ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+      for (MCPhysReg Reg : RCRegs)
+        MIB.addReg(Reg, RegState::ImplicitDefine);
+    }
+
     MBB->insert(InsertPos, MIB);
     break;
   }
diff --git a/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..0bbf31c5c0d73
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, #-1 // =0xffffffff
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    //APP
+; CHECK-NEXT:    //NO_APP
+; CHECK-NEXT:    ret
+entry:
+  tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+  ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
index 4b5a49fc0c2e9..6a835292403ba 100644
--- a/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-defs-mode-register.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel < %s | FileCheck -check-prefixes=GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -simplify-mir -stop-after=finalize-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL %s
 
 ; Check that call / asm get an implicit-def $mode added to them in
 ; strictfp functions.
@@ -67,7 +67,7 @@ define float @asm_changes_mode(float %x, float %y) #0 {
   ; SDAG-NEXT: {{  $}}
   ; SDAG-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
   ; SDAG-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-  ; SDAG-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+  ; SDAG-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
   ; SDAG-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
   ; SDAG-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
   ; SDAG-NEXT:   SI_RETURN implicit $vgpr0
@@ -78,11 +78,11 @@ define float @asm_changes_mode(float %x, float %y) #0 {
   ; GISEL-NEXT: {{  $}}
   ; GISEL-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
   ; GISEL-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-  ; GISEL-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode
+  ; GISEL-NEXT:   INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode
   ; GISEL-NEXT:   [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
   ; GISEL-NEXT:   $vgpr0 = COPY [[V_ADD_F32_e64_]]
   ; GISEL-NEXT:   SI_RETURN implicit $vgpr0
-  call void asm sideeffect "; maybe defs mode", ""()
+  call void asm sideeffect "; maybe defs mode", ""(), !srcloc !0
   %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore")
   ret float %val
 }
@@ -90,3 +90,5 @@ define float @asm_changes_mode(float %x, float %y) #0 {
 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
 
 attributes #0 = { strictfp "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-cluster-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-cluster-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-cluster-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..2d898a87b978d
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/strictfp-inlineasm.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv7-none-eabi -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @foo() strictfp {
+; CHECK-LABEL: foo:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    mov r0, #1
+; CHECK-NEXT:    @APP
+; CHECK-NEXT:    @NO_APP
+; CHECK-NEXT:    mvn r0, #0
+; CHECK-NEXT:    bx lr
+entry:
+  tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+  ret i32 -1
+}
+
+!0 = !{i64 87}
diff --git a/llvm/test/CodeGen/X86/strictfp-inlineasm.ll b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
new file mode 100644
index 0000000000000..674c12a7e9bf3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/strictfp-inlineasm.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=X64
+
+define i32 @foo() strictfp {
+; X86-LABEL: foo:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    movl $1, %eax
+; X86-NEXT:    #APP
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    movl $-1, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: foo:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    #APP
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    movl $-1, %eax
+; X64-NEXT:    retq
+entry:
+  tail call void asm sideeffect "", "r"(i32 1) #1, !srcloc !0
+  ret i32 -1
+}
+
+
+!0 = !{i64 87}
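
For reference, a rough sketch of the INLINEASM operand order at issue, built from the line checked in call-defs-mode-register.ll above. The pre-patch ordering is reconstructed for illustration rather than copied from a build, and the description of the verifier's expectation is an inference from the error message: after the inline-asm operand groups the verifier accepts one optional metadata operand and then only implicit register operands, so the rounding-control defs must come last.

  ; Pre-patch: the implicit rounding-control def was appended before the !srcloc
  ; metadata, which trips "Expected implicit register after groups".
  INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, implicit-def $mode, !0
  ; Post-patch, as checked by the updated test: metadata directly after the
  ; groups, implicit defs at the end.
  INLINEASM &"; maybe defs mode", 1 /* sideeffect attdialect */, !0, implicit-def $mode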