From b9e836226b55578598ad51d86ace5baa156d4515 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Wed, 30 Oct 2024 14:50:45 +0000 Subject: [PATCH 1/3] [AArch64] Create set.fpmr intrinsic and assembly lowering This patch introduces a new llvm.aarch64.set.fpmr intrinsic for setting the value of the FPMR register and adds its lowering to a series of read-compare-write instructions. This intrinsic will be generated during the lowering of FP8 C intrinsics into LLVM IR, which is introduced in a later patch. --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 ++ llvm/lib/CodeGen/LivePhysRegs.cpp | 2 +- .../Target/AArch64/AArch64ISelLowering.cpp | 45 +++++++++++++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 3 ++ llvm/lib/Target/AArch64/AArch64InstrInfo.td | 5 +++ llvm/test/CodeGen/AArch64/arm64-fpenv.ll | 16 ++++++- 6 files changed, 72 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 594069c619ceb..4579f3fda523c 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -778,6 +778,9 @@ def int_aarch64_get_fpcr : FPENV_Get_Intrinsic; def int_aarch64_set_fpcr : FPENV_Set_Intrinsic; def int_aarch64_get_fpsr : FPENV_Get_Intrinsic; def int_aarch64_set_fpsr : FPENV_Set_Intrinsic; +def int_aarch64_set_fpmr : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleMemOnly]>{ + let TargetPrefix = "aarch64"; +} // Armv8.5-A Random number generation intrinsics def int_aarch64_rndr : RNDR_Intrinsic; diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp index 96380d4084825..330ea65770847 100644 --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -262,7 +262,7 @@ void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) { const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); for (MCPhysReg Reg : LiveRegs) { - if (MRI.isReserved(Reg)) + if 
(TRI.getReservedRegs(MF).test(Reg)) continue; // Skip the register if we are about to add one of its super registers. if (any_of(TRI.superregs(Reg), [&](MCPhysReg SReg) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 32ba2866ac818..f5d08bf8d57a4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3007,6 +3007,49 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( return BB; } +MachineBasicBlock * +AArch64TargetLowering::EmitLoweredSetFpmr(MachineInstr &MI, + MachineBasicBlock *MBB) const { + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + Register NewFpmrVal = MI.getOperand(0).getReg(); + + // Test if FPMR is set correctly already + Register OldFpmrVal = + MI.getMF()->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + BuildMI(*MBB, MI, DL, TII->get(AArch64::MRS), OldFpmrVal) + .addImm(0xda22) + .addUse(AArch64::FPMR, RegState::Implicit); + BuildMI(*MBB, MI, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) + .addReg(OldFpmrVal) + .addReg(NewFpmrVal) + .addImm(0); + + MachineBasicBlock *MsrBB = MF->CreateMachineBasicBlock(LLVM_BB); + // Transfer rest of current basic-block to EndBB + MachineBasicBlock *EndBB = MBB->splitAt(MI); + MF->insert(++MBB->getIterator(), MsrBB); + + // If already set continue + BuildMI(*MBB, MI, DL, TII->get(AArch64::Bcc)) + .addImm(AArch64CC::EQ) + .addMBB(EndBB); + + BuildMI(*MsrBB, MsrBB->begin(), DL, TII->get(AArch64::MSR)) + .addImm(0xda22) + .addReg(NewFpmrVal) + .addDef(AArch64::FPMR, RegState::Implicit); + + MBB->addSuccessor(MsrBB); + // MsrBB falls through to the end. 
+ MsrBB->addSuccessor(EndBB); + + MI.eraseFromParent(); + return EndBB; +} + MachineBasicBlock * AArch64TargetLowering::EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -3292,6 +3335,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true); case AArch64::MOVT_TIZ_PSEUDO: return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true); + case AArch64::SET_FPMR: + return EmitLoweredSetFpmr(MI, BB); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index d696355bb062a..4d72c93d25582 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -647,6 +647,9 @@ class AArch64TargetLowering : public TargetLowering { MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredSetFpmr(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6194de2d56b63..be62daceeed75 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2145,6 +2145,11 @@ def MSR_FPSR : Pseudo<(outs), (ins GPR64:$val), PseudoInstExpansion<(MSR 0xda21, GPR64:$val)>, Sched<[WriteSys]>; +let Uses = [FPMR], Defs = [FPMR, NZCV], usesCustomInserter = 1 in +def SET_FPMR : Pseudo<(outs), (ins GPR64:$val), + [(int_aarch64_set_fpmr i64:$val)]>, + Sched<[WriteSys]>; + // Generic system instructions def SYSxt : SystemXtI<0, "sys">; def SYSLxt : SystemLXtI<1, "sysl">; diff --git a/llvm/test/CodeGen/AArch64/arm64-fpenv.ll b/llvm/test/CodeGen/AArch64/arm64-fpenv.ll index 030809caee339..deb0a2df7b7fb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fpenv.ll +++ 
b/llvm/test/CodeGen/AArch64/arm64-fpenv.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=aarch64 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+fpmr -verify-machineinstrs < %s | FileCheck %s define i64 @get_fpcr() #0 { ; CHECK-LABEL: get_fpcr: @@ -37,6 +37,20 @@ define void @set_fpsr(i64 %sr) { ret void } +define dso_local void @set_fpmr(i64 %sr) { +; CHECK-LABEL: set_fpmr: +; CHECK: // %bb.0: +; CHECK-NEXT: mrs x8, FPMR +; CHECK-NEXT: cmp x8, x0 +; CHECK-NEXT: b.eq .LBB4_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: msr FPMR, x0 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: ret + call void @llvm.aarch64.set.fpmr(i64 %sr) + ret void +} + declare i64 @llvm.aarch64.get.fpcr() declare void @llvm.aarch64.set.fpcr(i64) declare i64 @llvm.aarch64.get.fpsr() From 4ee6072f65e4d73f7ae35c1add83c5af2c1ae2ce Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Tue, 5 Nov 2024 14:01:25 +0000 Subject: [PATCH 2/3] switch to using simple msr and address comments --- .../Target/AArch64/AArch64ISelLowering.cpp | 45 ------------------- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 3 -- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 3 +- llvm/test/CodeGen/AArch64/arm64-fpenv.ll | 9 +--- 4 files changed, 4 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f5d08bf8d57a4..32ba2866ac818 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3007,49 +3007,6 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( return BB; } -MachineBasicBlock * -AArch64TargetLowering::EmitLoweredSetFpmr(MachineInstr &MI, - MachineBasicBlock *MBB) const { - MachineFunction *MF = MBB->getParent(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - DebugLoc DL = MI.getDebugLoc(); - const BasicBlock *LLVM_BB = MBB->getBasicBlock(); - Register 
NewFpmrVal = MI.getOperand(0).getReg(); - - // Test if FPMR is set correctly already - Register OldFpmrVal = - MI.getMF()->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); - BuildMI(*MBB, MI, DL, TII->get(AArch64::MRS), OldFpmrVal) - .addImm(0xda22) - .addUse(AArch64::FPMR, RegState::Implicit); - BuildMI(*MBB, MI, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) - .addReg(OldFpmrVal) - .addReg(NewFpmrVal) - .addImm(0); - - MachineBasicBlock *MsrBB = MF->CreateMachineBasicBlock(LLVM_BB); - // Transfer rest of current basic-block to EndBB - MachineBasicBlock *EndBB = MBB->splitAt(MI); - MF->insert(++MBB->getIterator(), MsrBB); - - // If already set continue - BuildMI(*MBB, MI, DL, TII->get(AArch64::Bcc)) - .addImm(AArch64CC::EQ) - .addMBB(EndBB); - - BuildMI(*MsrBB, MsrBB->begin(), DL, TII->get(AArch64::MSR)) - .addImm(0xda22) - .addReg(NewFpmrVal) - .addDef(AArch64::FPMR, RegState::Implicit); - - MBB->addSuccessor(MsrBB); - // MsrBB falls through to the end. - MsrBB->addSuccessor(EndBB); - - MI.eraseFromParent(); - return EndBB; -} - MachineBasicBlock * AArch64TargetLowering::EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -3335,8 +3292,6 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true); case AArch64::MOVT_TIZ_PSEUDO: return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true); - case AArch64::SET_FPMR: - return EmitLoweredSetFpmr(MI, BB); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 4d72c93d25582..d696355bb062a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -647,9 +647,6 @@ class AArch64TargetLowering : public TargetLowering { MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitLoweredSetFpmr(MachineInstr &MI, - MachineBasicBlock *BB) const; 
- MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index be62daceeed75..b9498c5b7e590 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2145,9 +2145,10 @@ def MSR_FPSR : Pseudo<(outs), (ins GPR64:$val), PseudoInstExpansion<(MSR 0xda21, GPR64:$val)>, Sched<[WriteSys]>; -let Uses = [FPMR], Defs = [FPMR, NZCV], usesCustomInserter = 1 in +let Uses = [FPMR], Defs = [FPMR, NZCV] in def SET_FPMR : Pseudo<(outs), (ins GPR64:$val), [(int_aarch64_set_fpmr i64:$val)]>, + PseudoInstExpansion<(MSR 0xda22, GPR64:$val)>, Sched<[WriteSys]>; // Generic system instructions diff --git a/llvm/test/CodeGen/AArch64/arm64-fpenv.ll b/llvm/test/CodeGen/AArch64/arm64-fpenv.ll index deb0a2df7b7fb..412f89ae67439 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fpenv.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fpenv.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=aarch64 -mattr=+fpmr -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s | FileCheck %s define i64 @get_fpcr() #0 { ; CHECK-LABEL: get_fpcr: @@ -37,15 +37,10 @@ define void @set_fpsr(i64 %sr) { ret void } -define dso_local void @set_fpmr(i64 %sr) { +define void @set_fpmr(i64 %sr) { ; CHECK-LABEL: set_fpmr: ; CHECK: // %bb.0: -; CHECK-NEXT: mrs x8, FPMR -; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: b.eq .LBB4_2 -; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: msr FPMR, x0 -; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: ret call void @llvm.aarch64.set.fpmr(i64 %sr) ret void From 4851feb239534448617e23ba3865683775bcb924 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Tue, 5 Nov 2024 16:17:57 +0000 Subject: [PATCH 3/3] fix incorrect defs and refactor code --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 6 +++--- 
llvm/lib/CodeGen/LivePhysRegs.cpp | 2 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 4579f3fda523c..6a09a8647096f 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -771,6 +771,8 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrNoMem, IntrHasSideEffects]>; class RNDR_Intrinsic : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>; + class FPMR_Set_Intrinsic + : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleMemOnly]>; } // FP environment registers. @@ -778,9 +780,7 @@ def int_aarch64_get_fpcr : FPENV_Get_Intrinsic; def int_aarch64_set_fpcr : FPENV_Set_Intrinsic; def int_aarch64_get_fpsr : FPENV_Get_Intrinsic; def int_aarch64_set_fpsr : FPENV_Set_Intrinsic; -def int_aarch64_set_fpmr : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrInaccessibleMemOnly]>{ - let TargetPrefix = "aarch64"; -} +def int_aarch64_set_fpmr : FPMR_Set_Intrinsic; // Armv8.5-A Random number generation intrinsics def int_aarch64_rndr : RNDR_Intrinsic; diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp index 330ea65770847..96380d4084825 100644 --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -262,7 +262,7 @@ void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) { const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); for (MCPhysReg Reg : LiveRegs) { - if (TRI.getReservedRegs(MF).test(Reg)) + if (MRI.isReserved(Reg)) continue; // Skip the register if we are about to add one of its super registers. 
if (any_of(TRI.superregs(Reg), [&](MCPhysReg SReg) { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index b9498c5b7e590..ecea5f418b8ec 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2145,8 +2145,8 @@ def MSR_FPSR : Pseudo<(outs), (ins GPR64:$val), PseudoInstExpansion<(MSR 0xda21, GPR64:$val)>, Sched<[WriteSys]>; -let Uses = [FPMR], Defs = [FPMR, NZCV] in -def SET_FPMR : Pseudo<(outs), (ins GPR64:$val), +let Defs = [FPMR] in +def MSR_FPMR : Pseudo<(outs), (ins GPR64:$val), [(int_aarch64_set_fpmr i64:$val)]>, PseudoInstExpansion<(MSR 0xda22, GPR64:$val)>, Sched<[WriteSys]>;