1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/CMakeLists.txt
@@ -59,6 +59,7 @@ add_llvm_target(RISCVCodeGen
RISCVOptWInstrs.cpp
RISCVPostRAExpandPseudoInsts.cpp
RISCVPushPopOptimizer.cpp
RISCVPromoteConstant.cpp
RISCVRedundantCopyElimination.cpp
RISCVRegisterInfo.cpp
RISCVSelectionDAGInfo.cpp
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCV.h
@@ -19,6 +19,7 @@

namespace llvm {
class FunctionPass;
class ModulePass;
class InstructionSelector;
class PassRegistry;
class RISCVRegisterBankInfo;
@@ -111,6 +112,8 @@ void initializeRISCVO0PreLegalizerCombinerPass(PassRegistry &);
FunctionPass *createRISCVPreLegalizerCombiner();
void initializeRISCVPreLegalizerCombinerPass(PassRegistry &);

ModulePass *createRISCVPromoteConstantPass();

FunctionPass *createRISCVVLOptimizerPass();
void initializeRISCVVLOptimizerPass(PassRegistry &);

212 changes: 212 additions & 0 deletions llvm/lib/Target/RISCV/RISCVPromoteConstant.cpp
@@ -0,0 +1,212 @@
//==- RISCVPromoteConstant.cpp - Promote constant fp to global for RISC-V --==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-promote-const"

STATISTIC(NumPromoted, "Number of promoted constants");
STATISTIC(NumPromotedUses, "Number of promoted constants uses");

namespace {

class RISCVPromoteConstant : public ModulePass {
Contributor: Also add a new pass manager version.

Contributor Author: Based on my understanding from WritingAnLLVMNewPMPass.rst, I'd be writing a version deriving from PassInfoMixin? That is more future-proof, but I'm also not seeing that any of the other RISC-V specific passes have done this yet.

Contributor: Yes.

Collaborator: On this particular suggestion, I would actively prefer this be done in a follow-on change. We do not have new pass manager versions of most RISC-V codegen passes, and we should do these together.
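For context, a minimal sketch of what a new pass manager version might eventually look like (illustrative only, not part of this patch, and assuming the promotion logic is factored into a helper shared with the legacy pass):

```cpp
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

namespace llvm {

// Hypothetical helper: the legacy pass's per-function logic, factored out so
// both pass manager wrappers can share it. Not part of the actual patch.
bool promoteConstantsInFunction(Function &F);

// Hypothetical new pass manager wrapper, following WritingAnLLVMNewPMPass.rst.
class RISCVPromoteConstantPass
    : public PassInfoMixin<RISCVPromoteConstantPass> {
public:
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
    bool Changed = false;
    for (Function &F : M)
      Changed |= promoteConstantsInFunction(F);
    if (!Changed)
      return PreservedAnalyses::all();
    // Mirror the legacy pass's setPreservesCFG().
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }
};

} // namespace llvm
```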

public:
static char ID;
RISCVPromoteConstant() : ModulePass(ID) {}

StringRef getPassName() const override { return "RISC-V Promote Constant"; }

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetPassConfig>();
AU.setPreservesCFG();
}

/// Iterate over the functions and promote the double fp constants that
/// would otherwise go into the constant pool to a constant array.
bool runOnModule(Module &M) override {
LLVM_DEBUG(dbgs() << getPassName() << '\n');
// TargetMachine and Subtarget are needed to query isFPImmlegal. Get them
// from TargetPassConfig.
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
const TargetMachine &TM = TPC.getTM<TargetMachine>();
if (skipModule(M))
return false;
bool Changed = false;
for (auto &MF : M) {
const RISCVSubtarget &ST = TM.getSubtarget<RISCVSubtarget>(MF);
const RISCVTargetLowering *TLI = ST.getTargetLowering();
Changed |= runOnFunction(MF, TLI);
}
return Changed;
}

private:
bool runOnFunction(Function &F, const RISCVTargetLowering *TLI);
};
} // end anonymous namespace

char RISCVPromoteConstant::ID = 0;

ModulePass *llvm::createRISCVPromoteConstantPass() {
return new RISCVPromoteConstant();
}

bool RISCVPromoteConstant::runOnFunction(Function &F,
const RISCVTargetLowering *TLI) {
// Bail out and make no transformation if the target doesn't support
Contributor: Need to skip optnone functions / skipFunction.

Contributor Author: skipFunction isn't available in a ModulePass, but I've added a skip if OptNone is set (and you can see the skipModule check in runOnModule).

// doubles, or if we're not targeting RV64 as we currently see some
// regressions for those targets.
if (!TLI->isTypeLegal(MVT::f64) || !TLI->isTypeLegal(MVT::i64))
return false;
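The OptNone skip mentioned above isn't visible in this revision of the diff; a minimal sketch of such a check near the top of runOnFunction might be (an assumption, not the patch's exact code):

```cpp
// Skip functions marked optnone; skipFunction() isn't available here because
// this is a ModulePass, so check the attribute directly.
if (F.hasFnAttribute(Attribute::OptimizeNone))
  return false;
```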

// Collect all unique double constants used in the function, and track their
// offset within the newly created global array. Also track uses that will
// be replaced later.
DenseMap<ConstantFP *, unsigned> ConstantMap;
SmallVector<Constant *, 16> ConstantVector;
DenseMap<ConstantFP *, SmallVector<Use *, 8>> UsesInFunc;

for (Instruction &I : instructions(F)) {
// PHI nodes are handled specially in a second loop below.
if (isa<PHINode>(I))
continue;
for (Use &U : I.operands()) {
if (auto *C = dyn_cast<ConstantFP>(U.get())) {
Contributor: What about vector constants?

Contributor Author: Just to confirm: is this a concern about correctness or a suggestion for another profitable transform? Fully agreed it could be worthwhile doing promotion for vector constants and I'd like to return to that, but I'm trying to keep this very narrowly scoped initially.

Contributor: Covering more cases.
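Purely as an illustration of the follow-up being hinted at (not part of this patch), splat vector constants could be looked through to their scalar value, for example:

```cpp
// Illustrative sketch only: handle a non-materializable f64 splat by promoting
// its scalar value; the splat would be rebuilt from the loaded scalar when
// rewriting the use.
if (auto *CV = dyn_cast<Constant>(U.get())) {
  if (CV->getType()->isVectorTy()) {
    if (auto *Splat = dyn_cast_or_null<ConstantFP>(CV->getSplatValue())) {
      if (Splat->getType()->isDoubleTy() &&
          !TLI->isFPImmLegal(Splat->getValueAPF(), MVT::f64,
                             /*ForCodeSize=*/false)) {
        // Record Splat in ConstantMap/UsesInFunc as for scalar constants.
      }
    }
  }
}
```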

if (C->getType()->isDoubleTy()) {
if (TLI->isFPImmLegal(C->getValueAPF(), MVT::f64,
/*ForCodeSize*/ false))
continue;
UsesInFunc[C].push_back(&U);
if (ConstantMap.find(C) == ConstantMap.end()) {
ConstantMap[C] = ConstantVector.size();
ConstantVector.push_back(C);
++NumPromoted;
}
}
}
}
}

// Collect uses from PHI nodes after other uses, because when transforming
// the function, we handle PHI uses afterwards.
Collaborator: This comment isn't really making sense to me? Why does it matter that the other lowering does two phases? Is there something about the order of the resulting global which is critical?

Contributor Author: Yes, it's the awkward issue of choosing a different insertion point when the use is a PHI versus a "normal" use.

But taking a step back, I feel I've failed to simplify this as much as possible. I'd started, like AArch64ConstantPromotion, by doing minimal inserts based on the dominator tree, but decided it wasn't worth it initially and never revisited whether there's any benefit to inserting as close to the use as possible. Things are kept much simpler by always inserting at the first possible insertion point (impact described in the forthcoming top-level PR comment).
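For reference, a rough sketch of the dominator-tree based placement the author decided against (illustrative only; names are borrowed from the surrounding code and this would also require llvm/IR/Dominators.h):

```cpp
// Illustrative alternative, not the patch: hoist each promoted constant's
// load to one block that dominates all of its uses instead of loading next
// to each use.
DominatorTree DT(F);
for (Constant *ConstVal : ConstantVector) {
  auto *Const = cast<ConstantFP>(ConstVal);
  BasicBlock *InsertBB = nullptr;
  for (Use *U : UsesInFunc[Const]) {
    auto *UserInst = cast<Instruction>(U->getUser());
    // For PHI uses the load must be available in the incoming predecessor.
    BasicBlock *UseBB = isa<PHINode>(UserInst)
                            ? cast<PHINode>(UserInst)->getIncomingBlock(*U)
                            : UserInst->getParent();
    InsertBB = InsertBB ? DT.findNearestCommonDominator(InsertBB, UseBB)
                        : UseBB;
  }
  // A single GEP+load for Const would then be emitted before
  // InsertBB->getTerminator(), ahead of all uses in that block.
}
```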

for (BasicBlock &BB : F) {
for (PHINode &PN : BB.phis()) {
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
if (auto *C = dyn_cast<ConstantFP>(PN.getIncomingValue(i))) {
if (C->getType()->isDoubleTy()) {
if (TLI->isFPImmLegal(C->getValueAPF(), MVT::f64,
/*ForCodeSize*/ false))
continue;
UsesInFunc[C].push_back(&PN.getOperandUse(i));
if (ConstantMap.find(C) == ConstantMap.end()) {
ConstantMap[C] = ConstantVector.size();
ConstantVector.push_back(C);
++NumPromoted;
}
}
}
}
}
}

// Bail out if no promotable constants found.
if (ConstantVector.empty())
return false;

// Create a global array containing the promoted constants.
Module *M = F.getParent();
Type *DoubleTy = Type::getDoubleTy(M->getContext());
ArrayType *ArrayTy = ArrayType::get(DoubleTy, ConstantVector.size());
Constant *GlobalArrayInitializer =
ConstantArray::get(ArrayTy, ConstantVector);

auto *GlobalArray = new GlobalVariable(
*M, ArrayTy,
/*isConstant=*/true, GlobalValue::InternalLinkage, GlobalArrayInitializer,
".promoted_doubles." + F.getName());

// Create GEP for the base pointer in the function entry.
IRBuilder<> EntryBuilder(&F.getEntryBlock().front());
Value *BasePtr = EntryBuilder.CreateConstInBoundsGEP2_64(
GlobalArray->getValueType(), GlobalArray, 0, 0, "doubles.base");

// A cache to hold the loaded value for a given constant within a basic block.
DenseMap<std::pair<ConstantFP *, BasicBlock *>, Value *> LocalLoads;

// Replace all uses with the loaded value.
for (Constant *ConstVal : ConstantVector) {
auto *Const = cast<ConstantFP>(ConstVal);
const auto &Uses = UsesInFunc.at(Const);
unsigned Idx = ConstantMap.at(Const);

for (Use *U : Uses) {
Instruction *UserInst = cast<Instruction>(U->getUser());
BasicBlock *InsertionBB;
Instruction *InsertionPt;

if (auto *PN = dyn_cast<PHINode>(UserInst)) {
// If the user is a PHI node, we must insert the load in the
// corresponding predecessor basic block, before its terminator.
unsigned OperandIdx = U->getOperandNo();
InsertionBB = PN->getIncomingBlock(OperandIdx);
InsertionPt = InsertionBB->getTerminator();
} else {
// For any other instruction, we can insert the load right before it.
InsertionBB = UserInst->getParent();
InsertionPt = UserInst;
}

auto CacheKey = std::make_pair(Const, InsertionBB);
Value *LoadedVal = nullptr;

// Re-use a load if it exists in the insertion block.
if (LocalLoads.count(CacheKey)) {
LoadedVal = LocalLoads.at(CacheKey);
} else {
// Otherwise, create a new GEP and Load at the correct insertion point.
IRBuilder<> Builder(InsertionPt);
Value *ElementPtr = Builder.CreateConstInBoundsGEP1_64(
DoubleTy, BasePtr, Idx, "double.addr");
LoadedVal = Builder.CreateLoad(DoubleTy, ElementPtr, "double.val");
Collaborator: Alignment?

Contributor Author: The loads will be created with appropriate alignment - but of course the lack of an IR->IR test means that wasn't obvious! I've added tests now, which confirm this.
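If one wanted to spell the alignment out rather than rely on the default taken from the DataLayout, a sketch (not what the patch does) would be:

```cpp
// Hypothetical variant: make the f64 ABI alignment explicit on the load.
Align DoubleAlign = M->getDataLayout().getABITypeAlign(DoubleTy);
LoadedVal = Builder.CreateAlignedLoad(DoubleTy, ElementPtr, DoubleAlign,
                                      "double.val");
```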


// Cache the newly created load for this block.
LocalLoads[CacheKey] = LoadedVal;
}

U->set(LoadedVal);
++NumPromotedUses;
}
}

return true;
}
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -463,6 +463,8 @@ void RISCVPassConfig::addIRPasses() {
}

bool RISCVPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOptLevel::None)
addPass(createRISCVPromoteConstantPass());
if (TM->getOptLevel() != CodeGenOptLevel::None) {
// Add a barrier before instruction selection so that we will not get
// deleted block address after enabling default outlining. See D99707 for
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll
@@ -67,8 +67,8 @@ define i32 @va1(ptr %fmt, ...) {
; RV32-NEXT: G_VASTART [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.va)
; RV32-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va)
; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; RV32-NEXT: %20:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
; RV32-NEXT: G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
; RV32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
Collaborator: Is this change actually from the review, or just a stale auto-update? It sorta looks like the latter.

Collaborator: This comment appears to be unaddressed.

; RV32-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
; RV32-NEXT: $x10 = COPY [[LOAD1]](s32)
; RV32-NEXT: PseudoRET implicit $x10
@@ -105,8 +105,8 @@ define i32 @va1(ptr %fmt, ...) {
; RV64-NEXT: G_VASTART [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.va)
; RV64-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va, align 4)
; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; RV64-NEXT: %20:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
; RV64-NEXT: G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
; RV64-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
; RV64-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
; RV64-NEXT: $x10 = COPY [[ANYEXT]](s64)
@@ -687,8 +687,8 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
; RV32-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; RV32-NEXT: %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
; RV32-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
; RV32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
; RV32-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
; RV32-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
; RV32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64)
@@ -733,8 +733,8 @@ define i64 @va2(ptr %fmt, ...) nounwind {
; RV64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
; RV64-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
; RV64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; RV64-NEXT: %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
; RV64-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
; RV64-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
; RV64-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
; RV64-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
; RV64-NEXT: $x10 = COPY [[LOAD1]](s64)
@@ -974,8 +974,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
; RV32-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; RV32-NEXT: %24:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
; RV32-NEXT: G_STORE %24(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
; RV32-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s32)
; RV32-NEXT: G_STORE [[PTR_ADD5]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va)
; RV32-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
; RV32-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[MV]], [[LOAD1]]
@@ -1020,8 +1020,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
; RV64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]]
; RV64-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32)
; RV64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; RV64-NEXT: %25:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
; RV64-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
; RV64-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[INTTOPTR]], [[C3]](s64)
; RV64-NEXT: G_STORE [[PTR_ADD6]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4)
; RV64-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32)
; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3)
; RV64-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[COPY1]], [[LOAD1]]
@@ -1724,8 +1724,8 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV32-NEXT: G_VASTART [[FRAME_INDEX2]](p0) :: (store (s32) into %ir.va)
; RV32-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va)
; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; RV32-NEXT: %21:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
; RV32-NEXT: G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va)
; RV32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s32)
; RV32-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va)
; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
; RV32-NEXT: $x10 = COPY [[LOAD1]](s32)
; RV32-NEXT: PseudoRET implicit $x10
@@ -1763,8 +1763,8 @@ define i32 @va_large_stack(ptr %fmt, ...) {
; RV64-NEXT: G_VASTART [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.va)
; RV64-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va, align 4)
; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; RV64-NEXT: %21:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
; RV64-NEXT: G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va, align 4)
; RV64-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw nusw inbounds G_PTR_ADD [[LOAD]], [[C1]](s64)
; RV64-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va, align 4)
; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur)
; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32)
; RV64-NEXT: $x10 = COPY [[ANYEXT]](s64)
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -75,6 +75,7 @@
; CHECK-NEXT: CodeGen Prepare
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Exception handling preparation
; CHECK-NEXT: RISC-V Promote Constant
; CHECK-NEXT: A No-Op Barrier Pass
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Merge internal globals
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/double-imm.ll
@@ -17,8 +17,8 @@ define double @double_imm() nounwind {
;
; CHECK64D-LABEL: double_imm:
; CHECK64D: # %bb.0:
; CHECK64D-NEXT: lui a0, %hi(.LCPI0_0)
; CHECK64D-NEXT: fld fa0, %lo(.LCPI0_0)(a0)
; CHECK64D-NEXT: lui a0, %hi(.promoted_doubles.double_imm)
Collaborator: Hm, looks like maybe we shouldn't apply the transform unless there's more than one double in the function?

Contributor Author: Agreed, added.
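A minimal sketch of the threshold being agreed on here (the exact form in the updated patch may differ, e.g. it could sit behind a cl::opt):

```cpp
// Hypothetical sketch: skip the transform when only a single candidate double
// was found, since one constant-pool load is not obviously worse than one
// load from the promoted global.
if (ConstantVector.size() < 2)
  return false;
```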

; CHECK64D-NEXT: fld fa0, %lo(.promoted_doubles.double_imm)(a0)
; CHECK64D-NEXT: ret
;
; CHECKRV32ZDINX-LABEL: double_imm:
@@ -31,8 +31,8 @@ define double @double_imm() nounwind {
;
; CHECKRV64ZDINX-LABEL: double_imm:
; CHECKRV64ZDINX: # %bb.0:
; CHECKRV64ZDINX-NEXT: lui a0, %hi(.LCPI0_0)
; CHECKRV64ZDINX-NEXT: ld a0, %lo(.LCPI0_0)(a0)
; CHECKRV64ZDINX-NEXT: lui a0, %hi(.promoted_doubles.double_imm)
; CHECKRV64ZDINX-NEXT: ld a0, %lo(.promoted_doubles.double_imm)(a0)
; CHECKRV64ZDINX-NEXT: ret
ret double 3.1415926535897931159979634685441851615905761718750
}