Skip to content

Commit 8bd6eda

Browse files
committed
.
Created using spr 1.3.5-bogner
2 parents ded5c47 + d368d11 commit 8bd6eda

File tree

17 files changed

+2215
-4178
lines changed

17 files changed

+2215
-4178
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
//===- llvm/Support/DebugLog.h - Logging like debug output ------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// This file contains macros for logging like debug output. It builds upon the
9+
// support in Debug.h but provides a utility function for common debug output
10+
// style.
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_SUPPORT_DEBUGLOG_H
14+
#define LLVM_SUPPORT_DEBUGLOG_H
15+
16+
#include "llvm/Support/Debug.h"
17+
#include "llvm/Support/raw_ostream.h"
18+
19+
namespace llvm {
20+
#ifndef NDEBUG
21+
22+
// Output with given inputs and trailing newline. E.g.,
23+
// LDBG() << "Bitset contains: " << Bitset;
24+
// is equivalent to
25+
// LLVM_DEBUG(dbgs() << DEBUG_TYPE << " [" << __FILE__ << ":" << __LINE__
26+
// << "] " << "Bitset contains: " << Bitset << "\n");
27+
#define LDBG() DEBUGLOG_WITH_STREAM_AND_TYPE(llvm::dbgs(), DEBUG_TYPE)
28+
29+
// Writes a log prefix to STREAM and yields a temporary for the caller to
// stream into. The single-iteration `for` executes its body only when
// DebugFlag is set and isCurrentDebugType(TYPE) is true, so operands streamed
// after the macro are not evaluated otherwise. The LogWithNewline temporary
// writes the trailing '\n' from its destructor.
#define DEBUGLOG_WITH_STREAM_AND_TYPE(STREAM, TYPE) \
30+
for (bool _c = (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE)); _c; \
31+
_c = false) \
32+
::llvm::impl::LogWithNewline(TYPE, __FILE__, __LINE__, (STREAM))
33+
34+
namespace impl {
35+
// Helper that writes a "<debug_type> [<file>:<line>] " prefix to a stream on
// construction and a single '\n' on destruction. It is meant to be used as a
// temporary, as done by DEBUGLOG_WITH_STREAM_AND_TYPE.
class LogWithNewline {
36+
public:
37+
// Writes the prefix to `os`. The debug type (and the space following it) is
// omitted when `debug_type` is null.
LogWithNewline(const char *debug_type, const char *file, int line,
38+
raw_ostream &os)
39+
: os(os) {
40+
if (debug_type)
41+
os << debug_type << " ";
42+
os << "[" << file << ":" << line << "] ";
43+
}
44+
// Terminates the log line.
~LogWithNewline() { os << '\n'; }
45+
// Rvalue-qualified, so it can only be invoked on a temporary. It returns the
// underlying stream, so any further `<<` operands are written to the stream
// directly rather than going through this class.
template <typename T> raw_ostream &operator<<(const T &t) && {
46+
return os << t;
47+
}
48+
49+
// Prevent copying, as this class manages newline responsibility and is
50+
// intended for use as a temporary.
51+
LogWithNewline(const LogWithNewline &) = delete;
52+
LogWithNewline &operator=(const LogWithNewline &) = delete;
53+
LogWithNewline &operator=(LogWithNewline &&) = delete;
54+
55+
private:
56+
// Destination stream; held by reference, not owned.
raw_ostream &os;
57+
};
58+
} // end namespace impl
59+
#else
60+
// As with others in Debug, when compiling without assertions, the -debug-*
61+
// options and all inputs to LDBG() are ignored.
62+
#define LDBG() \
63+
for (bool _c = false; _c; _c = false) \
64+
::llvm::nulls()
65+
#endif
66+
} // end namespace llvm
67+
68+
#endif // LLVM_SUPPORT_DEBUGLOG_H

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,27 @@ static Value *getMask(Value *WideMask, unsigned Factor,
587587
}
588588
}
589589

590+
if (auto *SVI = dyn_cast<ShuffleVectorInst>(WideMask)) {
591+
// Check that the shuffle mask is: a) an interleave, b) all of the same
592+
// set of the elements, and c) contained by the first source. (c) could
593+
// be relaxed if desired.
594+
unsigned NumSrcElts =
595+
cast<FixedVectorType>(SVI->getOperand(1)->getType())->getNumElements();
596+
SmallVector<unsigned> StartIndexes;
597+
if (ShuffleVectorInst::isInterleaveMask(SVI->getShuffleMask(), Factor,
598+
NumSrcElts * 2, StartIndexes) &&
599+
llvm::all_of(StartIndexes, [](unsigned Start) { return Start == 0; }) &&
600+
llvm::all_of(SVI->getShuffleMask(), [&NumSrcElts](int Idx) {
601+
return Idx < (int)NumSrcElts;
602+
})) {
603+
auto *LeafMaskTy =
604+
VectorType::get(Type::getInt1Ty(SVI->getContext()), LeafValueEC);
605+
IRBuilder<> Builder(SVI);
606+
return Builder.CreateExtractVector(LeafMaskTy, SVI->getOperand(0),
607+
uint64_t(0));
608+
}
609+
}
610+
590611
return nullptr;
591612
}
592613

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1618,6 +1618,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
16181618
}
16191619
}
16201620

1621+
// Customize load and store operation for bf16 if zfh isn't enabled.
1622+
if (Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh()) {
1623+
setOperationAction(ISD::LOAD, MVT::bf16, Custom);
1624+
setOperationAction(ISD::STORE, MVT::bf16, Custom);
1625+
}
1626+
16211627
// Function alignments.
16221628
const Align FunctionAlignment(Subtarget.hasStdExtZca() ? 2 : 4);
16231629
setMinFunctionAlignment(FunctionAlignment);
@@ -7216,6 +7222,47 @@ static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
72167222
return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
72177223
}
72187224

7225+
// Custom lowering for a bf16 load when XAndesBFHCvt is available but Zfh is
// not (asserted below). Emits a zero-extending integer load of the memory
// width into an XLen value, ORs it with -65536 to set every bit above the low
// 16, and converts the result to bf16 with NDS_FMV_BF16_X. Returns the
// converted value merged with the chain of the new load.
SDValue
7226+
RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Load(SDValue Op,
7227+
SelectionDAG &DAG) const {
7228+
assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7229+
"Unexpected bfloat16 load lowering");
7230+
7231+
SDLoc DL(Op);
7232+
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
7233+
EVT MemVT = LD->getMemoryVT();
7234+
SDValue Load = DAG.getExtLoad(
7235+
ISD::ZEXTLOAD, DL, Subtarget.getXLenVT(), LD->getChain(),
7236+
LD->getBasePtr(),
7237+
EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
7238+
LD->getMemOperand());
7239+
// Use a mask to make the bf16 NaN-boxing valid when we don't have the flh
7240+
// instruction. -65536 is treated as a small number, so lui can be used
7241+
// directly to materialize the constant.
7242+
SDValue mask = DAG.getSignedConstant(-65536, DL, Subtarget.getXLenVT());
7243+
SDValue OrSixteenOne =
7244+
DAG.getNode(ISD::OR, DL, Load.getValueType(), {Load, mask});
7245+
SDValue ConvertedResult =
7246+
DAG.getNode(RISCVISD::NDS_FMV_BF16_X, DL, MVT::bf16, OrSixteenOne);
7247+
return DAG.getMergeValues({ConvertedResult, Load.getValue(1)}, DL);
7248+
}
7249+
7250+
// Custom lowering for a bf16 store when XAndesBFHCvt is available but Zfh is
// not (asserted below). Moves the stored bf16 value into an XLen integer with
// NDS_FMV_X_ANYEXTBF16, then emits a truncating store of that integer using
// an integer type as wide as the original memory type. The original chain,
// base pointer and memory operand are reused.
SDValue
7251+
RISCVTargetLowering::lowerXAndesBfHCvtBFloat16Store(SDValue Op,
7252+
SelectionDAG &DAG) const {
7253+
assert(Subtarget.hasVendorXAndesBFHCvt() && !Subtarget.hasStdExtZfh() &&
7254+
"Unexpected bfloat16 store lowering");
7255+
7256+
StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
7257+
SDLoc DL(Op);
7258+
SDValue FMV = DAG.getNode(RISCVISD::NDS_FMV_X_ANYEXTBF16, DL,
7259+
Subtarget.getXLenVT(), ST->getValue());
7260+
return DAG.getTruncStore(
7261+
ST->getChain(), DL, FMV, ST->getBasePtr(),
7262+
EVT::getIntegerVT(*DAG.getContext(), ST->getMemoryVT().getSizeInBits()),
7263+
ST->getMemOperand());
7264+
}
7265+
72197266
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
72207267
SelectionDAG &DAG) const {
72217268
switch (Op.getOpcode()) {
@@ -7914,6 +7961,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
79147961
return DAG.getMergeValues({Pair, Chain}, DL);
79157962
}
79167963

7964+
if (VT == MVT::bf16)
7965+
return lowerXAndesBfHCvtBFloat16Load(Op, DAG);
7966+
79177967
// Handle normal vector tuple load.
79187968
if (VT.isRISCVVectorTuple()) {
79197969
SDLoc DL(Op);
@@ -7998,6 +8048,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
79988048
{Store->getChain(), Lo, Hi, Store->getBasePtr()}, MVT::i64,
79998049
Store->getMemOperand());
80008050
}
8051+
8052+
if (VT == MVT::bf16)
8053+
return lowerXAndesBfHCvtBFloat16Store(Op, DAG);
8054+
80018055
// Handle normal vector tuple store.
80028056
if (VT.isRISCVVectorTuple()) {
80038057
SDLoc DL(Op);

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,9 @@ class RISCVTargetLowering : public TargetLowering {
578578
SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
579579
SDValue lowerPARTIAL_REDUCE_MLA(SDValue Op, SelectionDAG &DAG) const;
580580

581+
SDValue lowerXAndesBfHCvtBFloat16Load(SDValue Op, SelectionDAG &DAG) const;
582+
SDValue lowerXAndesBfHCvtBFloat16Store(SDValue Op, SelectionDAG &DAG) const;
583+
581584
bool isEligibleForTailCallOptimization(
582585
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
583586
const SmallVector<CCValAssign, 16> &ArgLocs) const;

llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,20 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
//===----------------------------------------------------------------------===//
14+
// RISC-V specific DAG Nodes.
15+
//===----------------------------------------------------------------------===//
16+
17+
def SDT_NDS_FMV_BF16_X
18+
: SDTypeProfile<1, 1, [SDTCisVT<0, bf16>, SDTCisVT<1, XLenVT>]>;
19+
def SDT_NDS_FMV_X_ANYEXTBF16
20+
: SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisVT<1, bf16>]>;
21+
22+
def riscv_nds_fmv_bf16_x
23+
: SDNode<"RISCVISD::NDS_FMV_BF16_X", SDT_NDS_FMV_BF16_X>;
24+
def riscv_nds_fmv_x_anyextbf16
25+
: SDNode<"RISCVISD::NDS_FMV_X_ANYEXTBF16", SDT_NDS_FMV_X_ANYEXTBF16>;
26+
1327
//===----------------------------------------------------------------------===//
1428
// Operand and SDNode transformation definitions.
1529
//===----------------------------------------------------------------------===//
@@ -773,6 +787,25 @@ def : Pat<(bf16 (fpround FPR32:$rs)),
773787
(NDS_FCVT_BF16_S FPR32:$rs)>;
774788
} // Predicates = [HasVendorXAndesBFHCvt]
775789

790+
let isCodeGenOnly = 1 in {
791+
def NDS_FMV_BF16_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR16, GPR, "fmv.w.x">,
792+
Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>;
793+
def NDS_FMV_X_BF16 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR16, "fmv.x.w">,
794+
Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>;
795+
}
796+
797+
let Predicates = [HasVendorXAndesBFHCvt] in {
798+
def : Pat<(riscv_nds_fmv_bf16_x GPR:$src), (NDS_FMV_BF16_X GPR:$src)>;
799+
def : Pat<(riscv_nds_fmv_x_anyextbf16 (bf16 FPR16:$src)),
800+
(NDS_FMV_X_BF16 (bf16 FPR16:$src))>;
801+
} // Predicates = [HasVendorXAndesBFHCvt]
802+
803+
// Use flh/fsh to load/store bf16 if zfh is enabled.
804+
let Predicates = [HasStdExtZfh, HasVendorXAndesBFHCvt] in {
805+
def : LdPat<load, FLH, bf16>;
806+
def : StPat<store, FSH, FPR16, bf16>;
807+
} // Predicates = [HasStdExtZfh, HasVendorXAndesBFHCvt]
808+
776809
let Predicates = [HasVendorXAndesVBFHCvt] in {
777810
defm PseudoNDS_VFWCVT_S_BF16 : VPseudoVWCVT_S_BF16;
778811
defm PseudoNDS_VFNCVT_BF16_S : VPseudoVNCVT_BF16_S;

llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,10 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
224224
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
225225
Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
226226
Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
227-
// Note: Same VL as above, but i32 not xlen due to signature of
228-
// vp.strided.load
229-
VL = Builder.CreateElementCount(Builder.getInt32Ty(),
230-
VTy->getElementCount());
227+
// For rv64, need to truncate i64 to i32 to match signature. As VL is at most
228+
// the number of active lanes (which is bounded by i32) this is safe.
229+
VL = Builder.CreateTrunc(VL, Builder.getInt32Ty());
230+
231231
CallInst *CI =
232232
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
233233
{VTy, BasePtr->getType(), Stride->getType()},
@@ -302,10 +302,9 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
302302
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
303303
Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
304304
Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
305-
// Note: Same VL as above, but i32 not xlen due to signature of
306-
// vp.strided.store
307-
VL = Builder.CreateElementCount(Builder.getInt32Ty(),
308-
VTy->getElementCount());
305+
// For rv64, need to truncate i64 to i32 to match signature. As VL is at
306+
// most the number of active lanes (which is bounded by i32) this is safe.
307+
VL = Builder.CreateTrunc(VL, Builder.getInt32Ty());
309308

310309
CallInst *CI =
311310
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_store,

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,6 @@ static cl::opt<bool> EnablePostMISchedLoadStoreClustering(
104104
cl::desc("Enable PostRA load and store clustering in the machine scheduler"),
105105
cl::init(true));
106106

107-
static cl::opt<bool>
108-
EnableVLOptimizer("riscv-enable-vl-optimizer",
109-
cl::desc("Enable the RISC-V VL Optimizer pass"),
110-
cl::init(true), cl::Hidden);
111-
112107
static cl::opt<bool> DisableVectorMaskMutation(
113108
"riscv-disable-vector-mask-mutation",
114109
cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
@@ -617,8 +612,7 @@ void RISCVPassConfig::addPreRegAlloc() {
617612
addPass(createRISCVPreRAExpandPseudoPass());
618613
if (TM->getOptLevel() != CodeGenOptLevel::None) {
619614
addPass(createRISCVMergeBaseOffsetOptPass());
620-
if (EnableVLOptimizer)
621-
addPass(createRISCVVLOptimizerPass());
615+
addPass(createRISCVVLOptimizerPass());
622616
}
623617

624618
addPass(createRISCVInsertReadWriteCSRPass());

0 commit comments

Comments
 (0)