Skip to content

Commit 32f6850

Browse files
authored
Merge branch 'main' into pr/glueged
2 parents 36f7a5d + caed089 commit 32f6850

36 files changed

+511
-586
lines changed

llvm/include/llvm/Analysis/TargetLibraryInfo.h

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
namespace llvm {
2424

2525
template <typename T> class ArrayRef;
26+
enum class VectorLibrary;
2627

2728
/// Provides info so a possible vectorization of a function can be
2829
/// computed. Function 'VectorFnName' is equivalent to 'ScalarFnName'
@@ -117,25 +118,6 @@ class TargetLibraryInfoImpl {
117118
const Module &M) const;
118119

119120
public:
120-
/// List of known vector-functions libraries.
121-
///
122-
/// The vector-functions library defines, which functions are vectorizable
123-
/// and with which factor. The library can be specified by either frontend,
124-
/// or a commandline option, and then used by
125-
/// addVectorizableFunctionsFromVecLib for filling up the tables of
126-
/// vectorizable functions.
127-
enum VectorLibrary {
128-
NoLibrary, // Don't use any vector library.
129-
Accelerate, // Use Accelerate framework.
130-
DarwinLibSystemM, // Use Darwin's libsystem_m.
131-
LIBMVEC, // GLIBC Vector Math library.
132-
MASSV, // IBM MASS vector library.
133-
SVML, // Intel short vector math library.
134-
SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions.
135-
ArmPL, // Arm Performance Libraries.
136-
AMDLIBM // AMD Math Vector library.
137-
};
138-
139121
TargetLibraryInfoImpl() = delete;
140122
LLVM_ABI explicit TargetLibraryInfoImpl(const Triple &T);
141123

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//===------------------------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_IR_SYSTEMLIBRARIES_H
10+
#define LLVM_IR_SYSTEMLIBRARIES_H
11+
12+
namespace llvm {
13+
/// List of known vector-functions libraries.
14+
///
15+
/// The vector-functions library defines which functions are vectorizable
16+
/// and with which factor. The library can be specified by either frontend,
17+
/// or a commandline option, and then used by
18+
/// addVectorizableFunctionsFromVecLib for filling up the tables of
19+
/// vectorizable functions.
20+
enum class VectorLibrary {
21+
NoLibrary, // Don't use any vector library.
22+
Accelerate, // Use Accelerate framework.
23+
DarwinLibSystemM, // Use Darwin's libsystem_m.
24+
LIBMVEC, // GLIBC Vector Math library.
25+
MASSV, // IBM MASS vector library.
26+
SVML, // Intel short vector math library.
27+
SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions.
28+
ArmPL, // Arm Performance Libraries.
29+
AMDLIBM // AMD Math Vector library.
30+
};
31+
32+
/// Command line flag value for the vector math library to use
33+
///
34+
/// FIXME: This should come from a module flag, and not be mutually exclusive
35+
extern VectorLibrary ClVectorLibrary;
36+
37+
} // namespace llvm
38+
39+
#endif // LLVM_IR_SYSTEMLIBRARIES_H

llvm/lib/Analysis/TargetLibraryInfo.cpp

Lines changed: 10 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -15,33 +15,11 @@
1515
#include "llvm/ADT/SmallString.h"
1616
#include "llvm/IR/Constants.h"
1717
#include "llvm/IR/Module.h"
18+
#include "llvm/IR/SystemLibraries.h"
1819
#include "llvm/InitializePasses.h"
19-
#include "llvm/Support/CommandLine.h"
2020
#include "llvm/TargetParser/Triple.h"
2121
using namespace llvm;
2222

23-
static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
24-
"vector-library", cl::Hidden, cl::desc("Vector functions library"),
25-
cl::init(TargetLibraryInfoImpl::NoLibrary),
26-
cl::values(clEnumValN(TargetLibraryInfoImpl::NoLibrary, "none",
27-
"No vector functions library"),
28-
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
29-
"Accelerate framework"),
30-
clEnumValN(TargetLibraryInfoImpl::DarwinLibSystemM,
31-
"Darwin_libsystem_m", "Darwin libsystem_m"),
32-
clEnumValN(TargetLibraryInfoImpl::LIBMVEC, "LIBMVEC",
33-
"GLIBC Vector Math library"),
34-
clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
35-
"IBM MASS vector library"),
36-
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
37-
"Intel SVML library"),
38-
clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi",
39-
"SIMD Library for Evaluating Elementary Functions"),
40-
clEnumValN(TargetLibraryInfoImpl::ArmPL, "ArmPL",
41-
"Arm Performance Libraries"),
42-
clEnumValN(TargetLibraryInfoImpl::AMDLIBM, "AMDLIBM",
43-
"AMD vector math library")));
44-
4523
StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
4624
{
4725
#define TLI_DEFINE_STRING
@@ -1392,15 +1370,15 @@ const VecDesc VecFuncs_AMDLIBM[] = {
13921370
void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
13931371
enum VectorLibrary VecLib, const llvm::Triple &TargetTriple) {
13941372
switch (VecLib) {
1395-
case Accelerate: {
1373+
case VectorLibrary::Accelerate: {
13961374
addVectorizableFunctions(VecFuncs_Accelerate);
13971375
break;
13981376
}
1399-
case DarwinLibSystemM: {
1377+
case VectorLibrary::DarwinLibSystemM: {
14001378
addVectorizableFunctions(VecFuncs_DarwinLibSystemM);
14011379
break;
14021380
}
1403-
case LIBMVEC: {
1381+
case VectorLibrary::LIBMVEC: {
14041382
switch (TargetTriple.getArch()) {
14051383
default:
14061384
break;
@@ -1415,15 +1393,15 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
14151393
}
14161394
break;
14171395
}
1418-
case MASSV: {
1396+
case VectorLibrary::MASSV: {
14191397
addVectorizableFunctions(VecFuncs_MASSV);
14201398
break;
14211399
}
1422-
case SVML: {
1400+
case VectorLibrary::SVML: {
14231401
addVectorizableFunctions(VecFuncs_SVML);
14241402
break;
14251403
}
1426-
case SLEEFGNUABI: {
1404+
case VectorLibrary::SLEEFGNUABI: {
14271405
switch (TargetTriple.getArch()) {
14281406
default:
14291407
break;
@@ -1439,7 +1417,7 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
14391417
}
14401418
break;
14411419
}
1442-
case ArmPL: {
1420+
case VectorLibrary::ArmPL: {
14431421
switch (TargetTriple.getArch()) {
14441422
default:
14451423
break;
@@ -1450,11 +1428,11 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
14501428
}
14511429
break;
14521430
}
1453-
case AMDLIBM: {
1431+
case VectorLibrary::AMDLIBM: {
14541432
addVectorizableFunctions(VecFuncs_AMDLIBM);
14551433
break;
14561434
}
1457-
case NoLibrary:
1435+
case VectorLibrary::NoLibrary:
14581436
break;
14591437
}
14601438
}

llvm/lib/Frontend/Driver/CodeGenOptions.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include "llvm/Frontend/Driver/CodeGenOptions.h"
1010
#include "llvm/Analysis/TargetLibraryInfo.h"
11+
#include "llvm/IR/SystemLibraries.h"
1112
#include "llvm/ProfileData/InstrProfCorrelator.h"
1213
#include "llvm/TargetParser/Triple.h"
1314

@@ -25,35 +26,35 @@ TargetLibraryInfoImpl *createTLII(const llvm::Triple &TargetTriple,
2526
using VectorLibrary = llvm::driver::VectorLibrary;
2627
switch (Veclib) {
2728
case VectorLibrary::Accelerate:
28-
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate,
29+
TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::Accelerate,
2930
TargetTriple);
3031
break;
3132
case VectorLibrary::LIBMVEC:
32-
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC,
33+
TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::LIBMVEC,
3334
TargetTriple);
3435
break;
3536
case VectorLibrary::MASSV:
36-
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV,
37+
TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::MASSV,
3738
TargetTriple);
3839
break;
3940
case VectorLibrary::SVML:
40-
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML,
41+
TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::SVML,
4142
TargetTriple);
4243
break;
4344
case VectorLibrary::SLEEF:
44-
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI,
45+
TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::SLEEFGNUABI,
4546
TargetTriple);
4647
break;
4748
case VectorLibrary::Darwin_libsystem_m:
4849
TLII->addVectorizableFunctionsFromVecLib(
49-
TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple);
50+
llvm::VectorLibrary::DarwinLibSystemM, TargetTriple);
5051
break;
5152
case VectorLibrary::ArmPL:
52-
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL,
53+
TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::ArmPL,
5354
TargetTriple);
5455
break;
5556
case VectorLibrary::AMDLIBM:
56-
TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::AMDLIBM,
57+
TLII->addVectorizableFunctionsFromVecLib(llvm::VectorLibrary::AMDLIBM,
5758
TargetTriple);
5859
break;
5960
default:

llvm/lib/IR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ add_llvm_component_library(LLVMCore
6767
ReplaceConstant.cpp
6868
Statepoint.cpp
6969
StructuralHash.cpp
70+
SystemLibraries.cpp
7071
Type.cpp
7172
TypedPointerType.cpp
7273
TypeFinder.cpp

llvm/lib/IR/SystemLibraries.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/IR/SystemLibraries.h"
10+
#include "llvm/Support/CommandLine.h"
11+
12+
using namespace llvm;
13+
14+
VectorLibrary llvm::ClVectorLibrary;
15+
16+
static cl::opt<VectorLibrary, true> ClVectorLibraryOpt(
17+
"vector-library", cl::Hidden, cl::desc("Vector functions library"),
18+
cl::location(llvm::ClVectorLibrary), cl::init(VectorLibrary::NoLibrary),
19+
cl::values(
20+
clEnumValN(VectorLibrary::NoLibrary, "none",
21+
"No vector functions library"),
22+
clEnumValN(VectorLibrary::Accelerate, "Accelerate",
23+
"Accelerate framework"),
24+
clEnumValN(VectorLibrary::DarwinLibSystemM, "Darwin_libsystem_m",
25+
"Darwin libsystem_m"),
26+
clEnumValN(VectorLibrary::LIBMVEC, "LIBMVEC",
27+
"GLIBC Vector Math library"),
28+
clEnumValN(VectorLibrary::MASSV, "MASSV", "IBM MASS vector library"),
29+
clEnumValN(VectorLibrary::SVML, "SVML", "Intel SVML library"),
30+
clEnumValN(VectorLibrary::SLEEFGNUABI, "sleefgnuabi",
31+
"SIMD Library for Evaluating Elementary Functions"),
32+
clEnumValN(VectorLibrary::ArmPL, "ArmPL", "Arm Performance Libraries"),
33+
clEnumValN(VectorLibrary::AMDLIBM, "AMDLIBM",
34+
"AMD vector math library")));

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 73 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1123,24 +1123,85 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
11231123
}
11241124
}
11251125

1126-
// FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation
1127-
// where a consecutive multi-vector tuple is constructed from the same indices
1128-
// of multiple strided loads. This may still result in unnecessary copies
1129-
// between the loads and the tuple. Here we try to return a hint to assign the
1130-
// contiguous ZPRMulReg starting at the same register as the first operand of
1131-
// the pseudo, which should be a subregister of the first strided load.
1126+
// We add regalloc hints for different cases:
1127+
// * Choosing a better destination operand for predicated SVE instructions
1128+
// where the inactive lanes are undef, by choosing a register that is not
1129+
// unique to the other operands of the instruction.
11321130
//
1133-
// For example, if the first strided load has been assigned $z16_z20_z24_z28
1134-
// and the operands of the pseudo are each accessing subregister zsub2, we
1135-
// should look through through Order to find a contiguous register which
1136-
// begins with $z24 (i.e. $z24_z25_z26_z27).
1131+
// * Improve register allocation for SME multi-vector instructions where we can
1132+
// benefit from the strided- and contiguous register multi-vector tuples.
11371133
//
1134+
// Here FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register
1135+
// allocation where a consecutive multi-vector tuple is constructed from the
1136+
// same indices of multiple strided loads. This may still result in
1137+
// unnecessary copies between the loads and the tuple. Here we try to return a
1138+
// hint to assign the contiguous ZPRMulReg starting at the same register as
1139+
// the first operand of the pseudo, which should be a subregister of the first
1140+
// strided load.
1141+
//
1142+
// For example, if the first strided load has been assigned $z16_z20_z24_z28
1143+
// and the operands of the pseudo are each accessing subregister zsub2, we
1144+
// should look through Order to find a contiguous register which
1145+
// begins with $z24 (i.e. $z24_z25_z26_z27).
11381146
bool AArch64RegisterInfo::getRegAllocationHints(
11391147
Register VirtReg, ArrayRef<MCPhysReg> Order,
11401148
SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
11411149
const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
1142-
11431150
auto &ST = MF.getSubtarget<AArch64Subtarget>();
1151+
const AArch64InstrInfo *TII =
1152+
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
1153+
const MachineRegisterInfo &MRI = MF.getRegInfo();
1154+
1155+
// For predicated SVE instructions where the inactive lanes are undef,
1156+
// pick a destination register that is not unique to avoid introducing
1157+
// a movprfx.
1158+
const TargetRegisterClass *RegRC = MRI.getRegClass(VirtReg);
1159+
if (AArch64::ZPRRegClass.hasSubClassEq(RegRC)) {
1160+
for (const MachineOperand &DefOp : MRI.def_operands(VirtReg)) {
1161+
const MachineInstr &Def = *DefOp.getParent();
1162+
if (DefOp.isImplicit() ||
1163+
(TII->get(Def.getOpcode()).TSFlags & AArch64::FalseLanesMask) !=
1164+
AArch64::FalseLanesUndef)
1165+
continue;
1166+
1167+
unsigned InstFlags =
1168+
TII->get(AArch64::getSVEPseudoMap(Def.getOpcode())).TSFlags;
1169+
1170+
for (MCPhysReg R : Order) {
1171+
auto AddHintIfSuitable = [&](MCPhysReg R, const MachineOperand &MO) {
1172+
// R is a suitable register hint if there exists an operand for the
1173+
// instruction that is not yet allocated a register or if R matches
1174+
// one of the other source operands.
1175+
if (!VRM->hasPhys(MO.getReg()) || VRM->getPhys(MO.getReg()) == R)
1176+
Hints.push_back(R);
1177+
};
1178+
1179+
switch (InstFlags & AArch64::DestructiveInstTypeMask) {
1180+
default:
1181+
break;
1182+
case AArch64::DestructiveTernaryCommWithRev:
1183+
AddHintIfSuitable(R, Def.getOperand(2));
1184+
AddHintIfSuitable(R, Def.getOperand(3));
1185+
AddHintIfSuitable(R, Def.getOperand(4));
1186+
break;
1187+
case AArch64::DestructiveBinaryComm:
1188+
case AArch64::DestructiveBinaryCommWithRev:
1189+
AddHintIfSuitable(R, Def.getOperand(2));
1190+
AddHintIfSuitable(R, Def.getOperand(3));
1191+
break;
1192+
case AArch64::DestructiveBinary:
1193+
case AArch64::DestructiveBinaryImm:
1194+
AddHintIfSuitable(R, Def.getOperand(2));
1195+
break;
1196+
}
1197+
}
1198+
}
1199+
1200+
if (Hints.size())
1201+
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
1202+
MF, VRM);
1203+
}
1204+
11441205
if (!ST.hasSME() || !ST.isStreaming())
11451206
return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
11461207
VRM);
@@ -1153,8 +1214,7 @@ bool AArch64RegisterInfo::getRegAllocationHints(
11531214
// FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy
11541215
// instructions over reducing the number of clobbered callee-save registers,
11551216
// so we add the strided registers as a hint.
1156-
const MachineRegisterInfo &MRI = MF.getRegInfo();
1157-
unsigned RegID = MRI.getRegClass(VirtReg)->getID();
1217+
unsigned RegID = RegRC->getID();
11581218
if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
11591219
RegID == AArch64::ZPR4StridedOrContiguousRegClassID) {
11601220

llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,11 @@ define <2 x i64> @test_mul_sub_2x64_2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c,
5252
; CHECK-NEXT: ptrue p0.d, vl2
5353
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
5454
; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
55-
; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
5655
; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
56+
; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
5757
; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
58-
; CHECK-NEXT: movprfx z1, z2
59-
; CHECK-NEXT: mul z1.d, p0/m, z1.d, z3.d
60-
; CHECK-NEXT: sub v0.2d, v1.2d, v0.2d
58+
; CHECK-NEXT: mul z2.d, p0/m, z2.d, z3.d
59+
; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
6160
; CHECK-NEXT: ret
6261
%div = sdiv <2 x i64> %a, %b
6362
%mul = mul <2 x i64> %c, %d

0 commit comments

Comments
 (0)