@@ -1123,24 +1123,85 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
   }
 }

-// FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register allocation
-// where a consecutive multi-vector tuple is constructed from the same indices
-// of multiple strided loads. This may still result in unnecessary copies
-// between the loads and the tuple. Here we try to return a hint to assign the
-// contiguous ZPRMulReg starting at the same register as the first operand of
-// the pseudo, which should be a subregister of the first strided load.
+// We add regalloc hints for different cases:
+// * Choosing a better destination operand for predicated SVE instructions
+//   where the inactive lanes are undef, by choosing a register that is not
+//   unique to the other operands of the instruction.
 //
+// * Improve register allocation for SME multi-vector instructions where we
+//   can benefit from the strided and contiguous multi-vector register tuples.
 //
+// Here FORM_TRANSPOSED_REG_TUPLE nodes are created to improve register
+// allocation where a consecutive multi-vector tuple is constructed from the
+// same indices of multiple strided loads. This may still result in
+// unnecessary copies between the loads and the tuple. We try to return a
+// hint to assign the contiguous ZPRMulReg starting at the same register as
+// the first operand of the pseudo, which should be a subregister of the
+// first strided load.
+//
+// For example, if the first strided load has been assigned $z16_z20_z24_z28
+// and the operands of the pseudo are each accessing subregister zsub2, we
+// should look through Order to find a contiguous register which begins
+// with $z24 (i.e. $z24_z25_z26_z27).
 bool AArch64RegisterInfo::getRegAllocationHints(
     Register VirtReg, ArrayRef<MCPhysReg> Order,
     SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
     const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
-
   auto &ST = MF.getSubtarget<AArch64Subtarget>();
+  const AArch64InstrInfo *TII =
+      MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // For predicated SVE instructions where the inactive lanes are undef,
+  // pick a destination register that is not unique to avoid introducing
+  // a movprfx.
+  const TargetRegisterClass *RegRC = MRI.getRegClass(VirtReg);
+  if (AArch64::ZPRRegClass.hasSubClassEq(RegRC)) {
+    for (const MachineOperand &DefOp : MRI.def_operands(VirtReg)) {
+      const MachineInstr &Def = *DefOp.getParent();
+      if (DefOp.isImplicit() ||
+          (TII->get(Def.getOpcode()).TSFlags & AArch64::FalseLanesMask) !=
+              AArch64::FalseLanesUndef)
+        continue;
+
+      unsigned InstFlags =
+          TII->get(AArch64::getSVEPseudoMap(Def.getOpcode())).TSFlags;
+
+      for (MCPhysReg R : Order) {
+        auto AddHintIfSuitable = [&](MCPhysReg R, const MachineOperand &MO) {
+          // R is a suitable register hint if there exists an operand for the
+          // instruction that is not yet allocated a register or if R matches
+          // one of the other source operands.
+          if (!VRM->hasPhys(MO.getReg()) || VRM->getPhys(MO.getReg()) == R)
+            Hints.push_back(R);
+        };
+
+        switch (InstFlags & AArch64::DestructiveInstTypeMask) {
+        default:
+          break;
+        case AArch64::DestructiveTernaryCommWithRev:
+          AddHintIfSuitable(R, Def.getOperand(2));
+          AddHintIfSuitable(R, Def.getOperand(3));
+          AddHintIfSuitable(R, Def.getOperand(4));
+          break;
+        case AArch64::DestructiveBinaryComm:
+        case AArch64::DestructiveBinaryCommWithRev:
+          AddHintIfSuitable(R, Def.getOperand(2));
+          AddHintIfSuitable(R, Def.getOperand(3));
+          break;
+        case AArch64::DestructiveBinary:
+        case AArch64::DestructiveBinaryImm:
+          AddHintIfSuitable(R, Def.getOperand(2));
+          break;
+        }
+      }
+    }
+
+    if (Hints.size())
+      return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints,
+                                                       MF, VRM);
+  }
+
   if (!ST.hasSME() || !ST.isStreaming())
     return TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF,
                                                      VRM);
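
To make the hint rule in the block above concrete, here is a minimal standalone sketch of the same predicate outside of LLVM. ToyVRM, addHintIfSuitable, and the register numbers are made-up stand-ins for VirtRegMap and the patch's lambda, not LLVM API; the point is only the check itself: a candidate physical register R is hinted for the destination when some source operand is either not yet assigned a physical register or already lives in R, so the destructive encoding can reuse a source and no movprfx is needed.

  #include <cstdio>
  #include <optional>
  #include <vector>

  using PhysReg = unsigned; // e.g. 24 stands for SVE register z24
  using VirtReg = unsigned;

  // Toy stand-in for VirtRegMap: which virtual registers already have a
  // physical register assigned.
  struct ToyVRM {
    std::vector<std::optional<PhysReg>> Assignment;
    bool hasPhys(VirtReg V) const { return Assignment[V].has_value(); }
    PhysReg getPhys(VirtReg V) const { return *Assignment[V]; }
  };

  // Mirrors AddHintIfSuitable: hint R if the source operand is unallocated
  // or is already assigned R.
  static void addHintIfSuitable(PhysReg R, VirtReg Src, const ToyVRM &VRM,
                                std::vector<PhysReg> &Hints) {
    if (!VRM.hasPhys(Src) || VRM.getPhys(Src) == R)
      Hints.push_back(R);
  }

  int main() {
    // v0 (the destination) is unassigned; v1 is already in z17, v2 in z23.
    ToyVRM VRM{{std::nullopt, PhysReg{17}, PhysReg{23}}};
    const std::vector<PhysReg> Order = {16, 17, 23, 24}; // allocation order
    const std::vector<VirtReg> Srcs = {1, 2}; // a destructive binary op
    std::vector<PhysReg> Hints;
    for (PhysReg R : Order)
      for (VirtReg Src : Srcs)
        addHintIfSuitable(R, Src, VRM, Hints);
    for (PhysReg R : Hints)
      std::printf("hint z%u\n", R); // prints z17 and z23
    return 0;
  }

Running this prints z17 and z23, the registers already holding the sources, which is exactly the set the patch pushes to the front of Hints so the allocator tries them before a fresh register that would force a movprfx.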
@@ -1153,8 +1214,7 @@ bool AArch64RegisterInfo::getRegAllocationHints(
   // FORM_TRANSPOSED_REG_TUPLE pseudo, we want to favour reducing copy
   // instructions over reducing the number of clobbered callee-save registers,
   // so we add the strided registers as a hint.
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  unsigned RegID = MRI.getRegClass(VirtReg)->getID();
+  unsigned RegID = RegRC->getID();
   if (RegID == AArch64::ZPR2StridedOrContiguousRegClassID ||
       RegID == AArch64::ZPR4StridedOrContiguousRegClassID) {
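
As a concrete illustration of the tuple hint described in the comments above, a small self-contained sketch of the subregister arithmetic; contiguousTupleStart is a hypothetical helper for illustration, not a function in the patch. Assuming the first strided load was assigned $z16_z20_z24_z28 (start z16, stride 4) and the pseudo's operands each access zsub2, the contiguous hint should begin at z16 + 2 * 4 = z24, i.e. $z24_z25_z26_z27.

  #include <cstdio>

  // Start register of the contiguous tuple to hint, given the start and
  // stride of the strided tuple and the subregister index being accessed.
  static unsigned contiguousTupleStart(unsigned StridedStart, unsigned Stride,
                                       unsigned SubRegIdx) {
    return StridedStart + SubRegIdx * Stride;
  }

  int main() {
    // $z16_z20_z24_z28, operands access zsub2 -> hint begins at z24.
    std::printf("hint tuple starts at z%u\n",
                contiguousTupleStart(16, 4, 2));
    return 0;
  }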