llvm · tschuett · Nov 1, 2024 · Nov 1, 2024 · Nov 1, 2024 · Nov 2, 2024
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -267,6 +267,9 @@ class LegalizationArtifactCombiner {
     const LLT DstTy = MRI.getType(DstReg);
     Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
 
+    if (DstTy.isScalableVector())
+      return false;
+
     // Try to fold trunc(g_constant) when the smaller constant type is legal.
     auto *SrcMI = MRI.getVRegDef(SrcReg);
     if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -196,6 +196,8 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
 
 LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
   return [=](const LegalityQuery &Query) {
+    if (Query.MMODescrs[MMOIdx].MemoryTy.isScalableVector())
+      return true;
     return !llvm::has_single_bit<uint32_t>(
         Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
   };

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -61,11 +61,19 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   const LLT v2s64 = LLT::fixed_vector(2, 64);
   const LLT v2p0 = LLT::fixed_vector(2, p0);
 
+  // SVE vscale x 128 bit base sizes
   const LLT nxv16s8 = LLT::scalable_vector(16, s8);
   const LLT nxv8s16 = LLT::scalable_vector(8, s16);
   const LLT nxv4s32 = LLT::scalable_vector(4, s32);
   const LLT nxv2s64 = LLT::scalable_vector(2, s64);
 
+  // SVE vscale x 64 bit base sizes
+  const LLT nxv4s16 = LLT::scalable_vector(4, s16);
+  const LLT nxv2s32 = LLT::scalable_vector(2, s32);
+
+  // SVE vscale x 32 bit base sizes
+  const LLT nxv2s16 = LLT::scalable_vector(2, s16);
+
   std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
                                                         v16s8, v8s16, v4s32,
                                                         v2s64, v2p0,
@@ -385,7 +393,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                                  {v8s16, p0, s128, 8},
                                  {v2s32, p0, s64, 8},
                                  {v4s32, p0, s128, 8},
-                                 {v2s64, p0, s128, 8}})
+                                 {v2s64, p0, s128, 8},
+                                 // SVE vscale x 64 bit base sizes
+                                 {nxv4s16, p0, nxv4s16, 8}})
       // These extends are also legal
       .legalForTypesWithMemDesc(
           {{s32, p0, s8, 8}, {s32, p0, s16, 8}, {s64, p0, s32, 8}})
@@ -442,16 +452,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
            {p0, p0, s64, 8},    {s128, p0, s128, 8},  {v16s8, p0, s128, 8},
            {v8s8, p0, s64, 8},  {v4s16, p0, s64, 8},  {v8s16, p0, s128, 8},
            {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
-      .legalForTypesWithMemDesc({
-          // SVE vscale x 128 bit base sizes
-          // TODO: Add nxv2p0. Consider bitcastIf.
-          //       See #92130
-          // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
-          {nxv16s8, p0, nxv16s8, 8},
-          {nxv8s16, p0, nxv8s16, 8},
-          {nxv4s32, p0, nxv4s32, 8},
-          {nxv2s64, p0, nxv2s64, 8},
-      })
+      .legalForTypesWithMemDesc(
+          {// SVE vscale x 128 bit base sizes
+           // TODO: Add nxv2p0. Consider bitcastIf.
+           //       See #92130
+           // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
+           {nxv16s8, p0, nxv16s8, 8},
+           {nxv8s16, p0, nxv8s16, 8},
+           {nxv4s32, p0, nxv4s32, 8},
+           {nxv2s64, p0, nxv2s64, 8},
+           // SVE vscale x 64 bit base sizes
+           {nxv2s32, p0, nxv2s32, 8},
+           {nxv4s16, p0, nxv4s16, 8},
+           // SVE vscale x 32 bit base sizes
+           {nxv2s16, p0, nxv2s16, 8}})
       .clampScalar(0, s8, s64)
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].isScalar() &&
@@ -639,17 +653,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_TRUNC)
       .legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
+      .legalFor(HasSVE, {{nxv4s16, nxv4s32}})
       .moreElementsToNextPow2(0)
       .clampMaxNumElements(0, s8, 8)
       .clampMaxNumElements(0, s16, 4)
       .clampMaxNumElements(0, s32, 2)
       .minScalarOrEltIf(
-          [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].isFixedVector();
+          },
           0, s8)
       .lowerIf([=](const LegalityQuery &Query) {
         LLT DstTy = Query.Types[0];
         LLT SrcTy = Query.Types[1];
-        return DstTy.isVector() && SrcTy.getSizeInBits() > 128 &&
+        return DstTy.isFixedVector() && SrcTy.getSizeInBits() > 128 &&
                DstTy.getScalarSizeInBits() * 2 <= SrcTy.getScalarSizeInBits();
       })
       .clampMinNumElements(0, s8, 8)
@@ -1317,6 +1334,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
       .legalFor({{v8s8, v16s8}, {v4s16, v8s16}, {v2s32, v4s32}})
+      .legalFor(HasSVE, {{nxv2s16, nxv4s16},
+                         {nxv2s16, nxv8s16},
+                         {nxv4s16, nxv8s16},
+                         {nxv2s32, nxv4s32}})
       .widenScalarOrEltToNextPow2(0)
       .immIdx(0); // Inform verifier imm idx 0 is handled.
 

diff --git a/llvm/test/CodeGen/AArch64/extract_subvector.ll b/llvm/test/CodeGen/AArch64/extract_subvector.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve  | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
+
+define void @extract_nxv2i32_nxv4i32(<vscale x 4 x i32> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i32_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x i32> @llvm.vector.extract.nxv2i32.nxv4i32(<vscale x 4 x i32> %arg, i64 0)
+  store <vscale x 2 x i32> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv4i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv4i16_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 4 x i16> @llvm.vector.extract.nxv4i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+  store <vscale x 4 x i16> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv2i16_nxv8i16_2(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 2)
+  store <vscale x 2 x i16> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv2i16_nxv8i16(<vscale x 8 x i16> %arg, ptr %p) {
+; CHECK-LABEL: extract_nxv2i16_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv8i16(<vscale x 8 x i16> %arg, i64 0)
+  store <vscale x 2 x i16> %ext, ptr %p
+  ret void
+}
+
+define void @extract_nxv2i16_nxv4i16(ptr %p, ptr %p2) {
+; CHECK-LABEL: extract_nxv2i16_nxv4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    st1h { z0.d }, p0, [x1]
+; CHECK-NEXT:    ret
+  %vector = load <vscale x 4 x i16>, ptr %p
+  %ext = call <vscale x 2 x i16> @llvm.vector.extract.nxv2i16.nxv4i16(<vscale x 4 x i16> %vector, i64 0)
+  store <vscale x 2 x i16> %ext, ptr %p2
+  ret void
+}