[SelectionDAG][RISCV] Use VP_LOAD to widen MLOAD in type legalization when possible. #140595
Conversation
Padding the mask using 0 elements doesn't work for scalable vectors. Use VP_LOAD and change the VL instead. This fixes a crash for Zve32x. The test was split since i64 isn't a valid element type for Zve32x. Fixes llvm#140198.
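For illustration, here is a rough IR-level sketch of the idea, assuming a <vscale x 1 x i8> masked load that must be widened to <vscale x 2 x i8> (as happens under +zve32x,+zvl128b). The actual change operates on SelectionDAG nodes during type legalization; the function and value names below are invented for the example.

declare <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.nxv1i1(<vscale x 2 x i1>, <vscale x 1 x i1>, i64)
declare <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv2i8(<vscale x 2 x i8>, i64)
declare i32 @llvm.vscale.i32()

define <vscale x 1 x i8> @widen_mload_sketch(ptr %p, <vscale x 1 x i1> %mask) {
  ; Place the narrow mask in the low lanes of a wider mask; the high lanes
  ; can stay poison because the explicit vector length below keeps them
  ; from ever being accessed.
  %widemask = call <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.nxv1i1(<vscale x 2 x i1> poison, <vscale x 1 x i1> %mask, i64 0)
  ; EVL = element count of the original type, i.e. vscale x 1.
  %evl = call i32 @llvm.vscale.i32()
  ; Load the widened type, limited to the original element count.
  %wide = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %p, <vscale x 2 x i1> %widemask, i32 %evl)
  ; Conceptually, the original narrow value is the low part of the result.
  %res = call <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv2i8(<vscale x 2 x i8> %wide, i64 0)
  ret <vscale x 1 x i8> %res
}

The ZVE32 check lines in the updated test show exactly this shape: the VL is computed as vlenb >> 3 (the element count of the original <vscale x 1 x i8> type) and passed to vsetvli, instead of padding the mask with zero elements.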
@llvm/pr-subscribers-llvm-selectiondag

Author: Craig Topper (topperc)

Changes: Padding the mask using 0 elements doesn't work for scalable vectors. Use VP_LOAD and change the VL instead. This fixes a crash for Zve32x. The test was split since i64 isn't a valid element type for Zve32x. Fixes #140198.

Full diff: https://github.com/llvm/llvm-project/pull/140595.diff

3 Files Affected:
 diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 4e9a6942d6cd9..59a9dced6cf03 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6111,18 +6111,37 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_COMPRESS(SDNode *N) {
 }
 
 SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
-
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
+  EVT VT = N->getValueType(0);
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
   SDValue Mask = N->getMask();
   EVT MaskVT = Mask.getValueType();
   SDValue PassThru = GetWidenedVector(N->getPassThru());
   ISD::LoadExtType ExtType = N->getExtensionType();
   SDLoc dl(N);
 
+  EVT WideMaskVT =
+      EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(),
+                       WidenVT.getVectorElementCount());
+
+  if (ExtType == ISD::NON_EXTLOAD &&
+      TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WidenVT) &&
+      TLI.isTypeLegal(WideMaskVT)) {
+    Mask = DAG.getInsertSubvector(dl, DAG.getUNDEF(WideMaskVT), Mask, 0);
+    SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
+                                      VT.getVectorElementCount());
+    SDValue NewLoad =
+        DAG.getLoadVP(N->getAddressingMode(), ISD::NON_EXTLOAD, WidenVT, dl,
+                      N->getChain(), N->getBasePtr(), N->getOffset(), Mask, EVL,
+                      N->getMemoryVT(), N->getMemOperand());
+
+    // Modified the chain - switch anything that used the old chain to use
+    // the new one.
+    ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
+
+    return NewLoad;
+  }
+
   // The mask should be widened as well
-  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
-                                    MaskVT.getVectorElementType(),
-                                    WidenVT.getVectorNumElements());
   Mask = ModifyToType(Mask, WideMaskVT, true);
 
   SDValue Res = DAG.getMaskedLoad(
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll
new file mode 100644
index 0000000000000..493d55f6eefe6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr %a, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i64> undef)
+  ret <vscale x 1 x i64> %load
+}
+declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
+
+define <vscale x 2 x i64> @masked_load_nxv2i64(ptr %a, <vscale x 2 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr %a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %load
+}
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+define <vscale x 4 x i64> @masked_load_nxv4i64(ptr %a, <vscale x 4 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr %a, i32 8, <vscale x 4 x i1> %mask, <vscale x 4 x i64> undef)
+  ret <vscale x 4 x i64> %load
+}
+declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
+
+define <vscale x 8 x i64> @masked_load_nxv8i64(ptr %a, <vscale x 8 x i1> %mask) nounwind {
+; CHECK-LABEL: masked_load_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vle64.v v8, (a0), v0.t
+; CHECK-NEXT:    ret
+  %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
+  ret <vscale x 8 x i64> %load
+}
+declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index f8c1d5e45bc28..d992669306fb1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -1,51 +1,66 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc -mtriple=riscv32 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
+; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
 
 define <vscale x 1 x i8> @masked_load_nxv1i8(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
+; V-LABEL: masked_load_nxv1i8:
+; V:       # %bb.0:
+; V-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; V-NEXT:    vle8.v v8, (a0), v0.t
+; V-NEXT:    ret
+;
+; ZVE32-LABEL: masked_load_nxv1i8:
+; ZVE32:       # %bb.0:
+; ZVE32-NEXT:    csrr a1, vlenb
+; ZVE32-NEXT:    srli a1, a1, 3
+; ZVE32-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; ZVE32-NEXT:    vle8.v v8, (a0), v0.t
+; ZVE32-NEXT:    ret
   %load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> undef)
   ret <vscale x 1 x i8> %load
 }
 declare <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
 
 define <vscale x 1 x i16> @masked_load_nxv1i16(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
+; V-LABEL: masked_load_nxv1i16:
+; V:       # %bb.0:
+; V-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; V-NEXT:    vle16.v v8, (a0), v0.t
+; V-NEXT:    ret
+;
+; ZVE32-LABEL: masked_load_nxv1i16:
+; ZVE32:       # %bb.0:
+; ZVE32-NEXT:    csrr a1, vlenb
+; ZVE32-NEXT:    srli a1, a1, 3
+; ZVE32-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; ZVE32-NEXT:    vle16.v v8, (a0), v0.t
+; ZVE32-NEXT:    ret
   %load = call <vscale x 1 x i16> @llvm.masked.load.nxv1i16(ptr %a, i32 2, <vscale x 1 x i1> %mask, <vscale x 1 x i16> undef)
   ret <vscale x 1 x i16> %load
 }
 declare <vscale x 1 x i16> @llvm.masked.load.nxv1i16(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)
 
 define <vscale x 1 x i32> @masked_load_nxv1i32(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
+; V-LABEL: masked_load_nxv1i32:
+; V:       # %bb.0:
+; V-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; V-NEXT:    vle32.v v8, (a0), v0.t
+; V-NEXT:    ret
+;
+; ZVE32-LABEL: masked_load_nxv1i32:
+; ZVE32:       # %bb.0:
+; ZVE32-NEXT:    csrr a1, vlenb
+; ZVE32-NEXT:    srli a1, a1, 3
+; ZVE32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; ZVE32-NEXT:    vle32.v v8, (a0), v0.t
+; ZVE32-NEXT:    ret
   %load = call <vscale x 1 x i32> @llvm.masked.load.nxv1i32(ptr %a, i32 4, <vscale x 1 x i1> %mask, <vscale x 1 x i32> undef)
   ret <vscale x 1 x i32> %load
 }
 declare <vscale x 1 x i32> @llvm.masked.load.nxv1i32(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)
 
-define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv1i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  %load = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr %a, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i64> undef)
-  ret <vscale x 1 x i64> %load
-}
-declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
-
 define <vscale x 2 x i8> @masked_load_nxv2i8(ptr %a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv2i8:
 ; CHECK:       # %bb.0:
@@ -79,17 +94,6 @@ define <vscale x 2 x i32> @masked_load_nxv2i32(ptr %a, <vscale x 2 x i1> %mask)
 }
 declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
 
-define <vscale x 2 x i64> @masked_load_nxv2i64(ptr %a, <vscale x 2 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv2i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr %a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
-  ret <vscale x 2 x i64> %load
-}
-declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-
 define <vscale x 4 x i8> @masked_load_nxv4i8(ptr %a, <vscale x 4 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv4i8:
 ; CHECK:       # %bb.0:
@@ -123,17 +127,6 @@ define <vscale x 4 x i32> @masked_load_nxv4i32(ptr %a, <vscale x 4 x i1> %mask)
 }
 declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
 
-define <vscale x 4 x i64> @masked_load_nxv4i64(ptr %a, <vscale x 4 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv4i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  %load = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr %a, i32 8, <vscale x 4 x i1> %mask, <vscale x 4 x i64> undef)
-  ret <vscale x 4 x i64> %load
-}
-declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
-
 define <vscale x 8 x i8> @masked_load_nxv8i8(ptr %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv8i8:
 ; CHECK:       # %bb.0:
@@ -167,17 +160,6 @@ define <vscale x 8 x i32> @masked_load_nxv8i32(ptr %a, <vscale x 8 x i1> %mask)
 }
 declare <vscale x 8 x i32> @llvm.masked.load.nxv8i32(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
 
-define <vscale x 8 x i64> @masked_load_nxv8i64(ptr %a, <vscale x 8 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_load_nxv8i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  %load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
-  ret <vscale x 8 x i64> %load
-}
-declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
-
 define <vscale x 16 x i8> @masked_load_nxv16i8(ptr %a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv16i8:
 ; CHECK:       # %bb.0:
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/test/CodeGen/RISCV/rvv/masked-load-int-e64.ll llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll

The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef and having tests that trigger undefined behavior.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
    
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
Are these crash tests? Or can they be pre-committed so the change is visible?
This file doesn't crash, but the <vscale x 1 x *> tests in the other file crash with the new command line.
This file is just the i64 tests that were moved out of the other file.
declare <vscale x 1 x i32> @llvm.masked.load.nxv1i32(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)
define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv1i64:
Bad test updates.
I had to split the test file since i64 isn't a legal element type for masked load under Zve32.
LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at https://lab.llvm.org/buildbot/#/builders/140/builds/23572. Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at https://lab.llvm.org/buildbot/#/builders/24/builds/8679. Here is the relevant piece of the build log for reference:
LLVM Buildbot has detected a new failure on a builder. Full details are available at https://lab.llvm.org/buildbot/#/builders/60/builds/28096. Here is the relevant piece of the build log for reference:
Follow-up (#144170): #140595 used vp.load in the cases where we need to widen masked.load. However, we didn't account for the passthru operand, so it might miscompile when the passthru is not undef. While we could simply avoid using vp.load to widen when the passthru is not undef, doing so would run into the exact same crash described in #140198, so for scalable vectors this patch manually merges the vp.load result with the passthru when the latter is not undef.
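As a rough IR-level sketch of that follow-up (an assumed shape, not the literal patch: the real fix merges at the SelectionDAG level, and the names below are invented for the example), the widened vp.load result has to be merged back with the passthru under the original mask, since masked.load semantics say inactive lanes produce the passthru value.

declare <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.nxv1i1(<vscale x 2 x i1>, <vscale x 1 x i1>, i64)
declare <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv2i8(<vscale x 2 x i8>, i64)
declare i32 @llvm.vscale.i32()

define <vscale x 1 x i8> @passthru_merge_sketch(ptr %p, <vscale x 1 x i1> %mask, <vscale x 1 x i8> %passthru) {
  ; Widen the mask and load with the original element count as the VL,
  ; as in the original patch.
  %widemask = call <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.nxv1i1(<vscale x 2 x i1> poison, <vscale x 1 x i1> %mask, i64 0)
  %evl = call i32 @llvm.vscale.i32()
  %wide = call <vscale x 2 x i8> @llvm.vp.load.nxv2i8.p0(ptr %p, <vscale x 2 x i1> %widemask, i32 %evl)
  %loaded = call <vscale x 1 x i8> @llvm.vector.extract.nxv1i8.nxv2i8(<vscale x 2 x i8> %wide, i64 0)
  ; Inactive lanes must keep the passthru value, so merge explicitly under
  ; the original mask instead of relying on the vp.load result alone.
  %res = select <vscale x 1 x i1> %mask, <vscale x 1 x i8> %loaded, <vscale x 1 x i8> %passthru
  ret <vscale x 1 x i8> %res
}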
Padding the mask using 0 elements doesn't work for scalable vectors. Use VP_LOAD and change the VL instead.
This fixes a crash for Zve32x. The test file was split since i64 isn't a valid element type for Zve32x.
Fixes #140198.