Skip to content

[ARM] Protect against odd sized vectors in isVTRNMask and friends #153413

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main from davemgreen:gh-arm-oddtrnmask on
Aug 13, 2025

Conversation

davemgreen
Copy link
Collaborator

Fixes the issue reported on #153138, where odd-sized vectors would cause the checks to iterate off the end of the mask.

Fixes the issue reported on llvm#153138, where odd-sized vectors would cause the
checks to iterate off the end of the mask.
@llvmbot
Copy link
Member

llvmbot commented Aug 13, 2025

@llvm/pr-subscribers-backend-arm

Author: David Green (davemgreen)

Changes

Fixes the issue reported on #153138, where odd-sized vectors would cause the checks to iterate off the end of the mask.


Full diff: https://github.com/llvm/llvm-project/pull/153413.diff

4 Files Affected:

  • (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+4-4)
  • (modified) llvm/test/CodeGen/ARM/vtrn.ll (+125-55)
  • (modified) llvm/test/CodeGen/ARM/vuzp.ll (+56)
  • (modified) llvm/test/CodeGen/ARM/vzip.ll (+19)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d386c917a256d..ef5ba4fdfd114 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -7406,7 +7406,7 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
     return false;
 
   unsigned NumElts = VT.getVectorNumElements();
-  if (M.size() != NumElts && M.size() != NumElts*2)
+  if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
     return false;
 
   // If the mask is twice as long as the input vector then we need to check the
@@ -7438,7 +7438,7 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
     return false;
 
   unsigned NumElts = VT.getVectorNumElements();
-  if (M.size() != NumElts && M.size() != NumElts*2)
+  if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
     return false;
 
   for (unsigned i = 0; i < M.size(); i += NumElts) {
@@ -7541,7 +7541,7 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
     return false;
 
   unsigned NumElts = VT.getVectorNumElements();
-  if (M.size() != NumElts && M.size() != NumElts*2)
+  if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
     return false;
 
   for (unsigned i = 0; i < M.size(); i += NumElts) {
@@ -7574,7 +7574,7 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
     return false;
 
   unsigned NumElts = VT.getVectorNumElements();
-  if (M.size() != NumElts && M.size() != NumElts*2)
+  if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
     return false;
 
   for (unsigned i = 0; i < M.size(); i += NumElts) {
diff --git a/llvm/test/CodeGen/ARM/vtrn.ll b/llvm/test/CodeGen/ARM/vtrn.ll
index 136fec3ac3167..63774694f8a9a 100644
--- a/llvm/test/CodeGen/ARM/vtrn.ll
+++ b/llvm/test/CodeGen/ARM/vtrn.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 define <8 x i8> @vtrni8(ptr %A, ptr %B) nounwind {
@@ -20,11 +21,11 @@ define <8 x i8> @vtrni8(ptr %A, ptr %B) nounwind {
 define <16 x i8> @vtrni8_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vtrni8_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
-; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
-; CHECK-NEXT:    vtrn.8 [[LDR0]], [[LDR1]]
-; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
-; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtrn.8 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
   %tmp1 = load <8 x i8>, ptr %A
   %tmp2 = load <8 x i8>, ptr %B
@@ -52,11 +53,11 @@ define <4 x i16> @vtrni16(ptr %A, ptr %B) nounwind {
 define <8 x i16> @vtrni16_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vtrni16_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
-; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
-; CHECK-NEXT:    vtrn.16 [[LDR0]], [[LDR1]]
-; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
-; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtrn.16 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
   %tmp1 = load <4 x i16>, ptr %A
   %tmp2 = load <4 x i16>, ptr %B
@@ -84,11 +85,11 @@ define <2 x i32> @vtrni32(ptr %A, ptr %B) nounwind {
 define <4 x i32> @vtrni32_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vtrni32_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
-; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
-; CHECK-NEXT:    vtrn.32 [[LDR0]], [[LDR1]]
-; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
-; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtrn.32 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
   %tmp1 = load <2 x i32>, ptr %A
   %tmp2 = load <2 x i32>, ptr %B
@@ -116,11 +117,11 @@ define <2 x float> @vtrnf(ptr %A, ptr %B) nounwind {
 define <4 x float> @vtrnf_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vtrnf_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
-; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
-; CHECK-NEXT:    vtrn.32 [[LDR0]], [[LDR1]]
-; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
-; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtrn.32 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
   %tmp1 = load <2 x float>, ptr %A
   %tmp2 = load <2 x float>, ptr %B
@@ -281,11 +282,11 @@ define <8 x i8> @vtrni8_undef(ptr %A, ptr %B) nounwind {
 define <16 x i8> @vtrni8_undef_Qres(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: vtrni8_undef_Qres:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr [[LDR1:d[0-9]+]], [r1]
-; CHECK-NEXT:    vldr [[LDR0:d[0-9]+]], [r0]
-; CHECK-NEXT:    vtrn.8 [[LDR0]], [[LDR1]]
-; CHECK-NEXT:    vmov r0, r1, [[LDR0]]
-; CHECK-NEXT:    vmov r2, r3, [[LDR1]]
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtrn.8 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d17
+; CHECK-NEXT:    vmov r2, r3, d16
 ; CHECK-NEXT:    mov pc, lr
   %tmp1 = load <8 x i8>, ptr %A
   %tmp2 = load <8 x i8>, ptr %B
@@ -327,9 +328,15 @@ define <16 x i16> @vtrnQi16_undef_QQres(ptr %A, ptr %B) nounwind {
 }
 
 define <8 x i16> @vtrn_lower_shufflemask_undef(ptr %A, ptr %B) {
+; CHECK-LABEL: vtrn_lower_shufflemask_undef:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d17, [r0]
+; CHECK-NEXT:    vtrn.16 d17, d16
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d16
+; CHECK-NEXT:    mov pc, lr
 entry:
-  ; CHECK-LABEL: vtrn_lower_shufflemask_undef
-  ; CHECK: vtrn
 	%tmp1 = load <4 x i16>, ptr %A
 	%tmp2 = load <4 x i16>, ptr %B
   %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 5, i32 3, i32 7>
@@ -340,12 +347,26 @@ entry:
 ; values do modify the type. However, we get different input types, as some of
 ; them get truncated from i32 to i8 (from comparing cmp0 with cmp1) and some of
 ; them get truncated from i16 to i8 (from comparing cmp2 with cmp3).
-define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
-                                             <4 x i32> %cmp0, <4 x i32> %cmp1,
-                                             <4 x i16> %cmp2, <4 x i16> %cmp3) {
-  ; CHECK-LABEL: vtrn_mismatched_builvector0:
-  ; CHECK: vmovn.i32
-  ; CHECK: vbsl
+define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1, <4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i16> %cmp2, <4 x i16> %cmp3) {
+; CHECK-LABEL: vtrn_mismatched_builvector0:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    mov r12, sp
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT:    add r12, sp, #16
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT:    vcgt.u32 q8, q9, q8
+; CHECK-NEXT:    vldr d20, [sp, #32]
+; CHECK-NEXT:    vldr d18, [sp, #40]
+; CHECK-NEXT:    vcgt.u16 d18, d18, d20
+; CHECK-NEXT:    vmovn.i32 d16, q8
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vtrn.8 d16, d18
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vshl.i8 d16, d16, #7
+; CHECK-NEXT:    vshr.s8 d16, d16, #7
+; CHECK-NEXT:    vbsl d16, d18, d17
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    mov pc, lr
   %c0 = icmp ult <4 x i32> %cmp0, %cmp1
   %c1 = icmp ult <4 x i16> %cmp2, %cmp3
   %c = shufflevector <4 x i1> %c0, <4 x i1> %c1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -356,12 +377,30 @@ define <8 x i8> @vtrn_mismatched_builvector0(<8 x i8> %tr0, <8 x i8> %tr1,
 ; Here we get a build_vector node, where half the incoming extract_element
 ; values do not modify the type (the values form cmp2), but half of them do
 ; (from the icmp operation).
-define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
-                           <4 x i32> %cmp0, <4 x i32> %cmp1, ptr %cmp2_ptr) {
-  ; CHECK-LABEL: vtrn_mismatched_builvector1:
-  ; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
-  ; CHECK: vmovl
-  ; CHECK: vbsl
+; We need to extend the 4 x i8 to 4 x i16 in order to perform the vtrn
+define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1, <4 x i32> %cmp0, <4 x i32> %cmp1, ptr %cmp2_ptr) {
+; CHECK-LABEL: vtrn_mismatched_builvector1:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r11, lr}
+; CHECK-NEXT:    push {r11, lr}
+; CHECK-NEXT:    add r12, sp, #8
+; CHECK-NEXT:    add lr, sp, #24
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT:    ldr r12, [sp, #40]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [lr]
+; CHECK-NEXT:    vcgt.u32 q8, q9, q8
+; CHECK-NEXT:    vld1.32 {d18[0]}, [r12:32]
+; CHECK-NEXT:    vmovl.u8 q9, d18
+; CHECK-NEXT:    vmovn.i32 d16, q8
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vtrn.8 d16, d18
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vshl.i8 d16, d16, #7
+; CHECK-NEXT:    vshr.s8 d16, d16, #7
+; CHECK-NEXT:    vbsl d16, d18, d17
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    pop {r11, lr}
+; CHECK-NEXT:    mov pc, lr
   %cmp2_load = load <4 x i8>, ptr %cmp2_ptr, align 4
   %cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
   %c0 = icmp ult <4 x i32> %cmp0, %cmp1
@@ -373,15 +412,15 @@ define <8 x i8> @vtrn_mismatched_builvector1(<8 x i8> %tr0, <8 x i8> %tr1,
 ; The shuffle mask is half a vtrn; we duplicate the half to produce the
 ; full result.
 define void @lower_twice_no_vtrn(ptr %A, ptr %B, ptr %C) {
+; CHECK-LABEL: lower_twice_no_vtrn:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vtrn.16 d18, d16
+; CHECK-NEXT:    vorr d17, d16, d16
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r2]
+; CHECK-NEXT:    mov pc, lr
 entry:
-  ; CHECK-LABEL: lower_twice_no_vtrn:
-  ; CHECK: @ %bb.0:
-  ; CHECK-NEXT: vldr d16, [r1]
-  ; CHECK-NEXT: vldr d18, [r0]
-  ; CHECK-NEXT: vtrn.16 d18, d16
-  ; CHECK-NEXT: vorr d17, d16, d16
-  ; CHECK-NEXT: vst1.64 {d16, d17}, [r2]
-  ; CHECK-NEXT: mov pc, lr
   %tmp1 = load <4 x i16>, ptr %A
   %tmp2 = load <4 x i16>, ptr %B
   %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 5, i32 3, i32 7, i32 1, i32 5, i32 3, i32 7>
@@ -392,18 +431,49 @@ entry:
 ; The shuffle mask is half a vtrn; we duplicate the half to produce the
 ; full result.
 define void @upper_twice_no_vtrn(ptr %A, ptr %B, ptr %C) {
+; CHECK-LABEL: upper_twice_no_vtrn:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vldr d16, [r1]
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vtrn.16 d18, d16
+; CHECK-NEXT:    vorr d19, d18, d18
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r2]
+; CHECK-NEXT:    mov pc, lr
 entry:
-  ; CHECK-LABEL: upper_twice_no_vtrn:
-  ; CHECK: @ %bb.0:
-  ; CHECK-NEXT: vldr d16, [r1]
-  ; CHECK-NEXT: vldr d18, [r0]
-  ; CHECK-NEXT: vtrn.16 d18, d16
-  ; CHECK-NEXT: vorr d19, d18, d18
-  ; CHECK-NEXT: vst1.64 {d18, d19}, [r2]
-  ; CHECK-NEXT: mov pc, lr
   %tmp1 = load <4 x i16>, ptr %A
   %tmp2 = load <4 x i16>, ptr %B
   %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 6, i32 0, i32 4, i32 2, i32 6>
   store <8 x i16> %0, ptr %C
   ret void
 }
+
+define void @test_15xi16(ptr %next.gep, ptr %next.gep13) {
+; CHECK-LABEL: test_15xi16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    add r2, r0, #2
+; CHECK-NEXT:    add r3, r0, #6
+; CHECK-NEXT:    vld1.16 {d16, d17}, [r2]!
+; CHECK-NEXT:    vld1.16 {d18}, [r2]!
+; CHECK-NEXT:    vld1.16 {d20, d21}, [r3]!
+; CHECK-NEXT:    ldr r2, [r2]
+; CHECK-NEXT:    vld1.16 {d22}, [r3]!
+; CHECK-NEXT:    vmov.16 d19[0], r2
+; CHECK-NEXT:    ldr r3, [r3]
+; CHECK-NEXT:    add r2, r0, #30
+; CHECK-NEXT:    add r0, r0, #34
+; CHECK-NEXT:    vmov.16 d19[1], r3
+; CHECK-NEXT:    vld1.16 {d19[2]}, [r2:16]
+; CHECK-NEXT:    vtrn.16 q8, q10
+; CHECK-NEXT:    vld1.16 {d19[3]}, [r0:16]
+; CHECK-NEXT:    vtrn.16 d18, d22
+; CHECK-NEXT:    vst1.16 {d16, d17}, [r1]!
+; CHECK-NEXT:    vst1.16 {d18, d19}, [r1]
+; CHECK-NEXT:    mov pc, lr
+  %a = getelementptr inbounds nuw i8, ptr %next.gep, i32 2
+  %b = load <15 x i16>, ptr %a, align 2
+  %c = getelementptr inbounds nuw i8, ptr %next.gep, i32 6
+  %d = load <15 x i16>, ptr %c, align 2
+  %interleaved.vec = shufflevector <15 x i16> %b, <15 x i16> %d, <16 x i32> <i32 0, i32 15, i32 2, i32 17, i32 4, i32 19, i32 6, i32 21, i32 8, i32 23, i32 10, i32 25, i32 12, i32 27, i32 14, i32 29>
+  store <16 x i16> %interleaved.vec, ptr %next.gep13, align 2
+  ret void
+}
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index 7e1dfba34db2e..d24dadc7fc401 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -535,3 +535,59 @@ define %struct.uint8x8x2_t @vuzp_extract_subvector(<16 x i8> %t) #0 {
   %.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
   ret %struct.uint8x8x2_t %.fca.0.1.insert
 }
+
+define void @test_15xi16(ptr %next.gep, ptr %next.gep13) {
+; CHECK-LABEL: test_15xi16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    add r2, r0, #2
+; CHECK-NEXT:    add r3, r0, #6
+; CHECK-NEXT:    vld1.16 {d20, d21}, [r2]!
+; CHECK-NEXT:    vld1.16 {d16}, [r2]!
+; CHECK-NEXT:    vmov.u16 r12, d16[0]
+; CHECK-NEXT:    ldr r2, [r2]
+; CHECK-NEXT:    vmov.u16 r4, d20[0]
+; CHECK-NEXT:    vld1.16 {d22, d23}, [r3]!
+; CHECK-NEXT:    vld1.16 {d24}, [r3]!
+; CHECK-NEXT:    vmov.u16 lr, d16[2]
+; CHECK-NEXT:    vmov.u16 r5, d22[0]
+; CHECK-NEXT:    vmov.u16 r6, d21[0]
+; CHECK-NEXT:    vmov.16 d17[0], r12
+; CHECK-NEXT:    vmov.16 d16[0], r4
+; CHECK-NEXT:    vmov.u16 r4, d24[0]
+; CHECK-NEXT:    vmov.u16 r12, d24[2]
+; CHECK-NEXT:    vmov.16 d17[1], lr
+; CHECK-NEXT:    vmov.16 d18[0], r5
+; CHECK-NEXT:    vmov.u16 r5, d20[2]
+; CHECK-NEXT:    vmov.u16 lr, d23[0]
+; CHECK-NEXT:    vmov.16 d19[0], r4
+; CHECK-NEXT:    vmov.u16 r4, d22[2]
+; CHECK-NEXT:    vmov.16 d16[1], r5
+; CHECK-NEXT:    vmov.u16 r5, d21[2]
+; CHECK-NEXT:    vmov.16 d17[2], r2
+; CHECK-NEXT:    ldr r2, [r3]
+; CHECK-NEXT:    vmov.16 d16[2], r6
+; CHECK-NEXT:    vmov.16 d18[1], r4
+; CHECK-NEXT:    vmov.u16 r4, d23[2]
+; CHECK-NEXT:    vmov.16 d19[1], r12
+; CHECK-NEXT:    vmov.16 d18[2], lr
+; CHECK-NEXT:    vmov.16 d19[2], r2
+; CHECK-NEXT:    add r2, r0, #30
+; CHECK-NEXT:    add r0, r0, #34
+; CHECK-NEXT:    vld1.16 {d17[3]}, [r2:16]
+; CHECK-NEXT:    vmov.16 d16[3], r5
+; CHECK-NEXT:    vmov.16 d18[3], r4
+; CHECK-NEXT:    vld1.16 {d19[3]}, [r0:16]
+; CHECK-NEXT:    vst1.16 {d16, d17}, [r1]!
+; CHECK-NEXT:    vst1.16 {d18, d19}, [r1]
+; CHECK-NEXT:    pop {r4, r5, r6, lr}
+; CHECK-NEXT:    mov pc, lr
+  %a = getelementptr inbounds nuw i8, ptr %next.gep, i32 2
+  %b = load <15 x i16>, ptr %a, align 2
+  %c = getelementptr inbounds nuw i8, ptr %next.gep, i32 6
+  %d = load <15 x i16>, ptr %c, align 2
+  %interleaved.vec = shufflevector <15 x i16> %b, <15 x i16> %d, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29>
+  store <16 x i16> %interleaved.vec, ptr %next.gep13, align 2
+  ret void
+}
diff --git a/llvm/test/CodeGen/ARM/vzip.ll b/llvm/test/CodeGen/ARM/vzip.ll
index dda774abd8516..ce40a2e48b6e8 100644
--- a/llvm/test/CodeGen/ARM/vzip.ll
+++ b/llvm/test/CodeGen/ARM/vzip.ll
@@ -381,3 +381,22 @@ entry:
   %vzip.i = shufflevector <8 x i8> %lane, <8 x i8> %lane3, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i8> %vzip.i
 }
+
+define <16 x i16> @test_15xi16(ptr %next.gep, ptr %next.gep13) {
+; CHECK-LABEL: test_15xi16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    add r1, r1, #2
+; CHECK-NEXT:    mov r2, #4
+; CHECK-NEXT:    vld1.16 {d16, d17}, [r1], r2
+; CHECK-NEXT:    vld1.16 {d18, d19}, [r1]
+; CHECK-NEXT:    vzip.16 q8, q9
+; CHECK-NEXT:    vst1.16 {d16, d17}, [r0:128]!
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r0:128]
+; CHECK-NEXT:    mov pc, lr
+  %a = getelementptr inbounds nuw i8, ptr %next.gep, i32 2
+  %b = load <15 x i16>, ptr %a, align 2
+  %c = getelementptr inbounds nuw i8, ptr %next.gep, i32 6
+  %d = load <15 x i16>, ptr %c, align 2
+  %interleaved.vec = shufflevector <15 x i16> %b, <15 x i16> %d, <16 x i32> <i32 0, i32 15, i32 1, i32 16, i32 2, i32 17, i32 3, i32 18, i32 4, i32 19, i32 5, i32 20, i32 6, i32 21, i32 7, i32 22>
+  ret <16 x i16> %interleaved.vec
+}

Copy link
Contributor

@PeddleSpam PeddleSpam left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@davemgreen davemgreen merged commit 06d2d1e into llvm:main Aug 13, 2025
11 checks passed
@davemgreen davemgreen deleted the gh-arm-oddtrnmask branch August 13, 2025 19:57
@mstorsjo
Copy link
Member

Thanks for the quick fix!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants