Skip to content

Commit 19a0d04

Browse files
davemgreen authored and aahrun committed
[AArch64] Guard for 128bit vectors in mull combine. (llvm#169839)
The test case generates an extract_subvector(index) leading into a mul. Make sure we don't try to treat the scalable vector extract as a 128-bit vector in the mull combine. Fixes llvm#168912.
1 parent 27f77ad commit 19a0d04

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5804,8 +5804,10 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
58045804
if (VT.is64BitVector()) {
58055805
if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
58065806
isNullConstant(N0.getOperand(1)) &&
5807+
N0.getOperand(0).getValueType().is128BitVector() &&
58075808
N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5808-
isNullConstant(N1.getOperand(1))) {
5809+
isNullConstant(N1.getOperand(1)) &&
5810+
N1.getOperand(0).getValueType().is128BitVector()) {
58095811
N0 = N0.getOperand(0);
58105812
N1 = N1.getOperand(0);
58115813
VT = N0.getValueType();

llvm/test/CodeGen/AArch64/neon-extadd-extract.ll

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,3 +771,31 @@ entry:
771771
%m = mul <1 x i64> %s0, %t1
772772
ret <1 x i64> %m
773773
}
774+
775+
define <2 x i8> @extract_scalable_vec() vscale_range(1,16) "target-features"="+sve" {
776+
; CHECK-SD-LABEL: extract_scalable_vec:
777+
; CHECK-SD: // %bb.0: // %entry
778+
; CHECK-SD-NEXT: mov x8, xzr
779+
; CHECK-SD-NEXT: index z1.s, #2, #3
780+
; CHECK-SD-NEXT: ldr h0, [x8]
781+
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
782+
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
783+
; CHECK-SD-NEXT: mul v0.2s, v0.2s, v1.2s
784+
; CHECK-SD-NEXT: ret
785+
;
786+
; CHECK-GI-LABEL: extract_scalable_vec:
787+
; CHECK-GI: // %bb.0: // %entry
788+
; CHECK-GI-NEXT: mov x8, xzr
789+
; CHECK-GI-NEXT: mov x9, #1 // =0x1
790+
; CHECK-GI-NEXT: ld1 { v0.b }[0], [x8]
791+
; CHECK-GI-NEXT: ldr b1, [x9]
792+
; CHECK-GI-NEXT: adrp x8, .LCPI36_0
793+
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
794+
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI36_0]
795+
; CHECK-GI-NEXT: mul v0.2s, v0.2s, v1.2s
796+
; CHECK-GI-NEXT: ret
797+
entry:
798+
%0 = load <2 x i8>, ptr null, align 2
799+
%mul = mul <2 x i8> %0, <i8 2, i8 5>
800+
ret <2 x i8> %mul
801+
}

0 commit comments

Comments
 (0)