Skip to content

Commit a381043

Browse files
committed
[AArch64][SVE] Combine UXT[BHW] intrinsics to AND.
Currently, we lower UXT[BHW] intrinsics into the corresponding UXT* instruction. However, when the governing predicate is all-true or the passthrough is undef (e.g. in the case of "don't care" merging), we can lower them into AND immediate instructions instead. For example: ```cpp svuint64_t foo_z(svuint64_t x) { return svextb_z(svptrue_b64(), x); } ``` Currently: ``` foo_z: ptrue p0.d movi v1.2d, #0000000000000000 uxtb z0.d, p0/m, z0.d ret ``` Becomes: ``` foo_z: and z0.d, z0.d, #0xff ret ``` We do this early in InstCombine in case it unblocks other simplifications.
1 parent ed0ee9a commit a381043

File tree

2 files changed

+44
-24
lines changed

2 files changed

+44
-24
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2640,6 +2640,26 @@ static std::optional<Instruction *> instCombinePTrue(InstCombiner &IC,
26402640
return std::nullopt;
26412641
}
26422642

2643+
// Fold an SVE UXT[BHW] intrinsic into a plain vector AND with a low-bits
// mask when the merge semantics make the passthrough irrelevant: either the
// passthrough is undef, or the governing predicate is all-active, so every
// result lane comes from the zero-extended operand.
static std::optional<Instruction *> instCombineSVEUxt(InstCombiner &IC,
                                                      IntrinsicInst &II,
                                                      unsigned NumBits) {
  Value *Passthru = II.getOperand(0);
  Value *Pg = II.getOperand(1);
  Value *Op = II.getOperand(2);

  // Bail out when inactive lanes would need the passthrough value.
  if (!isa<UndefValue>(Passthru) && !isAllActivePredicate(Pg))
    return std::nullopt;

  // UXT[BHW] keeps the low NumBits of each element and clears the rest,
  // which is exactly an AND with a splat of the low-bit mask.
  auto *VTy = cast<VectorType>(II.getType());
  APInt LowBits = APInt::getLowBitsSet(VTy->getScalarSizeInBits(), NumBits);
  auto *Mask = ConstantVector::getSplat(
      VTy->getElementCount(), ConstantInt::get(VTy->getElementType(), LowBits));
  return IC.replaceInstUsesWith(II, IC.Builder.CreateAnd(Op, Mask));
}
2662+
26432663
std::optional<Instruction *>
26442664
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
26452665
IntrinsicInst &II) const {
@@ -2745,6 +2765,12 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
27452765
return instCombineSVEInsr(IC, II);
27462766
case Intrinsic::aarch64_sve_ptrue:
27472767
return instCombinePTrue(IC, II);
2768+
case Intrinsic::aarch64_sve_uxtb:
2769+
return instCombineSVEUxt(IC, II, 8);
2770+
case Intrinsic::aarch64_sve_uxth:
2771+
return instCombineSVEUxt(IC, II, 16);
2772+
case Intrinsic::aarch64_sve_uxtw:
2773+
return instCombineSVEUxt(IC, II, 32);
27482774
}
27492775

27502776
return std::nullopt;

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-uxt.ll

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
66
define <vscale x 2 x i64> @uxtb_z_64(<vscale x 2 x i64> %0) #0 {
77
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_z_64(
88
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
9-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
9+
; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 255)
1010
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
1111
;
1212
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -16,7 +16,7 @@ define <vscale x 2 x i64> @uxtb_z_64(<vscale x 2 x i64> %0) #0 {
1616
define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
1717
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_m_64(
1818
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
19-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
19+
; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 255)
2020
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
2121
;
2222
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -26,8 +26,7 @@ define <vscale x 2 x i64> @uxtb_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
2626
define <vscale x 2 x i64> @uxtb_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
2727
; CHECK-LABEL: define <vscale x 2 x i64> @uxtb_x_64(
2828
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
29-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
30-
; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
29+
; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 2 x i64> [[TMP1]], splat (i64 255)
3130
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
3231
;
3332
%3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
@@ -62,7 +61,7 @@ define <vscale x 2 x i64> @uxtb_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2
6261
define <vscale x 4 x i32> @uxtb_z_32(<vscale x 4 x i32> %0) #0 {
6362
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_z_32(
6463
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
65-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
64+
; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 255)
6665
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
6766
;
6867
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -72,7 +71,7 @@ define <vscale x 4 x i32> @uxtb_z_32(<vscale x 4 x i32> %0) #0 {
7271
define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
7372
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_m_32(
7473
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
75-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
74+
; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 255)
7675
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
7776
;
7877
%3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -82,8 +81,7 @@ define <vscale x 4 x i32> @uxtb_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %
8281
define <vscale x 4 x i32> @uxtb_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
8382
; CHECK-LABEL: define <vscale x 4 x i32> @uxtb_x_32(
8483
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
85-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
86-
; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
84+
; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 4 x i32> [[TMP1]], splat (i32 255)
8785
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
8886
;
8987
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
@@ -118,7 +116,7 @@ define <vscale x 4 x i32> @uxtb_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4
118116
define <vscale x 8 x i16> @uxtb_z_16(<vscale x 8 x i16> %0) #0 {
119117
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_z_16(
120118
; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]]) #[[ATTR0]] {
121-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]])
119+
; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 8 x i16> [[TMP0]], splat (i16 255)
122120
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
123121
;
124122
%2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
@@ -128,7 +126,7 @@ define <vscale x 8 x i16> @uxtb_z_16(<vscale x 8 x i16> %0) #0 {
128126
define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1) #0 {
129127
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_m_16(
130128
; CHECK-SAME: <vscale x 8 x i16> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
131-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[TMP0]])
129+
; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 8 x i16> [[TMP0]], splat (i16 255)
132130
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
133131
;
134132
%3 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> %1, <vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> %0)
@@ -138,8 +136,7 @@ define <vscale x 8 x i16> @uxtb_m_16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %
138136
define <vscale x 8 x i16> @uxtb_x_16(<vscale x 16 x i1> %0, <vscale x 8 x i16> %1) #0 {
139137
; CHECK-LABEL: define <vscale x 8 x i16> @uxtb_x_16(
140138
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 8 x i16> [[TMP1:%.*]]) #[[ATTR0]] {
141-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP0]])
142-
; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> [[TMP1]])
139+
; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 8 x i16> [[TMP1]], splat (i16 255)
143140
; CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
144141
;
145142
%3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %0)
@@ -174,7 +171,7 @@ define <vscale x 8 x i16> @uxtb_m_16_no_ptrue(<vscale x 16 x i1> %0, <vscale x 8
174171
define <vscale x 2 x i64> @uxth_z_64(<vscale x 2 x i64> %0) #0 {
175172
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_z_64(
176173
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
177-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
174+
; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 65535)
178175
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
179176
;
180177
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -184,7 +181,7 @@ define <vscale x 2 x i64> @uxth_z_64(<vscale x 2 x i64> %0) #0 {
184181
define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
185182
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_m_64(
186183
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
187-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
184+
; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 65535)
188185
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
189186
;
190187
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -194,8 +191,7 @@ define <vscale x 2 x i64> @uxth_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
194191
define <vscale x 2 x i64> @uxth_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
195192
; CHECK-LABEL: define <vscale x 2 x i64> @uxth_x_64(
196193
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
197-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
198-
; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
194+
; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 2 x i64> [[TMP1]], splat (i64 65535)
199195
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
200196
;
201197
%3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)
@@ -230,7 +226,7 @@ define <vscale x 2 x i64> @uxth_m_64_no_ptrue(<vscale x 16 x i1> %0, <vscale x 2
230226
define <vscale x 4 x i32> @uxth_z_32(<vscale x 4 x i32> %0) #0 {
231227
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_z_32(
232228
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]]) #[[ATTR0]] {
233-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
229+
; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 65535)
234230
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
235231
;
236232
%2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -240,7 +236,7 @@ define <vscale x 4 x i32> @uxth_z_32(<vscale x 4 x i32> %0) #0 {
240236
define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1) #0 {
241237
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_m_32(
242238
; CHECK-SAME: <vscale x 4 x i32> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
243-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP0]])
239+
; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 4 x i32> [[TMP0]], splat (i32 65535)
244240
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
245241
;
246242
%3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %1, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %0)
@@ -250,8 +246,7 @@ define <vscale x 4 x i32> @uxth_m_32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %
250246
define <vscale x 4 x i32> @uxth_x_32(<vscale x 16 x i1> %0, <vscale x 4 x i32> %1) #0 {
251247
; CHECK-LABEL: define <vscale x 4 x i32> @uxth_x_32(
252248
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 4 x i32> [[TMP1:%.*]]) #[[ATTR0]] {
253-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP0]])
254-
; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP1]])
249+
; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 4 x i32> [[TMP1]], splat (i32 65535)
255250
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
256251
;
257252
%3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %0)
@@ -286,7 +281,7 @@ define <vscale x 4 x i32> @uxth_m_32_no_ptrue(<vscale x 16 x i1> %0, <vscale x 4
286281
define <vscale x 2 x i64> @uxtw_z_64(<vscale x 2 x i64> %0) #0 {
287282
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_z_64(
288283
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
289-
; CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
284+
; CHECK-NEXT: [[TMP2:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 4294967295)
290285
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
291286
;
292287
%2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -296,7 +291,7 @@ define <vscale x 2 x i64> @uxtw_z_64(<vscale x 2 x i64> %0) #0 {
296291
define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
297292
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_m_64(
298293
; CHECK-SAME: <vscale x 2 x i64> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
299-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[TMP0]])
294+
; CHECK-NEXT: [[TMP3:%.*]] = and <vscale x 2 x i64> [[TMP0]], splat (i64 4294967295)
300295
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
301296
;
302297
%3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %1, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> %0)
@@ -306,8 +301,7 @@ define <vscale x 2 x i64> @uxtw_m_64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %
306301
define <vscale x 2 x i64> @uxtw_x_64(<vscale x 16 x i1> %0, <vscale x 2 x i64> %1) #0 {
307302
; CHECK-LABEL: define <vscale x 2 x i64> @uxtw_x_64(
308303
; CHECK-SAME: <vscale x 16 x i1> [[TMP0:%.*]], <vscale x 2 x i64> [[TMP1:%.*]]) #[[ATTR0]] {
309-
; CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[TMP0]])
310-
; CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP3]], <vscale x 2 x i64> [[TMP1]])
304+
; CHECK-NEXT: [[TMP4:%.*]] = and <vscale x 2 x i64> [[TMP1]], splat (i64 4294967295)
311305
; CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
312306
;
313307
%3 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %0)

0 commit comments

Comments
 (0)