@@ -13,7 +13,7 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
1313; CHECK-UF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1414; CHECK-UF1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP5]], 16
1515; CHECK-UF1-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
16- ; CHECK-UF1-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 16
16+ ; CHECK-UF1-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP17]], 4
1717; CHECK-UF1-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP18]]
1818; CHECK-UF1-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP18]]
1919; CHECK-UF1-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
@@ -39,18 +39,18 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
3939; CHECK-UF4-NEXT: entry:
4040; CHECK-UF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH1:%.*]]
4141; CHECK-UF4: vector.ph:
42- ; CHECK-UF4-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
43- ; CHECK-UF4-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 64
4442; CHECK-UF4-NEXT: [[TMP61:%.*]] = call i64 @llvm.vscale.i64()
4543; CHECK-UF4-NEXT: [[TMP62:%.*]] = mul nuw i64 [[TMP61]], 64
46- ; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP62]]
47- ; CHECK-UF4-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP62]]
44+ ; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
45+ ; CHECK-UF4-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 6
46+ ; CHECK-UF4-NEXT: [[TMP7:%.*]] = sub i64 [[N]], [[TMP3]]
47+ ; CHECK-UF4-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
4848; CHECK-UF4-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
4949; CHECK-UF4-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
50- ; CHECK-UF4-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 16
50+ ; CHECK-UF4-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 4
5151; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP11]]
5252; CHECK-UF4-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
53- ; CHECK-UF4-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 32
53+ ; CHECK-UF4-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 5
5454; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP13]]
5555; CHECK-UF4-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
5656; CHECK-UF4-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], 48
@@ -72,10 +72,10 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
7272; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 16 x i1> [ [[TMP19]], [[VECTOR_PH1]] ], [ [[TMP58:%.*]], [[VECTOR_BODY]] ]
7373; CHECK-UF4-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
7474; CHECK-UF4-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
75- ; CHECK-UF4-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 16
75+ ; CHECK-UF4-NEXT: [[TMP23:%.*]] = shl nuw i64 [[TMP22]], 4
7676; CHECK-UF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP23]]
7777; CHECK-UF4-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64()
78- ; CHECK-UF4-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP31]], 32
78+ ; CHECK-UF4-NEXT: [[TMP32:%.*]] = shl nuw i64 [[TMP31]], 5
7979; CHECK-UF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[TMP32]]
8080; CHECK-UF4-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64()
8181; CHECK-UF4-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP34]], 48
@@ -90,10 +90,10 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
9090; CHECK-UF4-NEXT: [[TMP28:%.*]] = mul <vscale x 16 x i8> [[WIDE_MASKED_LOAD11]], splat (i8 3)
9191; CHECK-UF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]]
9292; CHECK-UF4-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
93- ; CHECK-UF4-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], 16
93+ ; CHECK-UF4-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 4
9494; CHECK-UF4-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP38]]
9595; CHECK-UF4-NEXT: [[TMP40:%.*]] = call i64 @llvm.vscale.i64()
96- ; CHECK-UF4-NEXT: [[TMP41:%.*]] = mul nuw i64 [[TMP40]], 32
96+ ; CHECK-UF4-NEXT: [[TMP41:%.*]] = shl nuw i64 [[TMP40]], 5
9797; CHECK-UF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 [[TMP41]]
9898; CHECK-UF4-NEXT: [[TMP43:%.*]] = call i64 @llvm.vscale.i64()
9999; CHECK-UF4-NEXT: [[TMP44:%.*]] = mul nuw i64 [[TMP43]], 48
@@ -102,12 +102,12 @@ define void @scalable_wide_active_lane_mask(ptr noalias %dst, ptr readonly %src,
102102; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP26]], ptr [[TMP39]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK6]])
103103; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP27]], ptr [[TMP42]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK7]])
104104; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP28]], ptr [[TMP45]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK8]])
105- ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
105+ ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP62]]
106106; CHECK-UF4-NEXT: [[TMP46:%.*]] = call i64 @llvm.vscale.i64()
107- ; CHECK-UF4-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], 16
107+ ; CHECK-UF4-NEXT: [[TMP47:%.*]] = shl nuw i64 [[TMP46]], 4
108108; CHECK-UF4-NEXT: [[TMP48:%.*]] = add i64 [[INDEX]], [[TMP47]]
109109; CHECK-UF4-NEXT: [[TMP49:%.*]] = call i64 @llvm.vscale.i64()
110- ; CHECK-UF4-NEXT: [[TMP50:%.*]] = mul nuw i64 [[TMP49]], 32
110+ ; CHECK-UF4-NEXT: [[TMP50:%.*]] = shl nuw i64 [[TMP49]], 5
111111; CHECK-UF4-NEXT: [[TMP51:%.*]] = add i64 [[INDEX]], [[TMP50]]
112112; CHECK-UF4-NEXT: [[TMP52:%.*]] = call i64 @llvm.vscale.i64()
113113; CHECK-UF4-NEXT: [[TMP53:%.*]] = mul nuw i64 [[TMP52]], 48
@@ -155,7 +155,7 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
155155; CHECK-UF1-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
156156; CHECK-UF1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP12]], 2
157157; CHECK-UF1-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
158- ; CHECK-UF1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP2]], 2
158+ ; CHECK-UF1-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP2]], 1
159159; CHECK-UF1-NEXT: [[TMP10:%.*]] = sub i64 [[N]], [[TMP9]]
160160; CHECK-UF1-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[N]], [[TMP9]]
161161; CHECK-UF1-NEXT: [[TMP13:%.*]] = select i1 [[TMP11]], i64 [[TMP10]], i64 0
@@ -184,18 +184,18 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
184184; CHECK-UF4: for.body.preheader:
185185; CHECK-UF4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
186186; CHECK-UF4: vector.ph:
187- ; CHECK-UF4-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
188- ; CHECK-UF4-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP26]], 8
189187; CHECK-UF4-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
190188; CHECK-UF4-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
191- ; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP3]]
192- ; CHECK-UF4-NEXT: [[TMP56:%.*]] = icmp ugt i64 [[N]], [[TMP3]]
189+ ; CHECK-UF4-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
190+ ; CHECK-UF4-NEXT: [[TMP26:%.*]] = shl nuw i64 [[TMP4]], 3
191+ ; CHECK-UF4-NEXT: [[TMP31:%.*]] = sub i64 [[N]], [[TMP26]]
192+ ; CHECK-UF4-NEXT: [[TMP56:%.*]] = icmp ugt i64 [[N]], [[TMP26]]
193193; CHECK-UF4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = select i1 [[TMP56]], i64 [[TMP31]], i64 0
194194; CHECK-UF4-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
195- ; CHECK-UF4-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
195+ ; CHECK-UF4-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1
196196; CHECK-UF4-NEXT: [[INDEX_PART_NEXT:%.*]] = add i64 0, [[TMP6]]
197197; CHECK-UF4-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
198- ; CHECK-UF4-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
198+ ; CHECK-UF4-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
199199; CHECK-UF4-NEXT: [[INDEX_PART_NEXT1:%.*]] = add i64 0, [[TMP8]]
200200; CHECK-UF4-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
201201; CHECK-UF4-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 6
@@ -217,10 +217,10 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
217217; CHECK-UF4-NEXT: [[ACTIVE_LANE_MASK8:%.*]] = phi <vscale x 2 x i1> [ [[TMP14]], [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ]
218218; CHECK-UF4-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[SRC]], i64 [[INDEX]]
219219; CHECK-UF4-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
220- ; CHECK-UF4-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 2
220+ ; CHECK-UF4-NEXT: [[TMP28:%.*]] = shl nuw i64 [[TMP27]], 1
221221; CHECK-UF4-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP28]]
222222; CHECK-UF4-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
223- ; CHECK-UF4-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], 4
223+ ; CHECK-UF4-NEXT: [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
224224; CHECK-UF4-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, ptr [[TMP15]], i64 [[TMP21]]
225225; CHECK-UF4-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
226226; CHECK-UF4-NEXT: [[TMP24:%.*]] = mul nuw i64 [[TMP23]], 6
@@ -235,10 +235,10 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
235235; CHECK-UF4-NEXT: [[TMP19:%.*]] = fmul <vscale x 2 x double> [[WIDE_MASKED_LOAD11]], splat (double 3.000000e+00)
236236; CHECK-UF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
237237; CHECK-UF4-NEXT: [[TMP32:%.*]] = call i64 @llvm.vscale.i64()
238- ; CHECK-UF4-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], 2
238+ ; CHECK-UF4-NEXT: [[TMP33:%.*]] = shl nuw i64 [[TMP32]], 1
239239; CHECK-UF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP33]]
240240; CHECK-UF4-NEXT: [[TMP35:%.*]] = call i64 @llvm.vscale.i64()
241- ; CHECK-UF4-NEXT: [[TMP36:%.*]] = mul nuw i64 [[TMP35]], 4
241+ ; CHECK-UF4-NEXT: [[TMP36:%.*]] = shl nuw i64 [[TMP35]], 2
242242; CHECK-UF4-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP36]]
243243; CHECK-UF4-NEXT: [[TMP38:%.*]] = call i64 @llvm.vscale.i64()
244244; CHECK-UF4-NEXT: [[TMP39:%.*]] = mul nuw i64 [[TMP38]], 6
@@ -247,12 +247,12 @@ define void @scalable_wide_active_lane_mask_double(ptr noalias %dst, ptr readonl
247247; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP17]], ptr [[TMP34]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK6]])
248248; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP18]], ptr [[TMP37]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK7]])
249249; CHECK-UF4-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP19]], ptr [[TMP40]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK8]])
250- ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
250+ ; CHECK-UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]]
251251; CHECK-UF4-NEXT: [[TMP41:%.*]] = call i64 @llvm.vscale.i64()
252- ; CHECK-UF4-NEXT: [[TMP42:%.*]] = mul nuw i64 [[TMP41]], 2
252+ ; CHECK-UF4-NEXT: [[TMP42:%.*]] = shl nuw i64 [[TMP41]], 1
253253; CHECK-UF4-NEXT: [[TMP43:%.*]] = add i64 [[INDEX]], [[TMP42]]
254254; CHECK-UF4-NEXT: [[TMP44:%.*]] = call i64 @llvm.vscale.i64()
255- ; CHECK-UF4-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], 4
255+ ; CHECK-UF4-NEXT: [[TMP45:%.*]] = shl nuw i64 [[TMP44]], 2
256256; CHECK-UF4-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], [[TMP45]]
257257; CHECK-UF4-NEXT: [[TMP47:%.*]] = call i64 @llvm.vscale.i64()
258258; CHECK-UF4-NEXT: [[TMP48:%.*]] = mul nuw i64 [[TMP47]], 6
0 commit comments