22; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S | FileCheck %s
33
44; Check that we don't unnecessarily broadcast %pow
5- define void @powi(ptr noalias %p, i32 %pow) {
6- ; CHECK-LABEL: define void @powi(
7- ; CHECK-SAME: ptr noalias [[P:%.*]], i32 [[POW:%.*]]) {
5+ define void @powi_only_first_lane_used_of_second_arg(ptr %p, i32 %pow) {
6+ ; CHECK-LABEL: define void @powi_only_first_lane_used_of_second_arg(
7+ ; CHECK-SAME: ptr [[P:%.*]], i32 [[POW:%.*]]) {
88; CHECK-NEXT: [[ENTRY:.*]]:
99; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1010; CHECK: [[VECTOR_PH]]:
@@ -14,11 +14,11 @@ define void @powi(ptr noalias %p, i32 %pow) {
1414; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[P]], i32 [[INDEX]]
1515; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i32 0
1616; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
17- ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[WIDE_LOAD]], i32 [[POW]])
18- ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP1]], align 4
17+ ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[WIDE_LOAD]], i32 [[POW]])
18+ ; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[TMP1]], align 4
1919; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
20- ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
21- ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
20+ ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
21+ ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
2222; CHECK: [[MIDDLE_BLOCK]]:
2323; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
2424; CHECK: [[SCALAR_PH]]:
0 commit comments