@@ -67,61 +67,18 @@ exit:
6767define void @test_replicating_store_x86_fp80_cost (i32 %n , ptr %dst ) #0 {
6868; COST-LABEL: define void @test_replicating_store_x86_fp80_cost(
6969; COST-SAME: i32 [[N:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
70- ; COST-NEXT: [[ENTRY:.*:]]
71- ; COST-NEXT: [[TMP0:%.*]] = add i32 [[N]], 2
72- ; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
73- ; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
74- ; COST: [[VECTOR_SCEVCHECK]]:
75- ; COST-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64
76- ; COST-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
77- ; COST-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP2]], 4294967295
78- ; COST-NEXT: br i1 [[TMP3]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
79- ; COST: [[VECTOR_PH]]:
80- ; COST-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 8
81- ; COST-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
82- ; COST-NEXT: br label %[[VECTOR_BODY:.*]]
83- ; COST: [[VECTOR_BODY]]:
84- ; COST-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
85- ; COST-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
86- ; COST-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
87- ; COST-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
88- ; COST-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2)
89- ; COST-NEXT: [[TMP4:%.*]] = zext <2 x i32> [[VEC_IND]] to <2 x i64>
90- ; COST-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[STEP_ADD]] to <2 x i64>
91- ; COST-NEXT: [[TMP6:%.*]] = zext <2 x i32> [[STEP_ADD_2]] to <2 x i64>
92- ; COST-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[STEP_ADD_3]] to <2 x i64>
93- ; COST-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
94- ; COST-NEXT: [[TMP9:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP8]]
95- ; COST-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
96- ; COST-NEXT: [[TMP11:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP10]]
97- ; COST-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
98- ; COST-NEXT: [[TMP13:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP12]]
99- ; COST-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
100- ; COST-NEXT: [[TMP15:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP14]]
101- ; COST-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
102- ; COST-NEXT: [[TMP17:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP16]]
103- ; COST-NEXT: [[TMP18:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
104- ; COST-NEXT: [[TMP19:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP18]]
105- ; COST-NEXT: [[TMP20:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
106- ; COST-NEXT: [[TMP21:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP20]]
107- ; COST-NEXT: [[TMP22:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
70+ ; COST-NEXT: [[ENTRY:.*]]:
71+ ; COST-NEXT: br label %[[LOOP:.*]]
72+ ; COST: [[LOOP]]:
73+ ; COST-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
74+ ; COST-NEXT: [[TMP22:%.*]] = zext i32 [[IV]] to i64
10875; COST-NEXT: [[TMP23:%.*]] = getelementptr x86_fp80, ptr [[DST]], i64 [[TMP22]]
109- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP9]], align 16
110- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP11]], align 16
111- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP13]], align 16
112- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP15]], align 16
113- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP17]], align 16
114- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP19]], align 16
115- ; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP21]], align 16
11676; COST-NEXT: store x86_fp80 0xK00000000000000000000, ptr [[TMP23]], align 16
117- ; COST-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
118- ; COST-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD_3]], splat (i32 2)
119- ; COST-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
120- ; COST-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
121- ; COST: [[MIDDLE_BLOCK]]:
122- ; COST-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
123- ; COST-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
124- ; COST: [[SCALAR_PH]]:
77+ ; COST-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
78+ ; COST-NEXT: [[EC:%.*]] = icmp ugt i32 [[IV]], [[N]]
79+ ; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
80+ ; COST: [[EXIT]]:
81+ ; COST-NEXT: ret void
12582;
12683; FORCED-LABEL: define void @test_replicating_store_x86_fp80_cost(
12784; FORCED-SAME: i32 [[N:%.*]], ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
0 commit comments