@@ -137,6 +137,64 @@ for.body:
137137}
138138
139139define ptr @test4 () {
140+ ; POWEROF2-LABEL: @test4(
141+ ; POWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
142+ ; POWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
143+ ; POWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 5, i32 6>
144+ ; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 4, i32 0>
145+ ; POWEROF2-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0)
146+ ; POWEROF2-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2)
147+ ; POWEROF2-NEXT: br label [[TMP8:%.*]]
148+ ; POWEROF2: 7:
149+ ; POWEROF2-NEXT: br label [[TMP8]]
150+ ; POWEROF2: 8:
151+ ; POWEROF2-NEXT: [[TMP9:%.*]] = phi <2 x float> [ poison, [[TMP7:%.*]] ], [ [[TMP4]], [[TMP0:%.*]] ]
152+ ; POWEROF2-NEXT: [[TMP10:%.*]] = phi <4 x float> [ poison, [[TMP7]] ], [ [[TMP6]], [[TMP0]] ]
153+ ; POWEROF2-NEXT: br label [[TMP11:%.*]]
154+ ; POWEROF2: 11:
155+ ; POWEROF2-NEXT: [[TMP12:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 0)
156+ ; POWEROF2-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer
157+ ; POWEROF2-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2)
158+ ; POWEROF2-NEXT: [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]]
159+ ; POWEROF2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
160+ ; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]]
161+ ; POWEROF2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0
162+ ; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00
163+ ; POWEROF2-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0
164+ ; POWEROF2-NEXT: [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]]
165+ ; POWEROF2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0
166+ ; POWEROF2-NEXT: [[TMP23:%.*]] = fadd reassoc nsz float [[TMP22]], [[TMP19]]
167+ ; POWEROF2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[TMP13]], i32 1
168+ ; POWEROF2-NEXT: [[TMP25:%.*]] = fadd reassoc nsz float [[TMP21]], [[TMP24]]
169+ ; POWEROF2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[TMP15]], i32 1
170+ ; POWEROF2-NEXT: [[TMP27:%.*]] = fadd reassoc nsz float [[TMP23]], [[TMP26]]
171+ ; POWEROF2-NEXT: [[TMP28:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP25]])
172+ ; POWEROF2-NEXT: [[TMP29:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP27]])
173+ ; POWEROF2-NEXT: ret ptr null
174+ ;
175+ ; NONPOWEROF2-LABEL: @test4(
176+ ; NONPOWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
177+ ; NONPOWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
178+ ; NONPOWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> <i32 4, i32 5, i32 6>
179+ ; NONPOWEROF2-NEXT: [[TMP4:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> poison, <3 x float> [[TMP2]], i64 0)
180+ ; NONPOWEROF2-NEXT: [[TMP5:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> [[TMP4]], <3 x float> [[TMP3]], i64 3)
181+ ; NONPOWEROF2-NEXT: br label [[TMP7:%.*]]
182+ ; NONPOWEROF2: 6:
183+ ; NONPOWEROF2-NEXT: br label [[TMP7]]
184+ ; NONPOWEROF2: 7:
185+ ; NONPOWEROF2-NEXT: [[TMP8:%.*]] = phi <6 x float> [ poison, [[TMP6:%.*]] ], [ [[TMP5]], [[TMP0:%.*]] ]
186+ ; NONPOWEROF2-NEXT: br label [[TMP9:%.*]]
187+ ; NONPOWEROF2: 9:
188+ ; NONPOWEROF2-NEXT: [[TMP10:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 0)
189+ ; NONPOWEROF2-NEXT: [[TMP11:%.*]] = fmul <3 x float> zeroinitializer, [[TMP10]]
190+ ; NONPOWEROF2-NEXT: [[TMP12:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 3)
191+ ; NONPOWEROF2-NEXT: [[TMP13:%.*]] = fmul <3 x float> zeroinitializer, [[TMP12]]
192+ ; NONPOWEROF2-NEXT: [[TMP14:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP11]])
193+ ; NONPOWEROF2-NEXT: [[TMP15:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP13]])
194+ ; NONPOWEROF2-NEXT: [[TMP16:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP14]])
195+ ; NONPOWEROF2-NEXT: [[TMP17:%.*]] = tail call float @llvm.sqrt.f32(float [[TMP15]])
196+ ; NONPOWEROF2-NEXT: ret ptr null
197+ ;
140198 %1 = fadd <8 x float > zeroinitializer , zeroinitializer
141199 %2 = extractelement <8 x float > %1 , i64 0
142200 %3 = extractelement <8 x float > %1 , i64 1
0 commit comments