@@ -205,4 +205,85 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
205205 ret <4 x float > %tmp
206206}
207207
208+ ; TODO: store(fneg(load())) - convert scalar to integer
; Read-modify-write case: load an f32, fneg it, and store back to the SAME
; pointer. Currently lowered via SSE movss + xorps with a sign-bit mask
; constant; the TODO above notes this could instead be a single integer
; xor on the memory value.
209+ define void @fneg_int_rmw_f32 (ptr %ptr ) {
210+ ; X86-SSE-LABEL: fneg_int_rmw_f32:
211+ ; X86-SSE: # %bb.0:
212+ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
213+ ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
214+ ; X86-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
215+ ; X86-SSE-NEXT: movss %xmm0, (%eax)
216+ ; X86-SSE-NEXT: retl
217+ ;
218+ ; X64-SSE-LABEL: fneg_int_rmw_f32:
219+ ; X64-SSE: # %bb.0:
220+ ; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
221+ ; X64-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
222+ ; X64-SSE-NEXT: movss %xmm0, (%rdi)
223+ ; X64-SSE-NEXT: retq
224+ %1 = load float , ptr %ptr
225+ %2 = fneg float %1
226+ store float %2 , ptr %ptr
227+ ret void
228+ }
229+
; Scalar f64 load -> fneg -> store to a DIFFERENT pointer. With SSE1 only
; (no SSE2 f64 support) this goes through x87: fldl/fchs/fstpl. With SSE2
; it uses movsd + xorps against a sign-bit mask constant, stored via movlps.
230+ define void @fneg_int_f64 (ptr %src , ptr %dst ) {
231+ ; X86-SSE1-LABEL: fneg_int_f64:
232+ ; X86-SSE1: # %bb.0:
233+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
234+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx
235+ ; X86-SSE1-NEXT: fldl (%ecx)
236+ ; X86-SSE1-NEXT: fchs
237+ ; X86-SSE1-NEXT: fstpl (%eax)
238+ ; X86-SSE1-NEXT: retl
239+ ;
240+ ; X86-SSE2-LABEL: fneg_int_f64:
241+ ; X86-SSE2: # %bb.0:
242+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
243+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
244+ ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
245+ ; X86-SSE2-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
246+ ; X86-SSE2-NEXT: movlps %xmm0, (%eax)
247+ ; X86-SSE2-NEXT: retl
248+ ;
249+ ; X64-SSE1-LABEL: fneg_int_f64:
250+ ; X64-SSE1: # %bb.0:
251+ ; X64-SSE1-NEXT: fldl (%rdi)
252+ ; X64-SSE1-NEXT: fchs
253+ ; X64-SSE1-NEXT: fstpl (%rsi)
254+ ; X64-SSE1-NEXT: retq
255+ ;
256+ ; X64-SSE2-LABEL: fneg_int_f64:
257+ ; X64-SSE2: # %bb.0:
258+ ; X64-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
259+ ; X64-SSE2-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
260+ ; X64-SSE2-NEXT: movlps %xmm0, (%rsi)
261+ ; X64-SSE2-NEXT: retq
262+ %1 = load double , ptr %src
263+ %2 = fneg double %1
264+ store double %2 , ptr %dst
265+ ret void
266+ }
268+ ; don't convert vector to scalar
; Vector <4 x float> load -> fneg -> store. The vector form should stay a
; single movaps/xorps/movaps sequence (negate all four lanes with one
; sign-mask xor) and must NOT be scalarized.
269+ define void @fneg_int_v4f32 (ptr %src , ptr %dst ) {
270+ ; X86-SSE-LABEL: fneg_int_v4f32:
271+ ; X86-SSE: # %bb.0:
272+ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
273+ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
274+ ; X86-SSE-NEXT: movaps (%ecx), %xmm0
275+ ; X86-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
276+ ; X86-SSE-NEXT: movaps %xmm0, (%eax)
277+ ; X86-SSE-NEXT: retl
278+ ;
279+ ; X64-SSE-LABEL: fneg_int_v4f32:
280+ ; X64-SSE: # %bb.0:
281+ ; X64-SSE-NEXT: movaps (%rdi), %xmm0
282+ ; X64-SSE-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
283+ ; X64-SSE-NEXT: movaps %xmm0, (%rsi)
284+ ; X64-SSE-NEXT: retq
285+ %1 = load <4 x float >, ptr %src
286+ %2 = fneg <4 x float > %1
287+ store <4 x float > %2 , ptr %dst
288+ ret void
289+ }