@@ -206,6 +206,147 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
206206}
207207
208208; store(fneg(load())) - convert scalar to integer
209+ define void @fneg_int_rmw_half (ptr %ptr ) nounwind { ; Test: negate a half in place - load from %ptr, fneg, store back to %ptr.
210+ ; X86-SSE1-LABEL: fneg_int_rmw_half:
211+ ; X86-SSE1: # %bb.0:
212+ ; X86-SSE1-NEXT: pushl %esi
213+ ; X86-SSE1-NEXT: subl $8, %esp
214+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
215+ ; X86-SSE1-NEXT: movzwl (%esi), %eax
216+ ; X86-SSE1-NEXT: movl %eax, (%esp)
217+ ; X86-SSE1-NEXT: calll __gnu_h2f_ieee
218+ ; X86-SSE1-NEXT: fstps {{[0-9]+}}(%esp)
219+ ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220+ ; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
221+ ; X86-SSE1-NEXT: movss %xmm0, (%esp)
222+ ; X86-SSE1-NEXT: calll __gnu_f2h_ieee
223+ ; X86-SSE1-NEXT: movw %ax, (%esi)
224+ ; X86-SSE1-NEXT: addl $8, %esp
225+ ; X86-SSE1-NEXT: popl %esi
226+ ; X86-SSE1-NEXT: retl
227+ ;
228+ ; X86-SSE2-LABEL: fneg_int_rmw_half:
229+ ; X86-SSE2: # %bb.0:
230+ ; X86-SSE2-NEXT: pushl %esi
231+ ; X86-SSE2-NEXT: subl $8, %esp
232+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
233+ ; X86-SSE2-NEXT: pinsrw $0, (%esi), %xmm0
234+ ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
235+ ; X86-SSE2-NEXT: movw %ax, (%esp)
236+ ; X86-SSE2-NEXT: calll __extendhfsf2
237+ ; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp)
238+ ; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
239+ ; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
240+ ; X86-SSE2-NEXT: movd %xmm0, (%esp)
241+ ; X86-SSE2-NEXT: calll __truncsfhf2
242+ ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
243+ ; X86-SSE2-NEXT: movw %ax, (%esi)
244+ ; X86-SSE2-NEXT: addl $8, %esp
245+ ; X86-SSE2-NEXT: popl %esi
246+ ; X86-SSE2-NEXT: retl
247+ ;
248+ ; X64-SSE1-LABEL: fneg_int_rmw_half:
249+ ; X64-SSE1: # %bb.0:
250+ ; X64-SSE1-NEXT: pushq %rbx
251+ ; X64-SSE1-NEXT: movq %rdi, %rbx
252+ ; X64-SSE1-NEXT: movzwl (%rdi), %edi
253+ ; X64-SSE1-NEXT: callq __gnu_h2f_ieee@PLT
254+ ; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
255+ ; X64-SSE1-NEXT: callq __gnu_f2h_ieee@PLT
256+ ; X64-SSE1-NEXT: movw %ax, (%rbx)
257+ ; X64-SSE1-NEXT: popq %rbx
258+ ; X64-SSE1-NEXT: retq
259+ ;
260+ ; X64-SSE2-LABEL: fneg_int_rmw_half:
261+ ; X64-SSE2: # %bb.0:
262+ ; X64-SSE2-NEXT: pushq %rbx
263+ ; X64-SSE2-NEXT: movq %rdi, %rbx
264+ ; X64-SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
265+ ; X64-SSE2-NEXT: callq __extendhfsf2@PLT
266+ ; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
267+ ; X64-SSE2-NEXT: callq __truncsfhf2@PLT
268+ ; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
269+ ; X64-SSE2-NEXT: movw %ax, (%rbx)
270+ ; X64-SSE2-NEXT: popq %rbx
271+ ; X64-SSE2-NEXT: retq
272+ %1 = load half , ptr %ptr ; read the 16-bit half value that will be negated
273+ %2 = fneg half %1 ; the expectations above show this emitted as xorps/pxor with a constant-pool operand, bracketed by calls to __gnu_h2f_ieee/__gnu_f2h_ieee or __extendhfsf2/__truncsfhf2 (presumably half<->float conversion helpers - confirm)
274+ store half %2 , ptr %ptr ; write the result back to the same address it was loaded from (read-modify-write)
275+ ret void
276+ }
277+
278+ define void @fneg_int_bfloat (ptr %src , ptr %dst ) nounwind { ; Test: negate a bfloat loaded from %src and store the result to %dst (separate pointers, not an in-place update).
279+ ; X86-SSE1-LABEL: fneg_int_bfloat:
280+ ; X86-SSE1: # %bb.0:
281+ ; X86-SSE1-NEXT: pushl %esi
282+ ; X86-SSE1-NEXT: subl $8, %esp
283+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
284+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
285+ ; X86-SSE1-NEXT: movzwl (%eax), %eax
286+ ; X86-SSE1-NEXT: shll $16, %eax
287+ ; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
288+ ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
289+ ; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
290+ ; X86-SSE1-NEXT: movss %xmm0, (%esp)
291+ ; X86-SSE1-NEXT: calll __truncsfbf2
292+ ; X86-SSE1-NEXT: movw %ax, (%esi)
293+ ; X86-SSE1-NEXT: addl $8, %esp
294+ ; X86-SSE1-NEXT: popl %esi
295+ ; X86-SSE1-NEXT: retl
296+ ;
297+ ; X86-SSE2-LABEL: fneg_int_bfloat:
298+ ; X86-SSE2: # %bb.0:
299+ ; X86-SSE2-NEXT: pushl %esi
300+ ; X86-SSE2-NEXT: pushl %eax
301+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
302+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
303+ ; X86-SSE2-NEXT: movzwl (%eax), %eax
304+ ; X86-SSE2-NEXT: shll $16, %eax
305+ ; X86-SSE2-NEXT: movd %eax, %xmm0
306+ ; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
307+ ; X86-SSE2-NEXT: movd %xmm0, (%esp)
308+ ; X86-SSE2-NEXT: calll __truncsfbf2
309+ ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
310+ ; X86-SSE2-NEXT: movw %ax, (%esi)
311+ ; X86-SSE2-NEXT: addl $4, %esp
312+ ; X86-SSE2-NEXT: popl %esi
313+ ; X86-SSE2-NEXT: retl
314+ ;
315+ ; X64-SSE1-LABEL: fneg_int_bfloat:
316+ ; X64-SSE1: # %bb.0:
317+ ; X64-SSE1-NEXT: pushq %rbx
318+ ; X64-SSE1-NEXT: subq $16, %rsp
319+ ; X64-SSE1-NEXT: movq %rsi, %rbx
320+ ; X64-SSE1-NEXT: movzwl (%rdi), %eax
321+ ; X64-SSE1-NEXT: shll $16, %eax
322+ ; X64-SSE1-NEXT: movl %eax, {{[0-9]+}}(%rsp)
323+ ; X64-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
324+ ; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
325+ ; X64-SSE1-NEXT: callq __truncsfbf2@PLT
326+ ; X64-SSE1-NEXT: movw %ax, (%rbx)
327+ ; X64-SSE1-NEXT: addq $16, %rsp
328+ ; X64-SSE1-NEXT: popq %rbx
329+ ; X64-SSE1-NEXT: retq
330+ ;
331+ ; X64-SSE2-LABEL: fneg_int_bfloat:
332+ ; X64-SSE2: # %bb.0:
333+ ; X64-SSE2-NEXT: pushq %rbx
334+ ; X64-SSE2-NEXT: movq %rsi, %rbx
335+ ; X64-SSE2-NEXT: movzwl (%rdi), %eax
336+ ; X64-SSE2-NEXT: shll $16, %eax
337+ ; X64-SSE2-NEXT: movd %eax, %xmm0
338+ ; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
339+ ; X64-SSE2-NEXT: callq __truncsfbf2@PLT
340+ ; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
341+ ; X64-SSE2-NEXT: movw %ax, (%rbx)
342+ ; X64-SSE2-NEXT: popq %rbx
343+ ; X64-SSE2-NEXT: retq
344+ %1 = load bfloat, ptr %src ; read the 16-bit bfloat value from the source pointer
345+ %2 = fneg bfloat %1 ; the expectations above show a 16-bit zero-extending load shifted left by 16, then xorps/pxor with a constant-pool operand, then a call to __truncsfbf2 (presumably float->bfloat truncation - confirm)
346+ store bfloat %2 , ptr %dst ; write the negated value to the destination pointer
347+ ret void
348+ }
349+
209350define void @fneg_int_rmw_f32 (ptr %ptr ) {
210351; X86-SSE-LABEL: fneg_int_rmw_f32:
211352; X86-SSE: # %bb.0:
0 commit comments