@@ -207,140 +207,38 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
207207
208208; store(fneg(load())) - convert scalar to integer
209209define void @fneg_int_rmw_half (ptr %ptr ) nounwind {
210- ; X86-SSE1-LABEL: fneg_int_rmw_half:
211- ; X86-SSE1: # %bb.0:
212- ; X86-SSE1-NEXT: pushl %esi
213- ; X86-SSE1-NEXT: subl $8, %esp
214- ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
215- ; X86-SSE1-NEXT: movzwl (%esi), %eax
216- ; X86-SSE1-NEXT: movl %eax, (%esp)
217- ; X86-SSE1-NEXT: calll __gnu_h2f_ieee
218- ; X86-SSE1-NEXT: fstps {{[0-9]+}}(%esp)
219- ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220- ; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
221- ; X86-SSE1-NEXT: movss %xmm0, (%esp)
222- ; X86-SSE1-NEXT: calll __gnu_f2h_ieee
223- ; X86-SSE1-NEXT: movw %ax, (%esi)
224- ; X86-SSE1-NEXT: addl $8, %esp
225- ; X86-SSE1-NEXT: popl %esi
226- ; X86-SSE1-NEXT: retl
227- ;
228- ; X86-SSE2-LABEL: fneg_int_rmw_half:
229- ; X86-SSE2: # %bb.0:
230- ; X86-SSE2-NEXT: pushl %esi
231- ; X86-SSE2-NEXT: subl $8, %esp
232- ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
233- ; X86-SSE2-NEXT: pinsrw $0, (%esi), %xmm0
234- ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
235- ; X86-SSE2-NEXT: movw %ax, (%esp)
236- ; X86-SSE2-NEXT: calll __extendhfsf2
237- ; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp)
238- ; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
239- ; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
240- ; X86-SSE2-NEXT: movd %xmm0, (%esp)
241- ; X86-SSE2-NEXT: calll __truncsfhf2
242- ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
243- ; X86-SSE2-NEXT: movw %ax, (%esi)
244- ; X86-SSE2-NEXT: addl $8, %esp
245- ; X86-SSE2-NEXT: popl %esi
246- ; X86-SSE2-NEXT: retl
247- ;
248- ; X64-SSE1-LABEL: fneg_int_rmw_half:
249- ; X64-SSE1: # %bb.0:
250- ; X64-SSE1-NEXT: pushq %rbx
251- ; X64-SSE1-NEXT: movq %rdi, %rbx
252- ; X64-SSE1-NEXT: movzwl (%rdi), %edi
253- ; X64-SSE1-NEXT: callq __gnu_h2f_ieee@PLT
254- ; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
255- ; X64-SSE1-NEXT: callq __gnu_f2h_ieee@PLT
256- ; X64-SSE1-NEXT: movw %ax, (%rbx)
257- ; X64-SSE1-NEXT: popq %rbx
258- ; X64-SSE1-NEXT: retq
210+ ; X86-SSE-LABEL: fneg_int_rmw_half:
211+ ; X86-SSE: # %bb.0:
212+ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
213+ ; X86-SSE-NEXT: xorb $-128, 1(%eax)
214+ ; X86-SSE-NEXT: retl
259215;
260- ; X64-SSE2-LABEL: fneg_int_rmw_half:
261- ; X64-SSE2: # %bb.0:
262- ; X64-SSE2-NEXT: pushq %rbx
263- ; X64-SSE2-NEXT: movq %rdi, %rbx
264- ; X64-SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
265- ; X64-SSE2-NEXT: callq __extendhfsf2@PLT
266- ; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
267- ; X64-SSE2-NEXT: callq __truncsfhf2@PLT
268- ; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
269- ; X64-SSE2-NEXT: movw %ax, (%rbx)
270- ; X64-SSE2-NEXT: popq %rbx
271- ; X64-SSE2-NEXT: retq
216+ ; X64-SSE-LABEL: fneg_int_rmw_half:
217+ ; X64-SSE: # %bb.0:
218+ ; X64-SSE-NEXT: xorb $-128, 1(%rdi)
219+ ; X64-SSE-NEXT: retq
272220 %1 = load half , ptr %ptr
273221 %2 = fneg half %1
274222 store half %2 , ptr %ptr
275223 ret void
276224}
277225
278226define void @fneg_int_bfloat (ptr %src , ptr %dst ) nounwind {
279- ; X86-SSE1-LABEL: fneg_int_bfloat:
280- ; X86-SSE1: # %bb.0:
281- ; X86-SSE1-NEXT: pushl %esi
282- ; X86-SSE1-NEXT: subl $8, %esp
283- ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
284- ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
285- ; X86-SSE1-NEXT: movzwl (%eax), %eax
286- ; X86-SSE1-NEXT: shll $16, %eax
287- ; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
288- ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
289- ; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
290- ; X86-SSE1-NEXT: movss %xmm0, (%esp)
291- ; X86-SSE1-NEXT: calll __truncsfbf2
292- ; X86-SSE1-NEXT: movw %ax, (%esi)
293- ; X86-SSE1-NEXT: addl $8, %esp
294- ; X86-SSE1-NEXT: popl %esi
295- ; X86-SSE1-NEXT: retl
296- ;
297- ; X86-SSE2-LABEL: fneg_int_bfloat:
298- ; X86-SSE2: # %bb.0:
299- ; X86-SSE2-NEXT: pushl %esi
300- ; X86-SSE2-NEXT: pushl %eax
301- ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
302- ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
303- ; X86-SSE2-NEXT: movzwl (%eax), %eax
304- ; X86-SSE2-NEXT: shll $16, %eax
305- ; X86-SSE2-NEXT: movd %eax, %xmm0
306- ; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
307- ; X86-SSE2-NEXT: movd %xmm0, (%esp)
308- ; X86-SSE2-NEXT: calll __truncsfbf2
309- ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
310- ; X86-SSE2-NEXT: movw %ax, (%esi)
311- ; X86-SSE2-NEXT: addl $4, %esp
312- ; X86-SSE2-NEXT: popl %esi
313- ; X86-SSE2-NEXT: retl
314- ;
315- ; X64-SSE1-LABEL: fneg_int_bfloat:
316- ; X64-SSE1: # %bb.0:
317- ; X64-SSE1-NEXT: pushq %rbx
318- ; X64-SSE1-NEXT: subq $16, %rsp
319- ; X64-SSE1-NEXT: movq %rsi, %rbx
320- ; X64-SSE1-NEXT: movzwl (%rdi), %eax
321- ; X64-SSE1-NEXT: shll $16, %eax
322- ; X64-SSE1-NEXT: movl %eax, {{[0-9]+}}(%rsp)
323- ; X64-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
324- ; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
325- ; X64-SSE1-NEXT: callq __truncsfbf2@PLT
326- ; X64-SSE1-NEXT: movw %ax, (%rbx)
327- ; X64-SSE1-NEXT: addq $16, %rsp
328- ; X64-SSE1-NEXT: popq %rbx
329- ; X64-SSE1-NEXT: retq
227+ ; X86-SSE-LABEL: fneg_int_bfloat:
228+ ; X86-SSE: # %bb.0:
229+ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
230+ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
231+ ; X86-SSE-NEXT: movzwl (%ecx), %ecx
232+ ; X86-SSE-NEXT: xorl $32768, %ecx # imm = 0x8000
233+ ; X86-SSE-NEXT: movw %cx, (%eax)
234+ ; X86-SSE-NEXT: retl
330235;
331- ; X64-SSE2-LABEL: fneg_int_bfloat:
332- ; X64-SSE2: # %bb.0:
333- ; X64-SSE2-NEXT: pushq %rbx
334- ; X64-SSE2-NEXT: movq %rsi, %rbx
335- ; X64-SSE2-NEXT: movzwl (%rdi), %eax
336- ; X64-SSE2-NEXT: shll $16, %eax
337- ; X64-SSE2-NEXT: movd %eax, %xmm0
338- ; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
339- ; X64-SSE2-NEXT: callq __truncsfbf2@PLT
340- ; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
341- ; X64-SSE2-NEXT: movw %ax, (%rbx)
342- ; X64-SSE2-NEXT: popq %rbx
343- ; X64-SSE2-NEXT: retq
236+ ; X64-SSE-LABEL: fneg_int_bfloat:
237+ ; X64-SSE: # %bb.0:
238+ ; X64-SSE-NEXT: movzwl (%rdi), %eax
239+ ; X64-SSE-NEXT: xorl $32768, %eax # imm = 0x8000
240+ ; X64-SSE-NEXT: movw %ax, (%rsi)
241+ ; X64-SSE-NEXT: retq
344242 %1 = load bfloat, ptr %src
345243 %2 = fneg bfloat %1
346244 store bfloat %2 , ptr %dst
0 commit comments