@@ -789,12 +789,55 @@ define double @load_double_seq_cst(ptr %fptr) {
789789}
790790
791791define void @store_bfloat (ptr %fptr , bfloat %v ) {
792- ; X86-LABEL: store_bfloat:
793- ; X86: # %bb.0:
794- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
795- ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
796- ; X86-NEXT: movw %cx, (%eax)
797- ; X86-NEXT: retl
792+ ; X86-SSE1-LABEL: store_bfloat:
793+ ; X86-SSE1: # %bb.0:
794+ ; X86-SSE1-NEXT: pushl %esi
795+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
796+ ; X86-SSE1-NEXT: subl $8, %esp
797+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
798+ ; X86-SSE1-NEXT: .cfi_offset %esi, -8
799+ ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
800+ ; X86-SSE1-NEXT: movss %xmm0, (%esp)
801+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
802+ ; X86-SSE1-NEXT: calll __truncsfbf2
803+ ; X86-SSE1-NEXT: movw %ax, (%esi)
804+ ; X86-SSE1-NEXT: addl $8, %esp
805+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
806+ ; X86-SSE1-NEXT: popl %esi
807+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
808+ ; X86-SSE1-NEXT: retl
809+ ;
810+ ; X86-SSE2-LABEL: store_bfloat:
811+ ; X86-SSE2: # %bb.0:
812+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
813+ ; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
814+ ; X86-SSE2-NEXT: movw %cx, (%eax)
815+ ; X86-SSE2-NEXT: retl
816+ ;
817+ ; X86-AVX-LABEL: store_bfloat:
818+ ; X86-AVX: # %bb.0:
819+ ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
820+ ; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
821+ ; X86-AVX-NEXT: movw %cx, (%eax)
822+ ; X86-AVX-NEXT: retl
823+ ;
824+ ; X86-NOSSE-LABEL: store_bfloat:
825+ ; X86-NOSSE: # %bb.0:
826+ ; X86-NOSSE-NEXT: pushl %esi
827+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
828+ ; X86-NOSSE-NEXT: subl $8, %esp
829+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
830+ ; X86-NOSSE-NEXT: .cfi_offset %esi, -8
831+ ; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
832+ ; X86-NOSSE-NEXT: fstps (%esp)
833+ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
834+ ; X86-NOSSE-NEXT: calll __truncsfbf2
835+ ; X86-NOSSE-NEXT: movw %ax, (%esi)
836+ ; X86-NOSSE-NEXT: addl $8, %esp
837+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
838+ ; X86-NOSSE-NEXT: popl %esi
839+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
840+ ; X86-NOSSE-NEXT: retl
798841;
799842; X64-SSE-LABEL: store_bfloat:
800843; X64-SSE: # %bb.0:
@@ -811,8 +854,7 @@ define void @store_bfloat(ptr %fptr, bfloat %v) {
811854 ret void
812855}
813856
814- ; Work around issue #92899 by casting to float
815- define float @load_bfloat (ptr %fptr ) {
857+ define bfloat @load_bfloat (ptr %fptr ) {
816858; X86-SSE1-LABEL: load_bfloat:
817859; X86-SSE1: # %bb.0:
818860; X86-SSE1-NEXT: pushl %eax
@@ -828,30 +870,16 @@ define float @load_bfloat(ptr %fptr) {
828870;
829871; X86-SSE2-LABEL: load_bfloat:
830872; X86-SSE2: # %bb.0:
831- ; X86-SSE2-NEXT: pushl %eax
832- ; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
833873; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
834874; X86-SSE2-NEXT: movzwl (%eax), %eax
835- ; X86-SSE2-NEXT: shll $16, %eax
836- ; X86-SSE2-NEXT: movd %eax, %xmm0
837- ; X86-SSE2-NEXT: movd %xmm0, (%esp)
838- ; X86-SSE2-NEXT: flds (%esp)
839- ; X86-SSE2-NEXT: popl %eax
840- ; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
875+ ; X86-SSE2-NEXT: pinsrw $0, %eax, %xmm0
841876; X86-SSE2-NEXT: retl
842877;
843878; X86-AVX-LABEL: load_bfloat:
844879; X86-AVX: # %bb.0:
845- ; X86-AVX-NEXT: pushl %eax
846- ; X86-AVX-NEXT: .cfi_def_cfa_offset 8
847880; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
848881; X86-AVX-NEXT: movzwl (%eax), %eax
849- ; X86-AVX-NEXT: shll $16, %eax
850- ; X86-AVX-NEXT: vmovd %eax, %xmm0
851- ; X86-AVX-NEXT: vmovd %xmm0, (%esp)
852- ; X86-AVX-NEXT: flds (%esp)
853- ; X86-AVX-NEXT: popl %eax
854- ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
882+ ; X86-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
855883; X86-AVX-NEXT: retl
856884;
857885; X86-NOSSE-LABEL: load_bfloat:
@@ -870,17 +898,14 @@ define float @load_bfloat(ptr %fptr) {
870898; X64-SSE-LABEL: load_bfloat:
871899; X64-SSE: # %bb.0:
872900; X64-SSE-NEXT: movzwl (%rdi), %eax
873- ; X64-SSE-NEXT: shll $16, %eax
874- ; X64-SSE-NEXT: movd %eax, %xmm0
901+ ; X64-SSE-NEXT: pinsrw $0, %eax, %xmm0
875902; X64-SSE-NEXT: retq
876903;
877904; X64-AVX-LABEL: load_bfloat:
878905; X64-AVX: # %bb.0:
879906; X64-AVX-NEXT: movzwl (%rdi), %eax
880- ; X64-AVX-NEXT: shll $16, %eax
881- ; X64-AVX-NEXT: vmovd %eax, %xmm0
907+ ; X64-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
882908; X64-AVX-NEXT: retq
883909 %v = load atomic bfloat, ptr %fptr unordered , align 2
884- %ext = fpext bfloat %v to float
885- ret float %ext
910+ ret bfloat %v
886911}
0 commit comments