@@ -629,26 +629,32 @@ define void @atomic_umin_uint_max(ptr %addr) {
629629;
630630; X64-LABEL: atomic_umin_uint_max:
631631; X64: # %bb.0:
632+ ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
632633; X64-NEXT: movl (%rdi), %eax
633- ; X64-NEXT: .p2align 4
634- ; X64-NEXT: .LBB15_1: # %atomicrmw.start
635- ; X64-NEXT: # =>This Inner Loop Header: Depth=1
636- ; X64-NEXT: lock cmpxchgl %eax, (%rdi)
637- ; X64-NEXT: jne .LBB15_1
638- ; X64-NEXT: # %bb.2: # %atomicrmw.end
639634; X64-NEXT: retq
640635;
641- ; X86-LABEL: atomic_umin_uint_max:
642- ; X86: # %bb.0:
643- ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
644- ; X86-NEXT: movl (%ecx), %eax
645- ; X86-NEXT: .p2align 4
646- ; X86-NEXT: .LBB15_1: # %atomicrmw.start
647- ; X86-NEXT: # =>This Inner Loop Header: Depth=1
648- ; X86-NEXT: lock cmpxchgl %eax, (%ecx)
649- ; X86-NEXT: jne .LBB15_1
650- ; X86-NEXT: # %bb.2: # %atomicrmw.end
651- ; X86-NEXT: retl
636+ ; X86-SSE2-LABEL: atomic_umin_uint_max:
637+ ; X86-SSE2: # %bb.0:
638+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
639+ ; X86-SSE2-NEXT: mfence
640+ ; X86-SSE2-NEXT: movl (%eax), %eax
641+ ; X86-SSE2-NEXT: retl
642+ ;
643+ ; X86-SLM-LABEL: atomic_umin_uint_max:
644+ ; X86-SLM: # %bb.0:
645+ ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
646+ ; X86-SLM-NEXT: lock orl $0, (%esp)
647+ ; X86-SLM-NEXT: movl (%eax), %eax
648+ ; X86-SLM-NEXT: retl
649+ ;
650+ ; X86-ATOM-LABEL: atomic_umin_uint_max:
651+ ; X86-ATOM: # %bb.0:
652+ ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
653+ ; X86-ATOM-NEXT: lock orl $0, (%esp)
654+ ; X86-ATOM-NEXT: movl (%eax), %eax
655+ ; X86-ATOM-NEXT: nop
656+ ; X86-ATOM-NEXT: nop
657+ ; X86-ATOM-NEXT: retl
652658 atomicrmw umin ptr %addr , i32 -1 seq_cst
653659 ret void
654660}
@@ -660,26 +666,32 @@ define void @atomic_umax_zero(ptr %addr) {
660666;
661667; X64-LABEL: atomic_umax_zero:
662668; X64: # %bb.0:
669+ ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
663670; X64-NEXT: movl (%rdi), %eax
664- ; X64-NEXT: .p2align 4
665- ; X64-NEXT: .LBB16_1: # %atomicrmw.start
666- ; X64-NEXT: # =>This Inner Loop Header: Depth=1
667- ; X64-NEXT: lock cmpxchgl %eax, (%rdi)
668- ; X64-NEXT: jne .LBB16_1
669- ; X64-NEXT: # %bb.2: # %atomicrmw.end
670671; X64-NEXT: retq
671672;
672- ; X86-LABEL: atomic_umax_zero:
673- ; X86: # %bb.0:
674- ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
675- ; X86-NEXT: movl (%ecx), %eax
676- ; X86-NEXT: .p2align 4
677- ; X86-NEXT: .LBB16_1: # %atomicrmw.start
678- ; X86-NEXT: # =>This Inner Loop Header: Depth=1
679- ; X86-NEXT: lock cmpxchgl %eax, (%ecx)
680- ; X86-NEXT: jne .LBB16_1
681- ; X86-NEXT: # %bb.2: # %atomicrmw.end
682- ; X86-NEXT: retl
673+ ; X86-SSE2-LABEL: atomic_umax_zero:
674+ ; X86-SSE2: # %bb.0:
675+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
676+ ; X86-SSE2-NEXT: mfence
677+ ; X86-SSE2-NEXT: movl (%eax), %eax
678+ ; X86-SSE2-NEXT: retl
679+ ;
680+ ; X86-SLM-LABEL: atomic_umax_zero:
681+ ; X86-SLM: # %bb.0:
682+ ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
683+ ; X86-SLM-NEXT: lock orl $0, (%esp)
684+ ; X86-SLM-NEXT: movl (%eax), %eax
685+ ; X86-SLM-NEXT: retl
686+ ;
687+ ; X86-ATOM-LABEL: atomic_umax_zero:
688+ ; X86-ATOM: # %bb.0:
689+ ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
690+ ; X86-ATOM-NEXT: lock orl $0, (%esp)
691+ ; X86-ATOM-NEXT: movl (%eax), %eax
692+ ; X86-ATOM-NEXT: nop
693+ ; X86-ATOM-NEXT: nop
694+ ; X86-ATOM-NEXT: retl
683695 atomicrmw umax ptr %addr , i32 0 seq_cst
684696 ret void
685697}
@@ -691,26 +703,32 @@ define void @atomic_min_smax_char(ptr %addr) {
691703;
692704; X64-LABEL: atomic_min_smax_char:
693705; X64: # %bb.0:
706+ ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
694707; X64-NEXT: movzbl (%rdi), %eax
695- ; X64-NEXT: .p2align 4
696- ; X64-NEXT: .LBB17_1: # %atomicrmw.start
697- ; X64-NEXT: # =>This Inner Loop Header: Depth=1
698- ; X64-NEXT: lock cmpxchgb %al, (%rdi)
699- ; X64-NEXT: jne .LBB17_1
700- ; X64-NEXT: # %bb.2: # %atomicrmw.end
701708; X64-NEXT: retq
702709;
703- ; X86-LABEL: atomic_min_smax_char:
704- ; X86: # %bb.0:
705- ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
706- ; X86-NEXT: movzbl (%ecx), %eax
707- ; X86-NEXT: .p2align 4
708- ; X86-NEXT: .LBB17_1: # %atomicrmw.start
709- ; X86-NEXT: # =>This Inner Loop Header: Depth=1
710- ; X86-NEXT: lock cmpxchgb %al, (%ecx)
711- ; X86-NEXT: jne .LBB17_1
712- ; X86-NEXT: # %bb.2: # %atomicrmw.end
713- ; X86-NEXT: retl
710+ ; X86-SSE2-LABEL: atomic_min_smax_char:
711+ ; X86-SSE2: # %bb.0:
712+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
713+ ; X86-SSE2-NEXT: mfence
714+ ; X86-SSE2-NEXT: movzbl (%eax), %eax
715+ ; X86-SSE2-NEXT: retl
716+ ;
717+ ; X86-SLM-LABEL: atomic_min_smax_char:
718+ ; X86-SLM: # %bb.0:
719+ ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
720+ ; X86-SLM-NEXT: lock orl $0, (%esp)
721+ ; X86-SLM-NEXT: movzbl (%eax), %eax
722+ ; X86-SLM-NEXT: retl
723+ ;
724+ ; X86-ATOM-LABEL: atomic_min_smax_char:
725+ ; X86-ATOM: # %bb.0:
726+ ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
727+ ; X86-ATOM-NEXT: lock orl $0, (%esp)
728+ ; X86-ATOM-NEXT: movzbl (%eax), %eax
729+ ; X86-ATOM-NEXT: nop
730+ ; X86-ATOM-NEXT: nop
731+ ; X86-ATOM-NEXT: retl
714732 atomicrmw min ptr %addr , i8 127 seq_cst
715733 ret void
716734}
@@ -722,26 +740,32 @@ define void @atomic_max_smin_char(ptr %addr) {
722740;
723741; X64-LABEL: atomic_max_smin_char:
724742; X64: # %bb.0:
743+ ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
725744; X64-NEXT: movzbl (%rdi), %eax
726- ; X64-NEXT: .p2align 4
727- ; X64-NEXT: .LBB18_1: # %atomicrmw.start
728- ; X64-NEXT: # =>This Inner Loop Header: Depth=1
729- ; X64-NEXT: lock cmpxchgb %al, (%rdi)
730- ; X64-NEXT: jne .LBB18_1
731- ; X64-NEXT: # %bb.2: # %atomicrmw.end
732745; X64-NEXT: retq
733746;
734- ; X86-LABEL: atomic_max_smin_char:
735- ; X86: # %bb.0:
736- ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
737- ; X86-NEXT: movzbl (%ecx), %eax
738- ; X86-NEXT: .p2align 4
739- ; X86-NEXT: .LBB18_1: # %atomicrmw.start
740- ; X86-NEXT: # =>This Inner Loop Header: Depth=1
741- ; X86-NEXT: lock cmpxchgb %al, (%ecx)
742- ; X86-NEXT: jne .LBB18_1
743- ; X86-NEXT: # %bb.2: # %atomicrmw.end
744- ; X86-NEXT: retl
747+ ; X86-SSE2-LABEL: atomic_max_smin_char:
748+ ; X86-SSE2: # %bb.0:
749+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
750+ ; X86-SSE2-NEXT: mfence
751+ ; X86-SSE2-NEXT: movzbl (%eax), %eax
752+ ; X86-SSE2-NEXT: retl
753+ ;
754+ ; X86-SLM-LABEL: atomic_max_smin_char:
755+ ; X86-SLM: # %bb.0:
756+ ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
757+ ; X86-SLM-NEXT: lock orl $0, (%esp)
758+ ; X86-SLM-NEXT: movzbl (%eax), %eax
759+ ; X86-SLM-NEXT: retl
760+ ;
761+ ; X86-ATOM-LABEL: atomic_max_smin_char:
762+ ; X86-ATOM: # %bb.0:
763+ ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
764+ ; X86-ATOM-NEXT: lock orl $0, (%esp)
765+ ; X86-ATOM-NEXT: movzbl (%eax), %eax
766+ ; X86-ATOM-NEXT: nop
767+ ; X86-ATOM-NEXT: nop
768+ ; X86-ATOM-NEXT: retl
745769 atomicrmw max ptr %addr , i8 -128 seq_cst
746770 ret void
747771}
@@ -753,26 +777,32 @@ define void @atomic_min_umax_char(ptr %addr) {
753777;
754778; X64-LABEL: atomic_min_umax_char:
755779; X64: # %bb.0:
780+ ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
756781; X64-NEXT: movzbl (%rdi), %eax
757- ; X64-NEXT: .p2align 4
758- ; X64-NEXT: .LBB19_1: # %atomicrmw.start
759- ; X64-NEXT: # =>This Inner Loop Header: Depth=1
760- ; X64-NEXT: lock cmpxchgb %al, (%rdi)
761- ; X64-NEXT: jne .LBB19_1
762- ; X64-NEXT: # %bb.2: # %atomicrmw.end
763782; X64-NEXT: retq
764783;
765- ; X86-LABEL: atomic_min_umax_char:
766- ; X86: # %bb.0:
767- ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
768- ; X86-NEXT: movzbl (%ecx), %eax
769- ; X86-NEXT: .p2align 4
770- ; X86-NEXT: .LBB19_1: # %atomicrmw.start
771- ; X86-NEXT: # =>This Inner Loop Header: Depth=1
772- ; X86-NEXT: lock cmpxchgb %al, (%ecx)
773- ; X86-NEXT: jne .LBB19_1
774- ; X86-NEXT: # %bb.2: # %atomicrmw.end
775- ; X86-NEXT: retl
784+ ; X86-SSE2-LABEL: atomic_min_umax_char:
785+ ; X86-SSE2: # %bb.0:
786+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
787+ ; X86-SSE2-NEXT: mfence
788+ ; X86-SSE2-NEXT: movzbl (%eax), %eax
789+ ; X86-SSE2-NEXT: retl
790+ ;
791+ ; X86-SLM-LABEL: atomic_min_umax_char:
792+ ; X86-SLM: # %bb.0:
793+ ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
794+ ; X86-SLM-NEXT: lock orl $0, (%esp)
795+ ; X86-SLM-NEXT: movzbl (%eax), %eax
796+ ; X86-SLM-NEXT: retl
797+ ;
798+ ; X86-ATOM-LABEL: atomic_min_umax_char:
799+ ; X86-ATOM: # %bb.0:
800+ ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
801+ ; X86-ATOM-NEXT: lock orl $0, (%esp)
802+ ; X86-ATOM-NEXT: movzbl (%eax), %eax
803+ ; X86-ATOM-NEXT: nop
804+ ; X86-ATOM-NEXT: nop
805+ ; X86-ATOM-NEXT: retl
776806 atomicrmw umin ptr %addr , i8 255 seq_cst
777807 ret void
778808}
@@ -784,30 +814,37 @@ define void @atomic_max_umin_char(ptr %addr) {
784814;
785815; X64-LABEL: atomic_max_umin_char:
786816; X64: # %bb.0:
817+ ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
787818; X64-NEXT: movzbl (%rdi), %eax
788- ; X64-NEXT: .p2align 4
789- ; X64-NEXT: .LBB20_1: # %atomicrmw.start
790- ; X64-NEXT: # =>This Inner Loop Header: Depth=1
791- ; X64-NEXT: lock cmpxchgb %al, (%rdi)
792- ; X64-NEXT: jne .LBB20_1
793- ; X64-NEXT: # %bb.2: # %atomicrmw.end
794819; X64-NEXT: retq
795820;
796- ; X86-LABEL: atomic_max_umin_char:
797- ; X86: # %bb.0:
798- ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
799- ; X86-NEXT: movzbl (%ecx), %eax
800- ; X86-NEXT: .p2align 4
801- ; X86-NEXT: .LBB20_1: # %atomicrmw.start
802- ; X86-NEXT: # =>This Inner Loop Header: Depth=1
803- ; X86-NEXT: lock cmpxchgb %al, (%ecx)
804- ; X86-NEXT: jne .LBB20_1
805- ; X86-NEXT: # %bb.2: # %atomicrmw.end
806- ; X86-NEXT: retl
821+ ; X86-SSE2-LABEL: atomic_max_umin_char:
822+ ; X86-SSE2: # %bb.0:
823+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
824+ ; X86-SSE2-NEXT: mfence
825+ ; X86-SSE2-NEXT: movzbl (%eax), %eax
826+ ; X86-SSE2-NEXT: retl
827+ ;
828+ ; X86-SLM-LABEL: atomic_max_umin_char:
829+ ; X86-SLM: # %bb.0:
830+ ; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
831+ ; X86-SLM-NEXT: lock orl $0, (%esp)
832+ ; X86-SLM-NEXT: movzbl (%eax), %eax
833+ ; X86-SLM-NEXT: retl
834+ ;
835+ ; X86-ATOM-LABEL: atomic_max_umin_char:
836+ ; X86-ATOM: # %bb.0:
837+ ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
838+ ; X86-ATOM-NEXT: lock orl $0, (%esp)
839+ ; X86-ATOM-NEXT: movzbl (%eax), %eax
840+ ; X86-ATOM-NEXT: nop
841+ ; X86-ATOM-NEXT: nop
842+ ; X86-ATOM-NEXT: retl
807843 atomicrmw umax ptr %addr , i8 0 seq_cst
808844 ret void
809845}
810846
847+ ; TODO: Add floating point support.
811848define void @atomic_fadd_zero (ptr %addr ) {
812849; CHECK-LABEL: @atomic_fadd_zero(
813850; CHECK-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float -0.000000e+00 monotonic, align 4
0 commit comments