@@ -402,9 +402,9 @@ define void @merge_loads_i16(i32 %count, ptr noalias nocapture %q, ptr noalias n
402402define void @no_merge_loads (i32 %count , ptr noalias nocapture %q , ptr noalias nocapture %p ) nounwind uwtable noinline ssp {
403403; X86-BWON-LABEL: no_merge_loads:
404404; X86-BWON: # %bb.0:
405- ; X86-BWON-NEXT: pushl %ebx
405+ ; X86-BWON-NEXT: pushl %esi
406406; X86-BWON-NEXT: .cfi_def_cfa_offset 8
407- ; X86-BWON-NEXT: .cfi_offset %ebx , -8
407+ ; X86-BWON-NEXT: .cfi_offset %esi , -8
408408; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %eax
409409; X86-BWON-NEXT: testl %eax, %eax
410410; X86-BWON-NEXT: jle .LBB5_3
@@ -414,23 +414,21 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no
414414; X86-BWON-NEXT: .p2align 4
415415; X86-BWON-NEXT: .LBB5_2: # %a4
416416; X86-BWON-NEXT: # =>This Inner Loop Header: Depth=1
417- ; X86-BWON-NEXT: movzbl (%edx), %ebx
418- ; X86-BWON-NEXT: movb %bl, (%ecx)
419- ; X86-BWON-NEXT: movzbl 1(%edx), %ebx
420- ; X86-BWON-NEXT: movb %bl, 1(%ecx)
417+ ; X86-BWON-NEXT: movzwl (%edx), %esi
418+ ; X86-BWON-NEXT: movw %si, (%ecx)
421419; X86-BWON-NEXT: addl $8, %ecx
422420; X86-BWON-NEXT: decl %eax
423421; X86-BWON-NEXT: jne .LBB5_2
424422; X86-BWON-NEXT: .LBB5_3: # %._crit_edge
425- ; X86-BWON-NEXT: popl %ebx
423+ ; X86-BWON-NEXT: popl %esi
426424; X86-BWON-NEXT: .cfi_def_cfa_offset 4
427425; X86-BWON-NEXT: retl
428426;
429427; X86-BWOFF-LABEL: no_merge_loads:
430428; X86-BWOFF: # %bb.0:
431- ; X86-BWOFF-NEXT: pushl %ebx
429+ ; X86-BWOFF-NEXT: pushl %esi
432430; X86-BWOFF-NEXT: .cfi_def_cfa_offset 8
433- ; X86-BWOFF-NEXT: .cfi_offset %ebx , -8
431+ ; X86-BWOFF-NEXT: .cfi_offset %esi , -8
434432; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %eax
435433; X86-BWOFF-NEXT: testl %eax, %eax
436434; X86-BWOFF-NEXT: jle .LBB5_3
@@ -440,15 +438,13 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no
440438; X86-BWOFF-NEXT: .p2align 4
441439; X86-BWOFF-NEXT: .LBB5_2: # %a4
442440; X86-BWOFF-NEXT: # =>This Inner Loop Header: Depth=1
443- ; X86-BWOFF-NEXT: movb (%edx), %bl
444- ; X86-BWOFF-NEXT: movb %bl, (%ecx)
445- ; X86-BWOFF-NEXT: movb 1(%edx), %bl
446- ; X86-BWOFF-NEXT: movb %bl, 1(%ecx)
441+ ; X86-BWOFF-NEXT: movw (%edx), %si
442+ ; X86-BWOFF-NEXT: movw %si, (%ecx)
447443; X86-BWOFF-NEXT: addl $8, %ecx
448444; X86-BWOFF-NEXT: decl %eax
449445; X86-BWOFF-NEXT: jne .LBB5_2
450446; X86-BWOFF-NEXT: .LBB5_3: # %._crit_edge
451- ; X86-BWOFF-NEXT: popl %ebx
447+ ; X86-BWOFF-NEXT: popl %esi
452448; X86-BWOFF-NEXT: .cfi_def_cfa_offset 4
453449; X86-BWOFF-NEXT: retl
454450;
@@ -459,10 +455,8 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no
459455; X64-BWON-NEXT: .p2align 4
460456; X64-BWON-NEXT: .LBB5_1: # %a4
461457; X64-BWON-NEXT: # =>This Inner Loop Header: Depth=1
462- ; X64-BWON-NEXT: movzbl (%rsi), %eax
463- ; X64-BWON-NEXT: movb %al, (%rdx)
464- ; X64-BWON-NEXT: movzbl 1(%rsi), %eax
465- ; X64-BWON-NEXT: movb %al, 1(%rdx)
458+ ; X64-BWON-NEXT: movzwl (%rsi), %eax
459+ ; X64-BWON-NEXT: movw %ax, (%rdx)
466460; X64-BWON-NEXT: addq $8, %rdx
467461; X64-BWON-NEXT: decl %edi
468462; X64-BWON-NEXT: jne .LBB5_1
@@ -476,10 +470,8 @@ define void @no_merge_loads(i32 %count, ptr noalias nocapture %q, ptr noalias no
476470; X64-BWOFF-NEXT: .p2align 4
477471; X64-BWOFF-NEXT: .LBB5_1: # %a4
478472; X64-BWOFF-NEXT: # =>This Inner Loop Header: Depth=1
479- ; X64-BWOFF-NEXT: movb (%rsi), %al
480- ; X64-BWOFF-NEXT: movb %al, (%rdx)
481- ; X64-BWOFF-NEXT: movb 1(%rsi), %al
482- ; X64-BWOFF-NEXT: movb %al, 1(%rdx)
473+ ; X64-BWOFF-NEXT: movw (%rsi), %ax
474+ ; X64-BWOFF-NEXT: movw %ax, (%rdx)
483475; X64-BWOFF-NEXT: addq $8, %rdx
484476; X64-BWOFF-NEXT: decl %edi
485477; X64-BWOFF-NEXT: jne .LBB5_1
@@ -858,26 +850,26 @@ define void @MergeLoadStoreBaseIndexOffsetComplicated(ptr %a, ptr %b, ptr %c, i6
858850; X86-BWON-NEXT: .cfi_offset %edi, -16
859851; X86-BWON-NEXT: .cfi_offset %ebx, -12
860852; X86-BWON-NEXT: .cfi_offset %ebp, -8
861- ; X86-BWON-NEXT: xorl %eax, %eax
862- ; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %esi
853+ ; X86-BWON-NEXT: xorl %esi, %esi
863854; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %edi
864855; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %ebx
865856; X86-BWON-NEXT: xorl %ebp, %ebp
866857; X86-BWON-NEXT: .p2align 4
867858; X86-BWON-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
868859; X86-BWON-NEXT: movsbl (%edi), %ecx
869- ; X86-BWON-NEXT: movzbl (%esi,%ecx), %edx
870- ; X86-BWON-NEXT: movzbl 1(%esi,%ecx), %ecx
871- ; X86-BWON-NEXT: movb %dl, (%ebx,%eax)
872- ; X86-BWON-NEXT: movl %eax, %edx
873- ; X86-BWON-NEXT: orl $1, %edx
874- ; X86-BWON-NEXT: movb %cl, (%ebx,%edx)
860+ ; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %eax
861+ ; X86-BWON-NEXT: movzbl (%eax,%ecx), %edx
862+ ; X86-BWON-NEXT: movzbl 1(%eax,%ecx), %ecx
863+ ; X86-BWON-NEXT: movl %esi, %eax
864+ ; X86-BWON-NEXT: orl $1, %eax
865+ ; X86-BWON-NEXT: movb %cl, (%ebx,%eax)
866+ ; X86-BWON-NEXT: movb %dl, (%ebx,%esi)
875867; X86-BWON-NEXT: incl %edi
876- ; X86-BWON-NEXT: addl $2, %eax
868+ ; X86-BWON-NEXT: addl $2, %esi
877869; X86-BWON-NEXT: adcl $0, %ebp
878- ; X86-BWON-NEXT: cmpl {{[0-9]+}}(%esp), %eax
879- ; X86-BWON-NEXT: movl %ebp, %ecx
880- ; X86-BWON-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
870+ ; X86-BWON-NEXT: cmpl {{[0-9]+}}(%esp), %esi
871+ ; X86-BWON-NEXT: movl %ebp, %eax
872+ ; X86-BWON-NEXT: sbbl {{[0-9]+}}(%esp), %eax
881873; X86-BWON-NEXT: jl .LBB10_1
882874; X86-BWON-NEXT: # %bb.2:
883875; X86-BWON-NEXT: popl %esi
@@ -904,26 +896,26 @@ define void @MergeLoadStoreBaseIndexOffsetComplicated(ptr %a, ptr %b, ptr %c, i6
904896; X86-BWOFF-NEXT: .cfi_offset %edi, -16
905897; X86-BWOFF-NEXT: .cfi_offset %ebx, -12
906898; X86-BWOFF-NEXT: .cfi_offset %ebp, -8
907- ; X86-BWOFF-NEXT: xorl %eax, %eax
908- ; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %esi
899+ ; X86-BWOFF-NEXT: xorl %esi, %esi
909900; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %edi
910901; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %ebx
911902; X86-BWOFF-NEXT: xorl %ebp, %ebp
912903; X86-BWOFF-NEXT: .p2align 4
913904; X86-BWOFF-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
914905; X86-BWOFF-NEXT: movsbl (%edi), %ecx
915- ; X86-BWOFF-NEXT: movb (%esi,%ecx), %dl
916- ; X86-BWOFF-NEXT: movb 1(%esi,%ecx), %cl
917- ; X86-BWOFF-NEXT: movb %dl, (%ebx,%eax)
918- ; X86-BWOFF-NEXT: movl %eax, %edx
919- ; X86-BWOFF-NEXT: orl $1, %edx
920- ; X86-BWOFF-NEXT: movb %cl, (%ebx,%edx)
906+ ; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %eax
907+ ; X86-BWOFF-NEXT: movb (%eax,%ecx), %dl
908+ ; X86-BWOFF-NEXT: movb 1(%eax,%ecx), %cl
909+ ; X86-BWOFF-NEXT: movl %esi, %eax
910+ ; X86-BWOFF-NEXT: orl $1, %eax
911+ ; X86-BWOFF-NEXT: movb %cl, (%ebx,%eax)
912+ ; X86-BWOFF-NEXT: movb %dl, (%ebx,%esi)
921913; X86-BWOFF-NEXT: incl %edi
922- ; X86-BWOFF-NEXT: addl $2, %eax
914+ ; X86-BWOFF-NEXT: addl $2, %esi
923915; X86-BWOFF-NEXT: adcl $0, %ebp
924- ; X86-BWOFF-NEXT: cmpl {{[0-9]+}}(%esp), %eax
925- ; X86-BWOFF-NEXT: movl %ebp, %ecx
926- ; X86-BWOFF-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
916+ ; X86-BWOFF-NEXT: cmpl {{[0-9]+}}(%esp), %esi
917+ ; X86-BWOFF-NEXT: movl %ebp, %eax
918+ ; X86-BWOFF-NEXT: sbbl {{[0-9]+}}(%esp), %eax
927919; X86-BWOFF-NEXT: jl .LBB10_1
928920; X86-BWOFF-NEXT: # %bb.2:
929921; X86-BWOFF-NEXT: popl %esi
0 commit comments