@@ -543,10 +543,10 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
543543; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
544544; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
545545; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
546+ ; X64-NO-BMI2-NO-SHLD-NEXT: leal (%rax,%rax), %r8d
546547; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
547- ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
548548; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
549- ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8
549+ ; X64-NO-BMI2-NO-SHLD-NEXT: orl %edi, %r8d
550550; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
551551; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
552552; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -573,19 +573,19 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
573573; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
574574; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
575575; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
576- ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
577- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
576+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
577+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
578578; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
579- ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
580579; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
581580; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
582- ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
581+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx), %r8d
583582; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
584- ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
585583; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
584+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
585+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
586586; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
587- ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax
588- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %al, (%rdx)
587+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx
588+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%rdx)
589589; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
590590;
591591; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
@@ -651,10 +651,10 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
651651; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
652652; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
653653; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
654+ ; X64-NO-BMI2-NO-SHLD-NEXT: leal (%rax,%rax), %r8d
654655; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
655- ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
656656; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
657- ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8
657+ ; X64-NO-BMI2-NO-SHLD-NEXT: orl %edi, %r8d
658658; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
659659; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
660660; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -681,19 +681,19 @@ define void @load_2byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
681681; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
682682; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
683683; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
684- ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
685- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
684+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
685+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
686686; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
687- ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
688687; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
689688; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
690- ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
689+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx), %r8d
691690; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
692- ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
693691; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
692+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
693+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
694694; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
695- ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax
696- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %ax, (%rdx)
695+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx
696+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movw %cx, (%rdx)
697697; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
698698;
699699; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_2byte_chunk_of_16byte_alloca:
@@ -758,10 +758,10 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
758758; X64-NO-BMI2-NO-SHLD-NEXT: movq %xmm0, %rdi
759759; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
760760; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rdi
761+ ; X64-NO-BMI2-NO-SHLD-NEXT: leal (%rax,%rax), %r8d
761762; X64-NO-BMI2-NO-SHLD-NEXT: notb %cl
762- ; X64-NO-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
763763; X64-NO-BMI2-NO-SHLD-NEXT: shlq %cl, %r8
764- ; X64-NO-BMI2-NO-SHLD-NEXT: orq %rdi, %r8
764+ ; X64-NO-BMI2-NO-SHLD-NEXT: orl %edi, %r8d
765765; X64-NO-BMI2-NO-SHLD-NEXT: movl %esi, %ecx
766766; X64-NO-BMI2-NO-SHLD-NEXT: shrq %cl, %rax
767767; X64-NO-BMI2-NO-SHLD-NEXT: testb $64, %sil
@@ -788,19 +788,19 @@ define void @load_4byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
788788; X64-HAVE-BMI2-NO-SHLD: # %bb.0:
789789; X64-HAVE-BMI2-NO-SHLD-NEXT: movdqu (%rdi), %xmm0
790790; X64-HAVE-BMI2-NO-SHLD-NEXT: shll $3, %esi
791- ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
792- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm1, %rax
791+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rax
792+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
793793; X64-HAVE-BMI2-NO-SHLD-NEXT: movq %xmm0, %rcx
794- ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
795794; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %esi, %edi
796795; X64-HAVE-BMI2-NO-SHLD-NEXT: notb %dil
797- ; X64-HAVE-BMI2-NO-SHLD-NEXT: leaq (%rax,%rax), %r8
796+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: leal (%rcx,%rcx), %r8d
798797; X64-HAVE-BMI2-NO-SHLD-NEXT: shlxq %rdi, %r8, %rdi
799- ; X64-HAVE-BMI2-NO-SHLD-NEXT: orq %rcx, %rdi
800798; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rax, %rax
799+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: orl %edi, %eax
800+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: shrxq %rsi, %rcx, %rcx
801801; X64-HAVE-BMI2-NO-SHLD-NEXT: testb $64, %sil
802- ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rdi, %rax
803- ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %eax, (%rdx)
802+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: cmoveq %rax, %rcx
803+ ; X64-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, (%rdx)
804804; X64-HAVE-BMI2-NO-SHLD-NEXT: retq
805805;
806806; X64-HAVE-BMI2-HAVE-SHLD-LABEL: load_4byte_chunk_of_16byte_alloca:
0 commit comments