@@ -605,8 +605,9 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
605605;
606606; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
607607; X86-NO-BMI2-NO-SHLD: # %bb.0:
608+ ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
608609; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
609- ; X86-NO-BMI2-NO-SHLD-NEXT: subl $40 , %esp
610+ ; X86-NO-BMI2-NO-SHLD-NEXT: subl $36 , %esp
610611; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
611612; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
612613; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -618,18 +619,19 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
618619; X86-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
619620; X86-NO-BMI2-NO-SHLD-NEXT: shrb $3, %dl
620621; X86-NO-BMI2-NO-SHLD-NEXT: andb $12, %dl
621- ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
622- ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%edx ), %esi
623- ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx ), %edx
624- ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
622+ ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %esi
623+ ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%esi ), %edx
624+ ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%esi ), %ebx
625+ ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
625626; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
626- ; X86-NO-BMI2-NO-SHLD-NEXT: addl %edx , %edx
627+ ; X86-NO-BMI2-NO-SHLD-NEXT: addl %ebx , %ebx
627628; X86-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
628- ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
629- ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi , %edx
630- ; X86-NO-BMI2-NO-SHLD-NEXT: movb %dl , (%eax)
631- ; X86-NO-BMI2-NO-SHLD-NEXT: addl $40 , %esp
629+ ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
630+ ; X86-NO-BMI2-NO-SHLD-NEXT: orb %dl , %bl
631+ ; X86-NO-BMI2-NO-SHLD-NEXT: movb %bl , (%eax)
632+ ; X86-NO-BMI2-NO-SHLD-NEXT: addl $36 , %esp
632633; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
634+ ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebx
633635; X86-NO-BMI2-NO-SHLD-NEXT: retl
634636;
635637; X86-SHLD-LABEL: load_1byte_chunk_of_16byte_alloca:
@@ -673,12 +675,12 @@ define void @load_1byte_chunk_of_16byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
673675; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $3, %dl
674676; X86-HAVE-BMI2-NO-SHLD-NEXT: andb $12, %dl
675677; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
676- ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %esi
678+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %esi
679+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx), %edx
677680; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
678- ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx), %edx
679- ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
680- ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
681- ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
681+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi
682+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx
683+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: orb %dl, %cl
682684; X86-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax)
683685; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $40, %esp
684686; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
@@ -1224,19 +1226,26 @@ define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
12241226; X64-NO-BMI2: # %bb.0:
12251227; X64-NO-BMI2-NEXT: movups (%rdi), %xmm0
12261228; X64-NO-BMI2-NEXT: movups 16(%rdi), %xmm1
1227- ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx
1229+ ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %eax
12281230; X64-NO-BMI2-NEXT: xorps %xmm2, %xmm2
12291231; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
12301232; X64-NO-BMI2-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
12311233; X64-NO-BMI2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
12321234; X64-NO-BMI2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1233- ; X64-NO-BMI2-NEXT: movl %ecx, %eax
1234- ; X64-NO-BMI2-NEXT: shrb $6, %al
1235- ; X64-NO-BMI2-NEXT: movzbl %al, %eax
1236- ; X64-NO-BMI2-NEXT: movq -72(%rsp,%rax,8), %rax
1237- ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx
1238- ; X64-NO-BMI2-NEXT: shrq %cl, %rax
1239- ; X64-NO-BMI2-NEXT: movb %al, (%rdx)
1235+ ; X64-NO-BMI2-NEXT: movl %eax, %ecx
1236+ ; X64-NO-BMI2-NEXT: shrb $6, %cl
1237+ ; X64-NO-BMI2-NEXT: movzbl %cl, %esi
1238+ ; X64-NO-BMI2-NEXT: movl -64(%rsp,%rsi,8), %edi
1239+ ; X64-NO-BMI2-NEXT: addl %edi, %edi
1240+ ; X64-NO-BMI2-NEXT: movl %eax, %ecx
1241+ ; X64-NO-BMI2-NEXT: andb $56, %cl
1242+ ; X64-NO-BMI2-NEXT: notb %cl
1243+ ; X64-NO-BMI2-NEXT: shlq %cl, %rdi
1244+ ; X64-NO-BMI2-NEXT: movq -72(%rsp,%rsi,8), %rsi
1245+ ; X64-NO-BMI2-NEXT: movl %eax, %ecx
1246+ ; X64-NO-BMI2-NEXT: shrq %cl, %rsi
1247+ ; X64-NO-BMI2-NEXT: orb %sil, %dil
1248+ ; X64-NO-BMI2-NEXT: movb %dil, (%rdx)
12401249; X64-NO-BMI2-NEXT: retq
12411250;
12421251; X64-BMI2-LABEL: load_1byte_chunk_of_32byte_alloca:
@@ -1252,14 +1261,22 @@ define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
12521261; X64-BMI2-NEXT: movl %esi, %eax
12531262; X64-BMI2-NEXT: shrb $6, %al
12541263; X64-BMI2-NEXT: movzbl %al, %eax
1255- ; X64-BMI2-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rax
1256- ; X64-BMI2-NEXT: movb %al, (%rdx)
1264+ ; X64-BMI2-NEXT: shrxq %rsi, -72(%rsp,%rax,8), %rcx
1265+ ; X64-BMI2-NEXT: # kill: def $sil killed $sil killed $rsi def $rsi
1266+ ; X64-BMI2-NEXT: andb $56, %sil
1267+ ; X64-BMI2-NEXT: notb %sil
1268+ ; X64-BMI2-NEXT: movl -64(%rsp,%rax,8), %eax
1269+ ; X64-BMI2-NEXT: addl %eax, %eax
1270+ ; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
1271+ ; X64-BMI2-NEXT: orb %al, %cl
1272+ ; X64-BMI2-NEXT: movb %cl, (%rdx)
12571273; X64-BMI2-NEXT: retq
12581274;
12591275; X86-NO-BMI2-NO-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca:
12601276; X86-NO-BMI2-NO-SHLD: # %bb.0:
1277+ ; X86-NO-BMI2-NO-SHLD-NEXT: pushl %ebx
12611278; X86-NO-BMI2-NO-SHLD-NEXT: pushl %esi
1262- ; X86-NO-BMI2-NO-SHLD-NEXT: subl $72 , %esp
1279+ ; X86-NO-BMI2-NO-SHLD-NEXT: subl $68 , %esp
12631280; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %ecx
12641281; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %eax
12651282; X86-NO-BMI2-NO-SHLD-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -1273,18 +1290,19 @@ define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
12731290; X86-NO-BMI2-NO-SHLD-NEXT: movaps %xmm0, (%esp)
12741291; X86-NO-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
12751292; X86-NO-BMI2-NO-SHLD-NEXT: shrb $5, %dl
1276- ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1277- ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%edx ,4), %esi
1278- ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx ,4), %edx
1279- ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %esi
1293+ ; X86-NO-BMI2-NO-SHLD-NEXT: movzbl %dl, %esi
1294+ ; X86-NO-BMI2-NO-SHLD-NEXT: movl (%esp,%esi ,4), %edx
1295+ ; X86-NO-BMI2-NO-SHLD-NEXT: movl 4(%esp,%esi ,4), %ebx
1296+ ; X86-NO-BMI2-NO-SHLD-NEXT: shrl %cl, %edx
12801297; X86-NO-BMI2-NO-SHLD-NEXT: notb %cl
1281- ; X86-NO-BMI2-NO-SHLD-NEXT: addl %edx , %edx
1298+ ; X86-NO-BMI2-NO-SHLD-NEXT: addl %ebx , %ebx
12821299; X86-NO-BMI2-NO-SHLD-NEXT: # kill: def $cl killed $cl killed $ecx
1283- ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %edx
1284- ; X86-NO-BMI2-NO-SHLD-NEXT: orl %esi , %edx
1285- ; X86-NO-BMI2-NO-SHLD-NEXT: movb %dl , (%eax)
1286- ; X86-NO-BMI2-NO-SHLD-NEXT: addl $72 , %esp
1300+ ; X86-NO-BMI2-NO-SHLD-NEXT: shll %cl, %ebx
1301+ ; X86-NO-BMI2-NO-SHLD-NEXT: orb %dl , %bl
1302+ ; X86-NO-BMI2-NO-SHLD-NEXT: movb %bl , (%eax)
1303+ ; X86-NO-BMI2-NO-SHLD-NEXT: addl $68 , %esp
12871304; X86-NO-BMI2-NO-SHLD-NEXT: popl %esi
1305+ ; X86-NO-BMI2-NO-SHLD-NEXT: popl %ebx
12881306; X86-NO-BMI2-NO-SHLD-NEXT: retl
12891307;
12901308; X86-SHLD-LABEL: load_1byte_chunk_of_32byte_alloca:
@@ -1332,12 +1350,12 @@ define void @load_1byte_chunk_of_32byte_alloca(ptr %src, i64 %byteOff, ptr %dst)
13321350; X86-HAVE-BMI2-NO-SHLD-NEXT: movl %ecx, %edx
13331351; X86-HAVE-BMI2-NO-SHLD-NEXT: shrb $5, %dl
13341352; X86-HAVE-BMI2-NO-SHLD-NEXT: movzbl %dl, %edx
1335- ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %esi
1353+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %esi
1354+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: shrxl %ecx, (%esp,%edx,4), %edx
13361355; X86-HAVE-BMI2-NO-SHLD-NEXT: notb %cl
1337- ; X86-HAVE-BMI2-NO-SHLD-NEXT: movl 4(%esp,%edx,4), %edx
1338- ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %edx, %edx
1339- ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %edx, %ecx
1340- ; X86-HAVE-BMI2-NO-SHLD-NEXT: orl %esi, %ecx
1356+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: addl %esi, %esi
1357+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: shlxl %ecx, %esi, %ecx
1358+ ; X86-HAVE-BMI2-NO-SHLD-NEXT: orb %dl, %cl
13411359; X86-HAVE-BMI2-NO-SHLD-NEXT: movb %cl, (%eax)
13421360; X86-HAVE-BMI2-NO-SHLD-NEXT: addl $72, %esp
13431361; X86-HAVE-BMI2-NO-SHLD-NEXT: popl %esi
0 commit comments