@@ -967,82 +967,63 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind
967967; X86-NEXT: pushl %esi
968968; X86-NEXT: andl $-16, %esp
969969; X86-NEXT: subl $80, %esp
970- ; X86-NEXT: movzbl 16(%ebp), %ecx
971- ; X86-NEXT: movl 12(%ebp), %edx
972- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
973- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
974- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
975- ; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
976- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
977- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
978- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
979- ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
980- ; X86-NEXT: movl %ecx, %eax
981- ; X86-NEXT: shrb $3, %al
982- ; X86-NEXT: andb $12, %al
983- ; X86-NEXT: negb %al
984- ; X86-NEXT: movsbl %al, %eax
985- ; X86-NEXT: movl 56(%esp,%eax), %esi
986- ; X86-NEXT: movl 60(%esp,%eax), %ebx
987- ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
988- ; X86-NEXT: shldl %cl, %esi, %ebx
989- ; X86-NEXT: movzwl 14(%edx), %edi
970+ ; X86-NEXT: movl 12(%ebp), %eax
971+ ; X86-NEXT: movzwl (%eax), %ecx
972+ ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
973+ ; X86-NEXT: movzwl 12(%eax), %ecx
974+ ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
975+ ; X86-NEXT: movzwl 14(%eax), %edi
990976; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
991977; X86-NEXT: shll $16, %edi
992- ; X86-NEXT: movzwl 12(%edx), %ecx
993- ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
994978; X86-NEXT: orl %ecx, %edi
995- ; X86-NEXT: xorl %ebx, %edi
996- ; X86-NEXT: movl 52(%esp,%eax), %edx
997- ; X86-NEXT: movzbl 16(%ebp), %ecx
998- ; X86-NEXT: shldl %cl, %edx, %esi
999- ; X86-NEXT: movl 12(%ebp), %eax
1000- ; X86-NEXT: movzwl 10(%eax), %ebx
1001- ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1002- ; X86-NEXT: shll $16, %ebx
1003- ; X86-NEXT: movzwl 8(%eax), %eax
1004- ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1005- ; X86-NEXT: orl %eax, %ebx
1006- ; X86-NEXT: xorl %esi, %ebx
1007- ; X86-NEXT: movl (%esp), %eax # 4-byte Reload
1008- ; X86-NEXT: movl 48(%esp,%eax), %esi
1009- ; X86-NEXT: shldl %cl, %esi, %edx
1010- ; X86-NEXT: movl 12(%ebp), %ecx
1011- ; X86-NEXT: movzwl 6(%ecx), %eax
1012- ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
1013- ; X86-NEXT: shll $16, %eax
1014- ; X86-NEXT: movzwl 4(%ecx), %ecx
979+ ; X86-NEXT: movzwl 2(%eax), %ecx
1015980; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1016- ; X86-NEXT: orl %ecx, %eax
1017- ; X86-NEXT: xorl %edx, %eax
1018- ; X86-NEXT: movzbl 16(%ebp), %ecx
1019- ; X86-NEXT: shll %cl, %esi
1020- ; X86-NEXT: movl 12(%ebp), %ecx
1021- ; X86-NEXT: movzwl 2(%ecx), %edx
981+ ; X86-NEXT: movzwl 4(%eax), %edx
1022982; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1023- ; X86-NEXT: shll $16 , %edx
1024- ; X86-NEXT: movzwl (%ecx ), %ecx
983+ ; X86-NEXT: movzwl 6(%eax) , %esi
984+ ; X86-NEXT: movzwl 8(%eax ), %ecx
1025985; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1026- ; X86-NEXT: orl %ecx, %edx
1027- ; X86-NEXT: xorl %esi, %edx
986+ ; X86-NEXT: movzwl 10(%eax), %eax
987+ ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
988+ ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
989+ ; X86-NEXT: shll $16, %eax
990+ ; X86-NEXT: orl %ecx, %eax
991+ ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
992+ ; X86-NEXT: movl %esi, %eax
993+ ; X86-NEXT: shll $16, %eax
994+ ; X86-NEXT: orl %edx, %eax
995+ ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
996+ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
997+ ; X86-NEXT: shll $16, %eax
998+ ; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
999+ ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
1000+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1001+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1002+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1003+ ; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1004+ ; X86-NEXT: movl 16(%ebp), %eax
1005+ ; X86-NEXT: movl %eax, %ebx
1006+ ; X86-NEXT: andb $96, %bl
1007+ ; X86-NEXT: shrb $3, %bl
1008+ ; X86-NEXT: movzbl %bl, %edi
1009+ ; X86-NEXT: movl 32(%esp,%edi), %edi
1010+ ; X86-NEXT: btcl %eax, %edi
1011+ ; X86-NEXT: andl $96, %eax
1012+ ; X86-NEXT: shrl $3, %eax
10281013; X86-NEXT: movl 12(%ebp), %ecx
1029- ; X86-NEXT: movl %edi, 12(%ecx)
1030- ; X86-NEXT: movl %ebx, 8(%ecx)
1031- ; X86-NEXT: movl %eax, 4(%ecx)
1032- ; X86-NEXT: movl %edx, (%ecx)
1014+ ; X86-NEXT: movl %edi, (%ecx,%eax)
10331015; X86-NEXT: movl 8(%ebp), %eax
10341016; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
10351017; X86-NEXT: movw %dx, 14(%eax)
10361018; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
10371019; X86-NEXT: movw %dx, 12(%eax)
1038- ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1039- ; X86-NEXT: movw %dx, 10(%eax)
1040- ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1041- ; X86-NEXT: movw %dx, 8(%eax)
1042- ; X86-NEXT: movl (%esp), %edx # 4-byte Reload
1043- ; X86-NEXT: movw %dx, 6(%eax)
1044- ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1045- ; X86-NEXT: movw %dx, 4(%eax)
1020+ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1021+ ; X86-NEXT: movw %cx, 10(%eax)
1022+ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1023+ ; X86-NEXT: movw %cx, 8(%eax)
1024+ ; X86-NEXT: movw %si, 6(%eax)
1025+ ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1026+ ; X86-NEXT: movw %cx, 4(%eax)
10461027; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
10471028; X86-NEXT: movw %cx, 2(%eax)
10481029; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1056,81 +1037,57 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind
10561037;
10571038; SSE2-LABEL: complement_ne_i128_bitcast:
10581039; SSE2: # %bb.0:
1059- ; SSE2-NEXT: movl %esi, %ecx
1060- ; SSE2-NEXT: movl $1, %eax
1061- ; SSE2-NEXT: xorl %edx, %edx
1062- ; SSE2-NEXT: shldq %cl, %rax, %rdx
1063- ; SSE2-NEXT: xorl %esi, %esi
1064- ; SSE2-NEXT: shlq %cl, %rax
1065- ; SSE2-NEXT: testb $64, %cl
1066- ; SSE2-NEXT: cmovneq %rax, %rdx
1067- ; SSE2-NEXT: cmovneq %rsi, %rax
1040+ ; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
10681041; SSE2-NEXT: movdqa (%rdi), %xmm0
1069- ; SSE2-NEXT: xorq %rdx, 8(%rdi)
1070- ; SSE2-NEXT: movq %xmm0, %rcx
1071- ; SSE2-NEXT: xorq %rax, %rcx
1072- ; SSE2-NEXT: movq %rcx, (%rdi)
1042+ ; SSE2-NEXT: movq 8(%rdi), %rax
1043+ ; SSE2-NEXT: movq %xmm0, %rdx
1044+ ; SSE2-NEXT: movl %esi, %ecx
1045+ ; SSE2-NEXT: andb $32, %cl
1046+ ; SSE2-NEXT: shrdq %cl, %rax, %rdx
1047+ ; SSE2-NEXT: shrq %cl, %rax
1048+ ; SSE2-NEXT: testb $64, %sil
1049+ ; SSE2-NEXT: cmoveq %rdx, %rax
1050+ ; SSE2-NEXT: btcl %esi, %eax
1051+ ; SSE2-NEXT: andl $96, %esi
1052+ ; SSE2-NEXT: shrl $3, %esi
1053+ ; SSE2-NEXT: movl %eax, (%rdi,%rsi)
10731054; SSE2-NEXT: retq
10741055;
10751056; SSE4-LABEL: complement_ne_i128_bitcast:
10761057; SSE4: # %bb.0:
1077- ; SSE4-NEXT: movl %esi, %ecx
1078- ; SSE4-NEXT: movl $1, %eax
1079- ; SSE4-NEXT: xorl %edx, %edx
1080- ; SSE4-NEXT: shldq %cl, %rax, %rdx
1081- ; SSE4-NEXT: shlq %cl, %rax
1082- ; SSE4-NEXT: xorl %esi, %esi
1083- ; SSE4-NEXT: testb $64, %cl
1084- ; SSE4-NEXT: cmovneq %rax, %rdx
1085- ; SSE4-NEXT: cmovneq %rsi, %rax
1058+ ; SSE4-NEXT: # kill: def $esi killed $esi def $rsi
10861059; SSE4-NEXT: movdqa (%rdi), %xmm0
1087- ; SSE4-NEXT: movq %xmm0, %rcx
1088- ; SSE4-NEXT: xorq %rax, %rcx
10891060; SSE4-NEXT: pextrq $1, %xmm0, %rax
1090- ; SSE4-NEXT: xorq %rdx, %rax
1091- ; SSE4-NEXT: movq %rax, 8(%rdi)
1092- ; SSE4-NEXT: movq %rcx, (%rdi)
1061+ ; SSE4-NEXT: movq %xmm0, %rdx
1062+ ; SSE4-NEXT: movl %esi, %ecx
1063+ ; SSE4-NEXT: andb $32, %cl
1064+ ; SSE4-NEXT: shrdq %cl, %rax, %rdx
1065+ ; SSE4-NEXT: shrq %cl, %rax
1066+ ; SSE4-NEXT: testb $64, %sil
1067+ ; SSE4-NEXT: cmoveq %rdx, %rax
1068+ ; SSE4-NEXT: btcl %esi, %eax
1069+ ; SSE4-NEXT: andl $96, %esi
1070+ ; SSE4-NEXT: shrl $3, %esi
1071+ ; SSE4-NEXT: movl %eax, (%rdi,%rsi)
10931072; SSE4-NEXT: retq
10941073;
1095- ; AVX2-LABEL: complement_ne_i128_bitcast:
1096- ; AVX2: # %bb.0:
1097- ; AVX2-NEXT: movl %esi, %ecx
1098- ; AVX2-NEXT: movl $1, %eax
1099- ; AVX2-NEXT: xorl %edx, %edx
1100- ; AVX2-NEXT: shldq %cl, %rax, %rdx
1101- ; AVX2-NEXT: xorl %esi, %esi
1102- ; AVX2-NEXT: shlxq %rcx, %rax, %rax
1103- ; AVX2-NEXT: testb $64, %cl
1104- ; AVX2-NEXT: cmovneq %rax, %rdx
1105- ; AVX2-NEXT: cmovneq %rsi, %rax
1106- ; AVX2-NEXT: vmovdqa (%rdi), %xmm0
1107- ; AVX2-NEXT: vmovq %xmm0, %rcx
1108- ; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
1109- ; AVX2-NEXT: xorq %rax, %rcx
1110- ; AVX2-NEXT: xorq %rdx, %rsi
1111- ; AVX2-NEXT: movq %rsi, 8(%rdi)
1112- ; AVX2-NEXT: movq %rcx, (%rdi)
1113- ; AVX2-NEXT: retq
1114- ;
1115- ; AVX512-LABEL: complement_ne_i128_bitcast:
1116- ; AVX512: # %bb.0:
1117- ; AVX512-NEXT: movl %esi, %ecx
1118- ; AVX512-NEXT: xorl %eax, %eax
1119- ; AVX512-NEXT: movl $1, %edx
1120- ; AVX512-NEXT: xorl %esi, %esi
1121- ; AVX512-NEXT: shldq %cl, %rdx, %rsi
1122- ; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
1123- ; AVX512-NEXT: testb $64, %cl
1124- ; AVX512-NEXT: cmovneq %rdx, %rsi
1125- ; AVX512-NEXT: cmovneq %rax, %rdx
1126- ; AVX512-NEXT: vmovdqa (%rdi), %xmm0
1127- ; AVX512-NEXT: vmovq %xmm0, %rax
1128- ; AVX512-NEXT: xorq %rdx, %rax
1129- ; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
1130- ; AVX512-NEXT: xorq %rsi, %rcx
1131- ; AVX512-NEXT: movq %rcx, 8(%rdi)
1132- ; AVX512-NEXT: movq %rax, (%rdi)
1133- ; AVX512-NEXT: retq
1074+ ; AVX-LABEL: complement_ne_i128_bitcast:
1075+ ; AVX: # %bb.0:
1076+ ; AVX-NEXT: # kill: def $esi killed $esi def $rsi
1077+ ; AVX-NEXT: vmovdqa (%rdi), %xmm0
1078+ ; AVX-NEXT: vpextrq $1, %xmm0, %rax
1079+ ; AVX-NEXT: vmovq %xmm0, %rdx
1080+ ; AVX-NEXT: movl %esi, %ecx
1081+ ; AVX-NEXT: andb $32, %cl
1082+ ; AVX-NEXT: shrdq %cl, %rax, %rdx
1083+ ; AVX-NEXT: shrxq %rcx, %rax, %rax
1084+ ; AVX-NEXT: testb $64, %sil
1085+ ; AVX-NEXT: cmoveq %rdx, %rax
1086+ ; AVX-NEXT: btcl %esi, %eax
1087+ ; AVX-NEXT: andl $96, %esi
1088+ ; AVX-NEXT: shrl $3, %esi
1089+ ; AVX-NEXT: movl %eax, (%rdi,%rsi)
1090+ ; AVX-NEXT: retq
11341091 %rem = and i32 %position , 127
11351092 %ofs = zext nneg i32 %rem to i128
11361093 %bit = shl nuw i128 1 , %ofs
0 commit comments