@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
-; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE4
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
 
@@ -956,6 +956,192 @@ define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
   ret i1 %cmp
 }
 
+; Load hidden behind bitcast
+define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind {
+; X86-LABEL: complement_ne_i128_bitcast:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-16, %esp
+; X86-NEXT: subl $80, %esp
+; X86-NEXT: movzbl 16(%ebp), %ecx
+; X86-NEXT: movl 12(%ebp), %edx
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $3, %al
+; X86-NEXT: andb $12, %al
+; X86-NEXT: negb %al
+; X86-NEXT: movsbl %al, %eax
+; X86-NEXT: movl 56(%esp,%eax), %esi
+; X86-NEXT: movl 60(%esp,%eax), %ebx
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: shldl %cl, %esi, %ebx
+; X86-NEXT: movzwl 14(%edx), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shll $16, %edi
+; X86-NEXT: movzwl 12(%edx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: xorl %ebx, %edi
+; X86-NEXT: movl 52(%esp,%eax), %edx
+; X86-NEXT: movzbl 16(%ebp), %ecx
+; X86-NEXT: shldl %cl, %edx, %esi
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movzwl 10(%eax), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shll $16, %ebx
+; X86-NEXT: movzwl 8(%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %eax, %ebx
+; X86-NEXT: xorl %esi, %ebx
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: movl 48(%esp,%eax), %esi
+; X86-NEXT: shldl %cl, %esi, %edx
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movzwl 6(%ecx), %eax
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: movzwl 4(%ecx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: movzbl 16(%ebp), %ecx
+; X86-NEXT: shll %cl, %esi
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movzwl 2(%ecx), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shll $16, %edx
+; X86-NEXT: movzwl (%ecx), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %edi, 12(%ecx)
+; X86-NEXT: movl %ebx, 8(%ecx)
+; X86-NEXT: movl %eax, 4(%ecx)
+; X86-NEXT: movl %edx, (%ecx)
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movw %dx, 14(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movw %dx, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movw %dx, 10(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movw %dx, 8(%eax)
+; X86-NEXT: movl (%esp), %edx # 4-byte Reload
+; X86-NEXT: movw %dx, 6(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movw %dx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 2(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+;
+; SSE2-LABEL: complement_ne_i128_bitcast:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movl %esi, %ecx
+; SSE2-NEXT: movl $1, %eax
+; SSE2-NEXT: xorl %edx, %edx
+; SSE2-NEXT: shldq %cl, %rax, %rdx
+; SSE2-NEXT: xorl %esi, %esi
+; SSE2-NEXT: shlq %cl, %rax
+; SSE2-NEXT: testb $64, %cl
+; SSE2-NEXT: cmovneq %rax, %rdx
+; SSE2-NEXT: cmovneq %rsi, %rax
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: xorq %rdx, 8(%rdi)
+; SSE2-NEXT: movq %xmm0, %rcx
+; SSE2-NEXT: xorq %rax, %rcx
+; SSE2-NEXT: movq %rcx, (%rdi)
+; SSE2-NEXT: retq
+;
+; SSE4-LABEL: complement_ne_i128_bitcast:
+; SSE4: # %bb.0:
+; SSE4-NEXT: movl %esi, %ecx
+; SSE4-NEXT: movl $1, %eax
+; SSE4-NEXT: xorl %edx, %edx
+; SSE4-NEXT: shldq %cl, %rax, %rdx
+; SSE4-NEXT: shlq %cl, %rax
+; SSE4-NEXT: xorl %esi, %esi
+; SSE4-NEXT: testb $64, %cl
+; SSE4-NEXT: cmovneq %rax, %rdx
+; SSE4-NEXT: cmovneq %rsi, %rax
+; SSE4-NEXT: movdqa (%rdi), %xmm0
+; SSE4-NEXT: movq %xmm0, %rcx
+; SSE4-NEXT: xorq %rax, %rcx
+; SSE4-NEXT: pextrq $1, %xmm0, %rax
+; SSE4-NEXT: xorq %rdx, %rax
+; SSE4-NEXT: movq %rax, 8(%rdi)
+; SSE4-NEXT: movq %rcx, (%rdi)
+; SSE4-NEXT: retq
+;
+; AVX2-LABEL: complement_ne_i128_bitcast:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %esi, %ecx
+; AVX2-NEXT: movl $1, %eax
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: shldq %cl, %rax, %rdx
+; AVX2-NEXT: xorl %esi, %esi
+; AVX2-NEXT: shlxq %rcx, %rax, %rax
+; AVX2-NEXT: testb $64, %cl
+; AVX2-NEXT: cmovneq %rax, %rdx
+; AVX2-NEXT: cmovneq %rsi, %rax
+; AVX2-NEXT: vmovdqa (%rdi), %xmm0
+; AVX2-NEXT: vmovq %xmm0, %rcx
+; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX2-NEXT: xorq %rax, %rcx
+; AVX2-NEXT: xorq %rdx, %rsi
+; AVX2-NEXT: movq %rsi, 8(%rdi)
+; AVX2-NEXT: movq %rcx, (%rdi)
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: complement_ne_i128_bitcast:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl %esi, %ecx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: movl $1, %edx
+; AVX512-NEXT: xorl %esi, %esi
+; AVX512-NEXT: shldq %cl, %rdx, %rsi
+; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
+; AVX512-NEXT: testb $64, %cl
+; AVX512-NEXT: cmovneq %rdx, %rsi
+; AVX512-NEXT: cmovneq %rax, %rdx
+; AVX512-NEXT: vmovdqa (%rdi), %xmm0
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: xorq %rdx, %rax
+; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512-NEXT: xorq %rsi, %rcx
+; AVX512-NEXT: movq %rcx, 8(%rdi)
+; AVX512-NEXT: movq %rax, (%rdi)
+; AVX512-NEXT: retq
+  %rem = and i32 %position, 127
+  %ofs = zext nneg i32 %rem to i128
+  %bit = shl nuw i128 1, %ofs
+  %ldv = load <8 x i16>, ptr %word
+  %ld = bitcast <8 x i16> %ldv to i128
+  %test = and i128 %ld, %bit
+  %res = xor i128 %ld, %bit
+  store i128 %res, ptr %word
+  ret <8 x i16> %ldv
+}
+
 ; Multiple loads in store chain
 define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
 ; X86-LABEL: reset_multiload_i128:
@@ -975,10 +1161,10 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
 ; X86-NEXT: btrl %edx, %ebx
 ; X86-NEXT: btl %edx, %edi
 ; X86-NEXT: movl %ebx, (%ecx,%esi)
-; X86-NEXT: jae .LBB22_2
+; X86-NEXT: jae .LBB23_2
 ; X86-NEXT: # %bb.1:
 ; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: .LBB22_2:
+; X86-NEXT: .LBB23_2:
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx
@@ -994,10 +1180,10 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
 ; X64-NEXT: btrl %esi, %r8d
 ; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: btl %esi, %r9d
-; X64-NEXT: jb .LBB22_2
+; X64-NEXT: jb .LBB23_2
 ; X64-NEXT: # %bb.1:
 ; X64-NEXT: movl (%rdx), %eax
-; X64-NEXT: .LBB22_2:
+; X64-NEXT: .LBB23_2:
 ; X64-NEXT: movl %r8d, (%rdi,%rcx)
 ; X64-NEXT: retq
   %rem = and i32 %position, 127
@@ -1046,10 +1232,10 @@ define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind
 ; X86-NEXT: movl %edi, (%edx)
 ; X86-NEXT: movl (%eax), %eax
 ; X86-NEXT: orl %ecx, %ebp
-; X86-NEXT: jne .LBB23_2
+; X86-NEXT: jne .LBB24_2
 ; X86-NEXT: # %bb.1:
 ; X86-NEXT: addl %esi, %eax
-; X86-NEXT: .LBB23_2:
+; X86-NEXT: .LBB24_2:
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
 ; X86-NEXT: popl %ebx