Skip to content

Commit 6e5f277

Browse files
authored
[X86] bittest-big-integer.ll - add test showing missed RMW fold because the load is hidden behind a bitcast (llvm#167491)
1 parent de3de3f commit 6e5f277

File tree

1 file changed

+194
-8
lines changed

1 file changed

+194
-8
lines changed

llvm/test/CodeGen/X86/bittest-big-integer.ll

Lines changed: 194 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
3-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
4-
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
3+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE,SSE2
4+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE4
55
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
66
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512
77

@@ -956,6 +956,192 @@ define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
956956
ret i1 %cmp
957957
}
958958

959+
; Load hidden behind bitcast
960+
define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind {
961+
; X86-LABEL: complement_ne_i128_bitcast:
962+
; X86: # %bb.0:
963+
; X86-NEXT: pushl %ebp
964+
; X86-NEXT: movl %esp, %ebp
965+
; X86-NEXT: pushl %ebx
966+
; X86-NEXT: pushl %edi
967+
; X86-NEXT: pushl %esi
968+
; X86-NEXT: andl $-16, %esp
969+
; X86-NEXT: subl $80, %esp
970+
; X86-NEXT: movzbl 16(%ebp), %ecx
971+
; X86-NEXT: movl 12(%ebp), %edx
972+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
973+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
974+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
975+
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
976+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
977+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
978+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
979+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
980+
; X86-NEXT: movl %ecx, %eax
981+
; X86-NEXT: shrb $3, %al
982+
; X86-NEXT: andb $12, %al
983+
; X86-NEXT: negb %al
984+
; X86-NEXT: movsbl %al, %eax
985+
; X86-NEXT: movl 56(%esp,%eax), %esi
986+
; X86-NEXT: movl 60(%esp,%eax), %ebx
987+
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
988+
; X86-NEXT: shldl %cl, %esi, %ebx
989+
; X86-NEXT: movzwl 14(%edx), %edi
990+
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
991+
; X86-NEXT: shll $16, %edi
992+
; X86-NEXT: movzwl 12(%edx), %ecx
993+
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
994+
; X86-NEXT: orl %ecx, %edi
995+
; X86-NEXT: xorl %ebx, %edi
996+
; X86-NEXT: movl 52(%esp,%eax), %edx
997+
; X86-NEXT: movzbl 16(%ebp), %ecx
998+
; X86-NEXT: shldl %cl, %edx, %esi
999+
; X86-NEXT: movl 12(%ebp), %eax
1000+
; X86-NEXT: movzwl 10(%eax), %ebx
1001+
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1002+
; X86-NEXT: shll $16, %ebx
1003+
; X86-NEXT: movzwl 8(%eax), %eax
1004+
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1005+
; X86-NEXT: orl %eax, %ebx
1006+
; X86-NEXT: xorl %esi, %ebx
1007+
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
1008+
; X86-NEXT: movl 48(%esp,%eax), %esi
1009+
; X86-NEXT: shldl %cl, %esi, %edx
1010+
; X86-NEXT: movl 12(%ebp), %ecx
1011+
; X86-NEXT: movzwl 6(%ecx), %eax
1012+
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
1013+
; X86-NEXT: shll $16, %eax
1014+
; X86-NEXT: movzwl 4(%ecx), %ecx
1015+
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1016+
; X86-NEXT: orl %ecx, %eax
1017+
; X86-NEXT: xorl %edx, %eax
1018+
; X86-NEXT: movzbl 16(%ebp), %ecx
1019+
; X86-NEXT: shll %cl, %esi
1020+
; X86-NEXT: movl 12(%ebp), %ecx
1021+
; X86-NEXT: movzwl 2(%ecx), %edx
1022+
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1023+
; X86-NEXT: shll $16, %edx
1024+
; X86-NEXT: movzwl (%ecx), %ecx
1025+
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1026+
; X86-NEXT: orl %ecx, %edx
1027+
; X86-NEXT: xorl %esi, %edx
1028+
; X86-NEXT: movl 12(%ebp), %ecx
1029+
; X86-NEXT: movl %edi, 12(%ecx)
1030+
; X86-NEXT: movl %ebx, 8(%ecx)
1031+
; X86-NEXT: movl %eax, 4(%ecx)
1032+
; X86-NEXT: movl %edx, (%ecx)
1033+
; X86-NEXT: movl 8(%ebp), %eax
1034+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1035+
; X86-NEXT: movw %dx, 14(%eax)
1036+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1037+
; X86-NEXT: movw %dx, 12(%eax)
1038+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1039+
; X86-NEXT: movw %dx, 10(%eax)
1040+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1041+
; X86-NEXT: movw %dx, 8(%eax)
1042+
; X86-NEXT: movl (%esp), %edx # 4-byte Reload
1043+
; X86-NEXT: movw %dx, 6(%eax)
1044+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
1045+
; X86-NEXT: movw %dx, 4(%eax)
1046+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1047+
; X86-NEXT: movw %cx, 2(%eax)
1048+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
1049+
; X86-NEXT: movw %cx, (%eax)
1050+
; X86-NEXT: leal -12(%ebp), %esp
1051+
; X86-NEXT: popl %esi
1052+
; X86-NEXT: popl %edi
1053+
; X86-NEXT: popl %ebx
1054+
; X86-NEXT: popl %ebp
1055+
; X86-NEXT: retl $4
1056+
;
1057+
; SSE2-LABEL: complement_ne_i128_bitcast:
1058+
; SSE2: # %bb.0:
1059+
; SSE2-NEXT: movl %esi, %ecx
1060+
; SSE2-NEXT: movl $1, %eax
1061+
; SSE2-NEXT: xorl %edx, %edx
1062+
; SSE2-NEXT: shldq %cl, %rax, %rdx
1063+
; SSE2-NEXT: xorl %esi, %esi
1064+
; SSE2-NEXT: shlq %cl, %rax
1065+
; SSE2-NEXT: testb $64, %cl
1066+
; SSE2-NEXT: cmovneq %rax, %rdx
1067+
; SSE2-NEXT: cmovneq %rsi, %rax
1068+
; SSE2-NEXT: movdqa (%rdi), %xmm0
1069+
; SSE2-NEXT: xorq %rdx, 8(%rdi)
1070+
; SSE2-NEXT: movq %xmm0, %rcx
1071+
; SSE2-NEXT: xorq %rax, %rcx
1072+
; SSE2-NEXT: movq %rcx, (%rdi)
1073+
; SSE2-NEXT: retq
1074+
;
1075+
; SSE4-LABEL: complement_ne_i128_bitcast:
1076+
; SSE4: # %bb.0:
1077+
; SSE4-NEXT: movl %esi, %ecx
1078+
; SSE4-NEXT: movl $1, %eax
1079+
; SSE4-NEXT: xorl %edx, %edx
1080+
; SSE4-NEXT: shldq %cl, %rax, %rdx
1081+
; SSE4-NEXT: shlq %cl, %rax
1082+
; SSE4-NEXT: xorl %esi, %esi
1083+
; SSE4-NEXT: testb $64, %cl
1084+
; SSE4-NEXT: cmovneq %rax, %rdx
1085+
; SSE4-NEXT: cmovneq %rsi, %rax
1086+
; SSE4-NEXT: movdqa (%rdi), %xmm0
1087+
; SSE4-NEXT: movq %xmm0, %rcx
1088+
; SSE4-NEXT: xorq %rax, %rcx
1089+
; SSE4-NEXT: pextrq $1, %xmm0, %rax
1090+
; SSE4-NEXT: xorq %rdx, %rax
1091+
; SSE4-NEXT: movq %rax, 8(%rdi)
1092+
; SSE4-NEXT: movq %rcx, (%rdi)
1093+
; SSE4-NEXT: retq
1094+
;
1095+
; AVX2-LABEL: complement_ne_i128_bitcast:
1096+
; AVX2: # %bb.0:
1097+
; AVX2-NEXT: movl %esi, %ecx
1098+
; AVX2-NEXT: movl $1, %eax
1099+
; AVX2-NEXT: xorl %edx, %edx
1100+
; AVX2-NEXT: shldq %cl, %rax, %rdx
1101+
; AVX2-NEXT: xorl %esi, %esi
1102+
; AVX2-NEXT: shlxq %rcx, %rax, %rax
1103+
; AVX2-NEXT: testb $64, %cl
1104+
; AVX2-NEXT: cmovneq %rax, %rdx
1105+
; AVX2-NEXT: cmovneq %rsi, %rax
1106+
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
1107+
; AVX2-NEXT: vmovq %xmm0, %rcx
1108+
; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
1109+
; AVX2-NEXT: xorq %rax, %rcx
1110+
; AVX2-NEXT: xorq %rdx, %rsi
1111+
; AVX2-NEXT: movq %rsi, 8(%rdi)
1112+
; AVX2-NEXT: movq %rcx, (%rdi)
1113+
; AVX2-NEXT: retq
1114+
;
1115+
; AVX512-LABEL: complement_ne_i128_bitcast:
1116+
; AVX512: # %bb.0:
1117+
; AVX512-NEXT: movl %esi, %ecx
1118+
; AVX512-NEXT: xorl %eax, %eax
1119+
; AVX512-NEXT: movl $1, %edx
1120+
; AVX512-NEXT: xorl %esi, %esi
1121+
; AVX512-NEXT: shldq %cl, %rdx, %rsi
1122+
; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
1123+
; AVX512-NEXT: testb $64, %cl
1124+
; AVX512-NEXT: cmovneq %rdx, %rsi
1125+
; AVX512-NEXT: cmovneq %rax, %rdx
1126+
; AVX512-NEXT: vmovdqa (%rdi), %xmm0
1127+
; AVX512-NEXT: vmovq %xmm0, %rax
1128+
; AVX512-NEXT: xorq %rdx, %rax
1129+
; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
1130+
; AVX512-NEXT: xorq %rsi, %rcx
1131+
; AVX512-NEXT: movq %rcx, 8(%rdi)
1132+
; AVX512-NEXT: movq %rax, (%rdi)
1133+
; AVX512-NEXT: retq
1134+
%rem = and i32 %position, 127
1135+
%ofs = zext nneg i32 %rem to i128
1136+
%bit = shl nuw i128 1, %ofs
1137+
%ldv = load <8 x i16>, ptr %word
1138+
%ld = bitcast <8 x i16> %ldv to i128
1139+
%test = and i128 %ld, %bit
1140+
%res = xor i128 %ld, %bit
1141+
store i128 %res, ptr %word
1142+
ret <8 x i16> %ldv
1143+
}
1144+
9591145
; Multiple loads in store chain
9601146
define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
9611147
; X86-LABEL: reset_multiload_i128:
@@ -975,10 +1161,10 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
9751161
; X86-NEXT: btrl %edx, %ebx
9761162
; X86-NEXT: btl %edx, %edi
9771163
; X86-NEXT: movl %ebx, (%ecx,%esi)
978-
; X86-NEXT: jae .LBB22_2
1164+
; X86-NEXT: jae .LBB23_2
9791165
; X86-NEXT: # %bb.1:
9801166
; X86-NEXT: xorl %eax, %eax
981-
; X86-NEXT: .LBB22_2:
1167+
; X86-NEXT: .LBB23_2:
9821168
; X86-NEXT: popl %esi
9831169
; X86-NEXT: popl %edi
9841170
; X86-NEXT: popl %ebx
@@ -994,10 +1180,10 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
9941180
; X64-NEXT: btrl %esi, %r8d
9951181
; X64-NEXT: xorl %eax, %eax
9961182
; X64-NEXT: btl %esi, %r9d
997-
; X64-NEXT: jb .LBB22_2
1183+
; X64-NEXT: jb .LBB23_2
9981184
; X64-NEXT: # %bb.1:
9991185
; X64-NEXT: movl (%rdx), %eax
1000-
; X64-NEXT: .LBB22_2:
1186+
; X64-NEXT: .LBB23_2:
10011187
; X64-NEXT: movl %r8d, (%rdi,%rcx)
10021188
; X64-NEXT: retq
10031189
%rem = and i32 %position, 127
@@ -1046,10 +1232,10 @@ define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind
10461232
; X86-NEXT: movl %edi, (%edx)
10471233
; X86-NEXT: movl (%eax), %eax
10481234
; X86-NEXT: orl %ecx, %ebp
1049-
; X86-NEXT: jne .LBB23_2
1235+
; X86-NEXT: jne .LBB24_2
10501236
; X86-NEXT: # %bb.1:
10511237
; X86-NEXT: addl %esi, %eax
1052-
; X86-NEXT: .LBB23_2:
1238+
; X86-NEXT: .LBB24_2:
10531239
; X86-NEXT: popl %esi
10541240
; X86-NEXT: popl %edi
10551241
; X86-NEXT: popl %ebx

0 commit comments

Comments (0)