Skip to content

Commit 89c2617

Browse files
authored
[X86] bittest-big-integer.ll - add test showing multiple uses of the RMW store chain AND its stored value (#166366)
1 parent 2e89b71 commit 89c2617

File tree

1 file changed

+263
-0
lines changed

1 file changed

+263
-0
lines changed

llvm/test/CodeGen/X86/bittest-big-integer.ll

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,269 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
10831083
ret i32 %ret
10841084
}
10851085

1086+
; Multiple uses of the RMW store chain AND its stored value
1087+
define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind {
1088+
; X86-LABEL: chain_reset_i256:
1089+
; X86: # %bb.0:
1090+
; X86-NEXT: pushl %ebp
1091+
; X86-NEXT: movl %esp, %ebp
1092+
; X86-NEXT: pushl %ebx
1093+
; X86-NEXT: pushl %edi
1094+
; X86-NEXT: pushl %esi
1095+
; X86-NEXT: andl $-16, %esp
1096+
; X86-NEXT: subl $112, %esp
1097+
; X86-NEXT: movzbl 20(%ebp), %ecx
1098+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1099+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1100+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1101+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1102+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1103+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1104+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1105+
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
1106+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1107+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1108+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1109+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1110+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1111+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1112+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1113+
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
1114+
; X86-NEXT: movl %ecx, %eax
1115+
; X86-NEXT: shrb $3, %al
1116+
; X86-NEXT: andb $28, %al
1117+
; X86-NEXT: negb %al
1118+
; X86-NEXT: movsbl %al, %eax
1119+
; X86-NEXT: movl 72(%esp,%eax), %edx
1120+
; X86-NEXT: movl 76(%esp,%eax), %edi
1121+
; X86-NEXT: movl %edi, %esi
1122+
; X86-NEXT: shldl %cl, %edx, %esi
1123+
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1124+
; X86-NEXT: movl 68(%esp,%eax), %esi
1125+
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1126+
; X86-NEXT: shldl %cl, %esi, %edx
1127+
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1128+
; X86-NEXT: movl 80(%esp,%eax), %edx
1129+
; X86-NEXT: movl 84(%esp,%eax), %ebx
1130+
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1131+
; X86-NEXT: shldl %cl, %edx, %ebx
1132+
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1133+
; X86-NEXT: shldl %cl, %edi, %edx
1134+
; X86-NEXT: movl 64(%esp,%eax), %edi
1135+
; X86-NEXT: movl 88(%esp,%eax), %esi
1136+
; X86-NEXT: movl 92(%esp,%eax), %eax
1137+
; X86-NEXT: shldl %cl, %esi, %eax
1138+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
1139+
; X86-NEXT: shldl %cl, %ebx, %esi
1140+
; X86-NEXT: shldl %cl, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
1141+
; X86-NEXT: shll %cl, %edi
1142+
; X86-NEXT: movl %edi, %ecx
1143+
; X86-NEXT: movl %esi, %ebx
1144+
; X86-NEXT: notl %ebx
1145+
; X86-NEXT: notl %eax
1146+
; X86-NEXT: notl %edx
1147+
; X86-NEXT: notl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
1148+
; X86-NEXT: notl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
1149+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
1150+
; X86-NEXT: notl %edi
1151+
; X86-NEXT: notl %ecx
1152+
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1153+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
1154+
; X86-NEXT: notl %esi
1155+
; X86-NEXT: movl 8(%ebp), %ecx
1156+
; X86-NEXT: andl 12(%ecx), %edi
1157+
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1158+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
1159+
; X86-NEXT: andl 8(%ecx), %edi
1160+
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1161+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
1162+
; X86-NEXT: andl 20(%ecx), %edi
1163+
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1164+
; X86-NEXT: andl 16(%ecx), %edx
1165+
; X86-NEXT: andl 28(%ecx), %eax
1166+
; X86-NEXT: andl 24(%ecx), %ebx
1167+
; X86-NEXT: andl 4(%ecx), %esi
1168+
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1169+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
1170+
; X86-NEXT: andl (%ecx), %esi
1171+
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
1172+
; X86-NEXT: movl %ebx, 24(%ecx)
1173+
; X86-NEXT: movl %eax, 28(%ecx)
1174+
; X86-NEXT: movl %edx, 16(%ecx)
1175+
; X86-NEXT: movl %edi, 20(%ecx)
1176+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
1177+
; X86-NEXT: movl %edi, 8(%ecx)
1178+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
1179+
; X86-NEXT: movl %edi, 12(%ecx)
1180+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
1181+
; X86-NEXT: movl %esi, (%ecx)
1182+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
1183+
; X86-NEXT: movl %esi, 4(%ecx)
1184+
; X86-NEXT: orl %edi, %eax
1185+
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
1186+
; X86-NEXT: orl %eax, %esi
1187+
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
1188+
; X86-NEXT: movl 12(%ebp), %eax
1189+
; X86-NEXT: movl (%eax), %ecx
1190+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
1191+
; X86-NEXT: movl %edi, (%eax)
1192+
; X86-NEXT: orl %edi, %edx
1193+
; X86-NEXT: orl %ebx, %edx
1194+
; X86-NEXT: orl %esi, %edx
1195+
; X86-NEXT: movl 16(%ebp), %eax
1196+
; X86-NEXT: movl (%eax), %eax
1197+
; X86-NEXT: jne .LBB23_2
1198+
; X86-NEXT: # %bb.1:
1199+
; X86-NEXT: addl %ecx, %eax
1200+
; X86-NEXT: .LBB23_2:
1201+
; X86-NEXT: leal -12(%ebp), %esp
1202+
; X86-NEXT: popl %esi
1203+
; X86-NEXT: popl %edi
1204+
; X86-NEXT: popl %ebx
1205+
; X86-NEXT: popl %ebp
1206+
; X86-NEXT: retl
1207+
;
1208+
; SSE-LABEL: chain_reset_i256:
1209+
; SSE: # %bb.0:
1210+
; SSE-NEXT: xorps %xmm0, %xmm0
1211+
; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
1212+
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1213+
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
1214+
; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp)
1215+
; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp)
1216+
; SSE-NEXT: movl %ecx, %eax
1217+
; SSE-NEXT: shrb $3, %al
1218+
; SSE-NEXT: andb $24, %al
1219+
; SSE-NEXT: negb %al
1220+
; SSE-NEXT: movsbq %al, %r10
1221+
; SSE-NEXT: movq -24(%rsp,%r10), %r8
1222+
; SSE-NEXT: movq -16(%rsp,%r10), %rax
1223+
; SSE-NEXT: shldq %cl, %r8, %rax
1224+
; SSE-NEXT: movq -32(%rsp,%r10), %r9
1225+
; SSE-NEXT: shldq %cl, %r9, %r8
1226+
; SSE-NEXT: movq -40(%rsp,%r10), %r10
1227+
; SSE-NEXT: shldq %cl, %r10, %r9
1228+
; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
1229+
; SSE-NEXT: shlq %cl, %r10
1230+
; SSE-NEXT: notq %r8
1231+
; SSE-NEXT: notq %rax
1232+
; SSE-NEXT: notq %r10
1233+
; SSE-NEXT: notq %r9
1234+
; SSE-NEXT: andq 24(%rdi), %rax
1235+
; SSE-NEXT: andq 16(%rdi), %r8
1236+
; SSE-NEXT: andq 8(%rdi), %r9
1237+
; SSE-NEXT: andq (%rdi), %r10
1238+
; SSE-NEXT: movq %r8, 16(%rdi)
1239+
; SSE-NEXT: movq %rax, 24(%rdi)
1240+
; SSE-NEXT: movq %r10, (%rdi)
1241+
; SSE-NEXT: movq %r9, 8(%rdi)
1242+
; SSE-NEXT: orq %rax, %r9
1243+
; SSE-NEXT: orq %r10, %r8
1244+
; SSE-NEXT: movl (%rsi), %eax
1245+
; SSE-NEXT: movl %r10d, (%rsi)
1246+
; SSE-NEXT: movl (%rdx), %ecx
1247+
; SSE-NEXT: addl %ecx, %eax
1248+
; SSE-NEXT: orq %r9, %r8
1249+
; SSE-NEXT: cmovnel %ecx, %eax
1250+
; SSE-NEXT: retq
1251+
;
1252+
; AVX2-LABEL: chain_reset_i256:
1253+
; AVX2: # %bb.0:
1254+
; AVX2-NEXT: # kill: def $ecx killed $ecx def $rcx
1255+
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
1256+
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
1257+
; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0]
1258+
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
1259+
; AVX2-NEXT: movl %ecx, %eax
1260+
; AVX2-NEXT: shrb $3, %al
1261+
; AVX2-NEXT: andb $24, %al
1262+
; AVX2-NEXT: negb %al
1263+
; AVX2-NEXT: movsbq %al, %rax
1264+
; AVX2-NEXT: movq -32(%rsp,%rax), %r8
1265+
; AVX2-NEXT: movq -24(%rsp,%rax), %r9
1266+
; AVX2-NEXT: movq %r9, %r10
1267+
; AVX2-NEXT: shldq %cl, %r8, %r10
1268+
; AVX2-NEXT: movq -40(%rsp,%rax), %r11
1269+
; AVX2-NEXT: movq -16(%rsp,%rax), %rax
1270+
; AVX2-NEXT: shldq %cl, %r9, %rax
1271+
; AVX2-NEXT: shldq %cl, %r11, %r8
1272+
; AVX2-NEXT: andnq 24(%rdi), %rax, %rax
1273+
; AVX2-NEXT: andnq 16(%rdi), %r10, %r9
1274+
; AVX2-NEXT: andnq 8(%rdi), %r8, %r8
1275+
; AVX2-NEXT: shlxq %rcx, %r11, %rcx
1276+
; AVX2-NEXT: andnq (%rdi), %rcx, %rcx
1277+
; AVX2-NEXT: movq %r9, 16(%rdi)
1278+
; AVX2-NEXT: movq %rax, 24(%rdi)
1279+
; AVX2-NEXT: movq %rcx, (%rdi)
1280+
; AVX2-NEXT: movq %r8, 8(%rdi)
1281+
; AVX2-NEXT: orq %rax, %r8
1282+
; AVX2-NEXT: orq %rcx, %r9
1283+
; AVX2-NEXT: movl (%rsi), %eax
1284+
; AVX2-NEXT: movl %ecx, (%rsi)
1285+
; AVX2-NEXT: movl (%rdx), %ecx
1286+
; AVX2-NEXT: addl %ecx, %eax
1287+
; AVX2-NEXT: orq %r8, %r9
1288+
; AVX2-NEXT: cmovnel %ecx, %eax
1289+
; AVX2-NEXT: vzeroupper
1290+
; AVX2-NEXT: retq
1291+
;
1292+
; AVX512-LABEL: chain_reset_i256:
1293+
; AVX512: # %bb.0:
1294+
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
1295+
; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
1296+
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0]
1297+
; AVX512-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
1298+
; AVX512-NEXT: # kill: def $ecx killed $ecx def $rcx
1299+
; AVX512-NEXT: movl %ecx, %eax
1300+
; AVX512-NEXT: shrb $3, %al
1301+
; AVX512-NEXT: andb $24, %al
1302+
; AVX512-NEXT: negb %al
1303+
; AVX512-NEXT: movsbq %al, %rax
1304+
; AVX512-NEXT: movq -40(%rsp,%rax), %r8
1305+
; AVX512-NEXT: movq -32(%rsp,%rax), %r9
1306+
; AVX512-NEXT: movq -24(%rsp,%rax), %r10
1307+
; AVX512-NEXT: movq %r10, %r11
1308+
; AVX512-NEXT: shldq %cl, %r9, %r11
1309+
; AVX512-NEXT: movq -16(%rsp,%rax), %rax
1310+
; AVX512-NEXT: shldq %cl, %r10, %rax
1311+
; AVX512-NEXT: shlxq %rcx, %r8, %r10
1312+
; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx
1313+
; AVX512-NEXT: shldq %cl, %r8, %r9
1314+
; AVX512-NEXT: andnq 24(%rdi), %rax, %rax
1315+
; AVX512-NEXT: andnq 16(%rdi), %r11, %rcx
1316+
; AVX512-NEXT: andnq 8(%rdi), %r9, %r8
1317+
; AVX512-NEXT: andnq (%rdi), %r10, %r9
1318+
; AVX512-NEXT: movq %rcx, 16(%rdi)
1319+
; AVX512-NEXT: movq %rax, 24(%rdi)
1320+
; AVX512-NEXT: movq %r9, (%rdi)
1321+
; AVX512-NEXT: movq %r8, 8(%rdi)
1322+
; AVX512-NEXT: orq %rax, %r8
1323+
; AVX512-NEXT: orq %r9, %rcx
1324+
; AVX512-NEXT: movl (%rsi), %eax
1325+
; AVX512-NEXT: movl %r9d, (%rsi)
1326+
; AVX512-NEXT: movl (%rdx), %edx
1327+
; AVX512-NEXT: addl %edx, %eax
1328+
; AVX512-NEXT: orq %r8, %rcx
1329+
; AVX512-NEXT: cmovnel %edx, %eax
1330+
; AVX512-NEXT: vzeroupper
1331+
; AVX512-NEXT: retq
1332+
; Test body - clears one bit of the i256 stored at %p0 with a load/and/store
; sequence, then reuses both the stored value (%res) and memory after the
; store. None of the pointer arguments is marked noalias.
;
; Bit index within the i256 - only the low 8 bits of %position are kept.
%rem = and i32 %position, 255
1333+
%ofs = zext nneg i32 %rem to i256
1334+
; i256 value with only bit %ofs set.
%bit = shl nuw i256 1, %ofs
1335+
%ld0 = load i256, ptr %p0
1336+
; Inverting %bit gives an all-ones mask with only bit %ofs cleared.
%msk = xor i256 %bit, -1
1337+
%res = and i256 %ld0, %msk
1338+
; Write the updated value back to %p0, completing the read-modify-write.
store i256 %res, ptr %p0
1339+
; Extra use of the stored value - the whole i256 is compared against zero.
%cmp = icmp ne i256 %res, 0
1340+
; This load comes after the i256 store in program order.
%ld1 = load i32, ptr %p1
1341+
; Another use of the stored value - its low 32 bits are written to %p1.
%trunc = trunc i256 %res to i32
1342+
store i32 %trunc, ptr %p1
1343+
; Loaded after both stores above.
%ld2 = load i32, ptr %p2
1344+
%add = add i32 %ld1, %ld2
1345+
; Result is %ld2 when the updated i256 is nonzero, otherwise %ld1 + %ld2.
%sel = select i1 %cmp, i32 %ld2, i32 %add
1346+
ret i32 %sel
1347+
}
1348+
10861349
; BTC/BT/BTS sequence on same i128
10871350
define i1 @sequence_i128(ptr %word, i32 %pos0, i32 %pos1, i32 %pos2) nounwind {
10881351
; X86-LABEL: sequence_i128:

0 commit comments

Comments
 (0)