
Commit b4c4013

[X86] narrowBitOpRMW - peek through bitcasts while searching for RMW patterns (llvm#167497)
1 parent 17ce48f commit b4c4013

File tree: 2 files changed, +88 -131 lines changed


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
@@ -53412,7 +53412,7 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
     return SDValue();
 
   // SrcVal must be a matching normal load further up the chain.
-  auto *Ld = dyn_cast<LoadSDNode>(SrcVal);
+  auto *Ld = dyn_cast<LoadSDNode>(peekThroughBitcasts(SrcVal));
   if (!Ld || !ISD::isNormalLoad(Ld) || !Ld->isSimple() ||
       Ld->getBasePtr() != St->getBasePtr() ||
       Ld->getOffset() != St->getOffset() ||
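For context, narrowBitOpRMW narrows a read-modify-write of a wide integer in memory (load, single-bit op, store back to the same address) so that only the affected word is rewritten. The one-line change above lets the combine look through a bitcast when walking from the stored value back to the originating load, which matters when the same memory is also loaded with a vector type. Below is a minimal, hypothetical IR sketch of that shape; the function name and body are illustrative, inferred from the test's name and the visible tail of @complement_ne_i128_bitcast, not copied from the test file:

; Hypothetical sketch: the loaded value reaches the bit-complement RMW only
; through a bitcast of a vector-typed load of the same address.
define <8 x i16> @complement_through_bitcast(ptr %word, i32 %position) nounwind {
  %rem = and i32 %position, 127
  %ofs = zext nneg i32 %rem to i128
  %bit = shl nuw i128 1, %ofs
  %vec = load <8 x i16>, ptr %word        ; vector load of the 128-bit value
  %val = bitcast <8 x i16> %vec to i128   ; bitcast sits between the load and the bit-op
  %res = xor i128 %val, %bit              ; complement a single bit
  store i128 %res, ptr %word              ; store back to the same base pointer
  ret <8 x i16> %vec
}

With peekThroughBitcasts applied to SrcVal, dyn_cast<LoadSDNode> now finds the vector load, so the regenerated CHECK lines below collapse the full-width xor-and-store sequences into a btcl plus a single 32-bit store of the touched word, e.g. movl %eax, (%rdi,%rsi).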

llvm/test/CodeGen/X86/bittest-big-integer.ll

Lines changed: 87 additions & 130 deletions
@@ -967,82 +967,63 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: andl $-16, %esp
 ; X86-NEXT: subl $80, %esp
-; X86-NEXT: movzbl 16(%ebp), %ecx
-; X86-NEXT: movl 12(%ebp), %edx
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb $3, %al
-; X86-NEXT: andb $12, %al
-; X86-NEXT: negb %al
-; X86-NEXT: movsbl %al, %eax
-; X86-NEXT: movl 56(%esp,%eax), %esi
-; X86-NEXT: movl 60(%esp,%eax), %ebx
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: shldl %cl, %esi, %ebx
-; X86-NEXT: movzwl 14(%edx), %edi
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movzwl (%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzwl 12(%eax), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzwl 14(%eax), %edi
 ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: shll $16, %edi
-; X86-NEXT: movzwl 12(%edx), %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT: orl %ecx, %edi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: movl 52(%esp,%eax), %edx
-; X86-NEXT: movzbl 16(%ebp), %ecx
-; X86-NEXT: shldl %cl, %edx, %esi
-; X86-NEXT: movl 12(%ebp), %eax
-; X86-NEXT: movzwl 10(%eax), %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shll $16, %ebx
-; X86-NEXT: movzwl 8(%eax), %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %eax, %ebx
-; X86-NEXT: xorl %esi, %ebx
-; X86-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-NEXT: movl 48(%esp,%eax), %esi
-; X86-NEXT: shldl %cl, %esi, %edx
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movzwl 6(%ecx), %eax
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: shll $16, %eax
-; X86-NEXT: movzwl 4(%ecx), %ecx
+; X86-NEXT: movzwl 2(%eax), %ecx
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: xorl %edx, %eax
-; X86-NEXT: movzbl 16(%ebp), %ecx
-; X86-NEXT: shll %cl, %esi
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movzwl 2(%ecx), %edx
+; X86-NEXT: movzwl 4(%eax), %edx
 ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: shll $16, %edx
-; X86-NEXT: movzwl (%ecx), %ecx
+; X86-NEXT: movzwl 6(%eax), %esi
+; X86-NEXT: movzwl 8(%eax), %ecx
 ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: movzwl 10(%eax), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shll $16, %eax
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 16(%ebp), %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: andb $96, %bl
+; X86-NEXT: shrb $3, %bl
+; X86-NEXT: movzbl %bl, %edi
+; X86-NEXT: movl 32(%esp,%edi), %edi
+; X86-NEXT: btcl %eax, %edi
+; X86-NEXT: andl $96, %eax
+; X86-NEXT: shrl $3, %eax
 ; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %edi, 12(%ecx)
-; X86-NEXT: movl %ebx, 8(%ecx)
-; X86-NEXT: movl %eax, 4(%ecx)
-; X86-NEXT: movl %edx, (%ecx)
+; X86-NEXT: movl %edi, (%ecx,%eax)
 ; X86-NEXT: movl 8(%ebp), %eax
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-NEXT: movw %dx, 14(%eax)
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-NEXT: movw %dx, 12(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movw %dx, 10(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movw %dx, 8(%eax)
-; X86-NEXT: movl (%esp), %edx # 4-byte Reload
-; X86-NEXT: movw %dx, 6(%eax)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movw %dx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 10(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 8(%eax)
+; X86-NEXT: movw %si, 6(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movw %cx, 4(%eax)
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; X86-NEXT: movw %cx, 2(%eax)
 ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
@@ -1056,81 +1037,57 @@ define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind
 ;
 ; SSE2-LABEL: complement_ne_i128_bitcast:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movl %esi, %ecx
-; SSE2-NEXT: movl $1, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: shldq %cl, %rax, %rdx
-; SSE2-NEXT: xorl %esi, %esi
-; SSE2-NEXT: shlq %cl, %rax
-; SSE2-NEXT: testb $64, %cl
-; SSE2-NEXT: cmovneq %rax, %rdx
-; SSE2-NEXT: cmovneq %rsi, %rax
+; SSE2-NEXT: # kill: def $esi killed $esi def $rsi
 ; SSE2-NEXT: movdqa (%rdi), %xmm0
-; SSE2-NEXT: xorq %rdx, 8(%rdi)
-; SSE2-NEXT: movq %xmm0, %rcx
-; SSE2-NEXT: xorq %rax, %rcx
-; SSE2-NEXT: movq %rcx, (%rdi)
+; SSE2-NEXT: movq 8(%rdi), %rax
+; SSE2-NEXT: movq %xmm0, %rdx
+; SSE2-NEXT: movl %esi, %ecx
+; SSE2-NEXT: andb $32, %cl
+; SSE2-NEXT: shrdq %cl, %rax, %rdx
+; SSE2-NEXT: shrq %cl, %rax
+; SSE2-NEXT: testb $64, %sil
+; SSE2-NEXT: cmoveq %rdx, %rax
+; SSE2-NEXT: btcl %esi, %eax
+; SSE2-NEXT: andl $96, %esi
+; SSE2-NEXT: shrl $3, %esi
+; SSE2-NEXT: movl %eax, (%rdi,%rsi)
 ; SSE2-NEXT: retq
 ;
 ; SSE4-LABEL: complement_ne_i128_bitcast:
 ; SSE4: # %bb.0:
-; SSE4-NEXT: movl %esi, %ecx
-; SSE4-NEXT: movl $1, %eax
-; SSE4-NEXT: xorl %edx, %edx
-; SSE4-NEXT: shldq %cl, %rax, %rdx
-; SSE4-NEXT: shlq %cl, %rax
-; SSE4-NEXT: xorl %esi, %esi
-; SSE4-NEXT: testb $64, %cl
-; SSE4-NEXT: cmovneq %rax, %rdx
-; SSE4-NEXT: cmovneq %rsi, %rax
+; SSE4-NEXT: # kill: def $esi killed $esi def $rsi
 ; SSE4-NEXT: movdqa (%rdi), %xmm0
-; SSE4-NEXT: movq %xmm0, %rcx
-; SSE4-NEXT: xorq %rax, %rcx
 ; SSE4-NEXT: pextrq $1, %xmm0, %rax
-; SSE4-NEXT: xorq %rdx, %rax
-; SSE4-NEXT: movq %rax, 8(%rdi)
-; SSE4-NEXT: movq %rcx, (%rdi)
+; SSE4-NEXT: movq %xmm0, %rdx
+; SSE4-NEXT: movl %esi, %ecx
+; SSE4-NEXT: andb $32, %cl
+; SSE4-NEXT: shrdq %cl, %rax, %rdx
+; SSE4-NEXT: shrq %cl, %rax
+; SSE4-NEXT: testb $64, %sil
+; SSE4-NEXT: cmoveq %rdx, %rax
+; SSE4-NEXT: btcl %esi, %eax
+; SSE4-NEXT: andl $96, %esi
+; SSE4-NEXT: shrl $3, %esi
+; SSE4-NEXT: movl %eax, (%rdi,%rsi)
 ; SSE4-NEXT: retq
 ;
-; AVX2-LABEL: complement_ne_i128_bitcast:
-; AVX2: # %bb.0:
-; AVX2-NEXT: movl %esi, %ecx
-; AVX2-NEXT: movl $1, %eax
-; AVX2-NEXT: xorl %edx, %edx
-; AVX2-NEXT: shldq %cl, %rax, %rdx
-; AVX2-NEXT: xorl %esi, %esi
-; AVX2-NEXT: shlxq %rcx, %rax, %rax
-; AVX2-NEXT: testb $64, %cl
-; AVX2-NEXT: cmovneq %rax, %rdx
-; AVX2-NEXT: cmovneq %rsi, %rax
-; AVX2-NEXT: vmovdqa (%rdi), %xmm0
-; AVX2-NEXT: vmovq %xmm0, %rcx
-; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
-; AVX2-NEXT: xorq %rax, %rcx
-; AVX2-NEXT: xorq %rdx, %rsi
-; AVX2-NEXT: movq %rsi, 8(%rdi)
-; AVX2-NEXT: movq %rcx, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: complement_ne_i128_bitcast:
-; AVX512: # %bb.0:
-; AVX512-NEXT: movl %esi, %ecx
-; AVX512-NEXT: xorl %eax, %eax
-; AVX512-NEXT: movl $1, %edx
-; AVX512-NEXT: xorl %esi, %esi
-; AVX512-NEXT: shldq %cl, %rdx, %rsi
-; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
-; AVX512-NEXT: testb $64, %cl
-; AVX512-NEXT: cmovneq %rdx, %rsi
-; AVX512-NEXT: cmovneq %rax, %rdx
-; AVX512-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512-NEXT: vmovq %xmm0, %rax
-; AVX512-NEXT: xorq %rdx, %rax
-; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
-; AVX512-NEXT: xorq %rsi, %rcx
-; AVX512-NEXT: movq %rcx, 8(%rdi)
-; AVX512-NEXT: movq %rax, (%rdi)
-; AVX512-NEXT: retq
+; AVX-LABEL: complement_ne_i128_bitcast:
+; AVX: # %bb.0:
+; AVX-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX-NEXT: vmovdqa (%rdi), %xmm0
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: vmovq %xmm0, %rdx
+; AVX-NEXT: movl %esi, %ecx
+; AVX-NEXT: andb $32, %cl
+; AVX-NEXT: shrdq %cl, %rax, %rdx
+; AVX-NEXT: shrxq %rcx, %rax, %rax
+; AVX-NEXT: testb $64, %sil
+; AVX-NEXT: cmoveq %rdx, %rax
+; AVX-NEXT: btcl %esi, %eax
+; AVX-NEXT: andl $96, %esi
+; AVX-NEXT: shrl $3, %esi
+; AVX-NEXT: movl %eax, (%rdi,%rsi)
+; AVX-NEXT: retq
 %rem = and i32 %position, 127
 %ofs = zext nneg i32 %rem to i128
 %bit = shl nuw i128 1, %ofs
