Skip to content

Commit ede5262

Browse files
RKSimon authored and github-actions[bot] committed
Automerge: [X86] narrowBitOpRMW - add additional uses of the StoredVal back to the DAG worklist (#166819)
Because StoredVal has been replaced with a fresh load and has one fewer user, make sure we add its remaining user(s) back to the worklist in case this opens up further folds.
2 parents b0e90f2 + 732c725 commit ede5262

File tree

2 files changed

+43
-21
lines changed

2 files changed

+43
-21
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -53354,6 +53354,7 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
5335453354
// i32 sub value.
5335553355
static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
5335653356
SelectionDAG &DAG,
53357+
TargetLowering::DAGCombinerInfo &DCI,
5335753358
const X86Subtarget &Subtarget) {
5335853359
using namespace SDPatternMatch;
5335953360
SDValue StoredVal = St->getValue();
@@ -53451,6 +53452,8 @@ static SDValue narrowBitOpRMW(StoreSDNode *St, const SDLoc &DL,
5345153452
if (!StoredVal.hasOneUse()) {
5345253453
SDValue NewLoad =
5345353454
DAG.getLoad(VT, DL, NewStore, Ld->getBasePtr(), Ld->getMemOperand());
53455+
for (SDNode *User : StoredVal->users())
53456+
DCI.AddToWorklist(User);
5345453457
DAG.ReplaceAllUsesWith(StoredVal, NewLoad);
5345553458
}
5345653459
return NewStore;
@@ -53682,7 +53685,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
5368253685
}
5368353686
}
5368453687

53685-
if (SDValue R = narrowBitOpRMW(St, dl, DAG, Subtarget))
53688+
if (SDValue R = narrowBitOpRMW(St, dl, DAG, DCI, Subtarget))
5368653689
return R;
5368753690

5368853691
// Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)

llvm/test/CodeGen/X86/bittest-big-integer.ll

Lines changed: 39 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1056,26 +1056,45 @@ define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind
10561056
; X86-NEXT: popl %ebp
10571057
; X86-NEXT: retl
10581058
;
1059-
; X64-LABEL: chain_reset_i256:
1060-
; X64: # %bb.0:
1061-
; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
1062-
; X64-NEXT: movl $-2, %eax
1063-
; X64-NEXT: roll %cl, %eax
1064-
; X64-NEXT: shrl $3, %ecx
1065-
; X64-NEXT: andl $28, %ecx
1066-
; X64-NEXT: andl %eax, (%rdi,%rcx)
1067-
; X64-NEXT: movq (%rdi), %rcx
1068-
; X64-NEXT: movq 8(%rdi), %r8
1069-
; X64-NEXT: orq 24(%rdi), %r8
1070-
; X64-NEXT: movq 16(%rdi), %rdi
1071-
; X64-NEXT: orq %rcx, %rdi
1072-
; X64-NEXT: movl (%rsi), %eax
1073-
; X64-NEXT: movl %ecx, (%rsi)
1074-
; X64-NEXT: movl (%rdx), %ecx
1075-
; X64-NEXT: addl %ecx, %eax
1076-
; X64-NEXT: orq %r8, %rdi
1077-
; X64-NEXT: cmovnel %ecx, %eax
1078-
; X64-NEXT: retq
1059+
; SSE-LABEL: chain_reset_i256:
1060+
; SSE: # %bb.0:
1061+
; SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
1062+
; SSE-NEXT: movl $-2, %eax
1063+
; SSE-NEXT: roll %cl, %eax
1064+
; SSE-NEXT: shrl $3, %ecx
1065+
; SSE-NEXT: andl $28, %ecx
1066+
; SSE-NEXT: andl %eax, (%rdi,%rcx)
1067+
; SSE-NEXT: movq (%rdi), %rcx
1068+
; SSE-NEXT: movq 8(%rdi), %r8
1069+
; SSE-NEXT: orq 24(%rdi), %r8
1070+
; SSE-NEXT: movq 16(%rdi), %rdi
1071+
; SSE-NEXT: orq %rcx, %rdi
1072+
; SSE-NEXT: movl (%rsi), %eax
1073+
; SSE-NEXT: movl %ecx, (%rsi)
1074+
; SSE-NEXT: movl (%rdx), %ecx
1075+
; SSE-NEXT: addl %ecx, %eax
1076+
; SSE-NEXT: orq %r8, %rdi
1077+
; SSE-NEXT: cmovnel %ecx, %eax
1078+
; SSE-NEXT: retq
1079+
;
1080+
; AVX-LABEL: chain_reset_i256:
1081+
; AVX: # %bb.0:
1082+
; AVX-NEXT: # kill: def $ecx killed $ecx def $rcx
1083+
; AVX-NEXT: movl $-2, %eax
1084+
; AVX-NEXT: roll %cl, %eax
1085+
; AVX-NEXT: shrl $3, %ecx
1086+
; AVX-NEXT: andl $28, %ecx
1087+
; AVX-NEXT: andl %eax, (%rdi,%rcx)
1088+
; AVX-NEXT: vmovdqu (%rdi), %ymm0
1089+
; AVX-NEXT: movl (%rdi), %ecx
1090+
; AVX-NEXT: movl (%rsi), %eax
1091+
; AVX-NEXT: movl %ecx, (%rsi)
1092+
; AVX-NEXT: movl (%rdx), %ecx
1093+
; AVX-NEXT: addl %ecx, %eax
1094+
; AVX-NEXT: vptest %ymm0, %ymm0
1095+
; AVX-NEXT: cmovnel %ecx, %eax
1096+
; AVX-NEXT: vzeroupper
1097+
; AVX-NEXT: retq
10791098
%rem = and i32 %position, 255
10801099
%ofs = zext nneg i32 %rem to i256
10811100
%bit = shl nuw i256 1, %ofs

0 commit comments

Comments
 (0)