-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[DAG] Fold (umin (sub a b) a) -> (usubo a b); (select usubo.1 a usubo.0) #161651
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+326
−0
Merged
Changes from 18 commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
3a24f85
[DAG] Fold (umin (sub a b) a) -> (usubo a b); (select usubo.1 a usubo.0)
ckoparkar 0db4bf0
Remove smin pattern, it might not be correct
ckoparkar 1848dd5
Merge branch 'main' into ckoparkar/161036
ckoparkar c0ba3fd
Fix check for retq
ckoparkar 5aa0f85
Merge branch 'main' into ckoparkar/161036
ckoparkar 57e2ea0
Address review comments
ckoparkar 7e08f23
Don't use MVT::i1
ckoparkar 7660fae
Fix pattern match
ckoparkar d795383
Add some more tests
ckoparkar a27dcf5
Simplify pattern match
ckoparkar 9b6df39
Merge branch 'main' into ckoparkar/161036
ckoparkar cd06ef4
Merge branch 'main' into ckoparkar/161036
ckoparkar 83f8ad0
Merge branch 'main' into ckoparkar/161036
ckoparkar e1ba30b
Edits
ckoparkar 224f0a3
Merge branch 'main' into ckoparkar/161036
ckoparkar 6df10cc
Adjust patterns for AMDGPU
ckoparkar 0a54b44
Adjust patterns in AMDGPU/llvm.set.rounding.ll
ckoparkar b3579ab
Apply arsenm's suggestion
ckoparkar 4610bf5
Merge branch 'main' into ckoparkar/161036
ckoparkar File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
151 changes: 151 additions & 0 deletions
151
llvm/test/CodeGen/AArch64/umin-sub-to-usubo-select-combine.ll
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,151 @@ | ||
| ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s | ||
|
|
||
| ; Regression tests for GitHub issue #161036: the DAG combine that folds | ||
| ; (umin (sub a b) a) into (usubo a b) followed by (select usubo.1 a usubo.0). | ||
|
|
||
| ; Positive test : umin(sub(a,b),a) with scalar types should be folded. | ||
| ; Expected codegen is a flag-setting subtract (subs) followed by a csel on the | ||
| ; "lo" condition, with no separate cmp instruction. | ||
| define i64 @underflow_compare_fold_i64(i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i64: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: subs x8, x0, x1 | ||
| ; CHECK-NEXT: csel x0, x0, x8, lo | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub i64 %a, %b | ||
| %cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a) | ||
| ret i64 %cond | ||
| } | ||
|
|
||
| ; Positive test : umin(a,sub(a,b)) with scalar types should be folded. | ||
| ; Same as the non-commuted i64 case but with the umin operands swapped; the | ||
| ; expected instructions are identical. | ||
| define i64 @underflow_compare_fold_i64_commute(i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i64_commute: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: subs x8, x0, x1 | ||
| ; CHECK-NEXT: csel x0, x0, x8, lo | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub i64 %a, %b | ||
| %cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub) | ||
| ret i64 %cond | ||
| } | ||
|
|
||
| ; Positive test : multi-use is OK since the sub instruction still runs once. | ||
| ; The difference is both stored through %ptr and fed to umin; the expected | ||
| ; output keeps a single subs whose result serves the csel and the str. | ||
| define i64 @underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i64_multi_use: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: subs x8, x0, x1 | ||
| ; CHECK-NEXT: csel x0, x0, x8, lo | ||
| ; CHECK-NEXT: str x8, [x2] | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub i64 %a, %b | ||
| store i64 %sub, ptr addrspace(1) %ptr | ||
| %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a) | ||
| ret i64 %cond | ||
| } | ||
|
|
||
| ; Positive test : i32, umin(sub(a,b),a) should be folded. | ||
| ; Expected codegen mirrors the i64 case using the 32-bit w registers. | ||
| define i32 @underflow_compare_fold_i32(i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i32: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: subs w8, w0, w1 | ||
| ; CHECK-NEXT: csel w0, w0, w8, lo | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub i32 %a, %b | ||
| %cond = tail call i32 @llvm.umin.i32(i32 %sub, i32 %a) | ||
| ret i32 %cond | ||
| } | ||
|
|
||
| ; Positive test : i32, commuted operands umin(a,sub(a,b)) should be folded. | ||
| ; The expected instructions are identical to the non-commuted i32 case. | ||
| define i32 @underflow_compare_fold_i32_commute(i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i32_commute: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: subs w8, w0, w1 | ||
| ; CHECK-NEXT: csel w0, w0, w8, lo | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub i32 %a, %b | ||
| %cond = tail call i32 @llvm.umin.i32(i32 %a, i32 %sub) | ||
| ret i32 %cond | ||
| } | ||
|
|
||
| ; Positive test : i32, the sub result has a second use (it is stored to %ptr). | ||
| ; The expected output still contains a single subs feeding both csel and str. | ||
| define i32 @underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i32_multi_use: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: subs w8, w0, w1 | ||
| ; CHECK-NEXT: csel w0, w0, w8, lo | ||
| ; CHECK-NEXT: str w8, [x2] | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub i32 %a, %b | ||
| store i32 %sub, ptr addrspace(1) %ptr | ||
| %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a) | ||
| ret i32 %cond | ||
| } | ||
|
|
||
| ; Negative test : i16, umin(sub(a,b),a) is not folded on this target. | ||
| ; The expected output keeps a plain sub, masks both values to 16 bits, and | ||
| ; performs an explicit cmp + csel. | ||
| ; The instruction lines use CHECK-NEXT (not CHECK-LABEL) so that the exact | ||
| ; sequence is verified, matching the other tests in this file. | ||
| define i16 @underflow_compare_fold_i16(i16 %a, i16 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i16: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: sub w8, w0, w1 | ||
| ; CHECK-NEXT: and w9, w0, #0xffff | ||
| ; CHECK-NEXT: and w8, w8, #0xffff | ||
| ; CHECK-NEXT: cmp w8, w9 | ||
| ; CHECK-NEXT: csel w0, w8, w9, lo | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub i16 %a, %b | ||
| %cond = tail call i16 @llvm.umin.i16(i16 %sub, i16 %a) | ||
| ret i16 %cond | ||
| } | ||
|
|
||
| ; Negative test : i16, commuted operands umin(a,sub(a,b)) is not folded on | ||
| ; this target. The expected output keeps sub, two 16-bit masks, cmp and csel, | ||
| ; with the cmp/csel operand order following the swapped umin operands. | ||
| ; The instruction lines use CHECK-NEXT (not CHECK-LABEL) so that the exact | ||
| ; sequence is verified, matching the other tests in this file. | ||
| define i16 @underflow_compare_fold_i16_commute(i16 %a, i16 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i16_commute: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: sub w8, w0, w1 | ||
| ; CHECK-NEXT: and w9, w0, #0xffff | ||
| ; CHECK-NEXT: and w8, w8, #0xffff | ||
| ; CHECK-NEXT: cmp w9, w8 | ||
| ; CHECK-NEXT: csel w0, w9, w8, lo | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub i16 %a, %b | ||
| %cond = tail call i16 @llvm.umin.i16(i16 %a, i16 %sub) | ||
| ret i16 %cond | ||
| } | ||
|
|
||
| ; Negative test : i16 with a second use of the sub result (stored to %ptr). | ||
| ; The expected output is not folded: a plain sub, 16-bit masks, a halfword | ||
| ; store of the unmasked difference, then cmp + csel. | ||
| ; The instruction lines use CHECK-NEXT (not CHECK-LABEL) so that the exact | ||
| ; sequence is verified, matching the other tests in this file. | ||
| define i16 @underflow_compare_fold_i16_multi_use(i16 %a, i16 %b, ptr addrspace(1) %ptr) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i16_multi_use: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: sub w8, w0, w1 | ||
| ; CHECK-NEXT: and w9, w0, #0xffff | ||
| ; CHECK-NEXT: and w10, w8, #0xffff | ||
| ; CHECK-NEXT: strh w8, [x2] | ||
| ; CHECK-NEXT: cmp w10, w9 | ||
| ; CHECK-NEXT: csel w0, w10, w9, lo | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub i16 %a, %b | ||
| store i16 %sub, ptr addrspace(1) %ptr | ||
| %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a) | ||
| ret i16 %cond | ||
| } | ||
|
|
||
| ; Negative test, vector types : umin(sub(a,b),a) but with vectors. | ||
| ; The expected output keeps the vector sub followed by a vector umin; no | ||
| ; flag-setting subtract / select sequence is produced. | ||
| define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) { | ||
| ; CHECK-LABEL: underflow_compare_dontfold_vectors: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: sub v1.16b, v0.16b, v1.16b | ||
| ; CHECK-NEXT: umin v0.16b, v1.16b, v0.16b | ||
| ; CHECK-NEXT: ret | ||
| %sub = sub <16 x i8> %a, %b | ||
| %cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a) | ||
| ret <16 x i8> %cond | ||
| } | ||
|
|
||
| ; Negative test, pattern mismatch : umin(add(a,b),a). | ||
| ; The first umin operand is an add rather than a sub, so the expected output | ||
| ; keeps the add followed by an explicit cmp + csel. | ||
| define i64 @umin_add(i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: umin_add: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: add x8, x0, x1 | ||
| ; CHECK-NEXT: cmp x8, x0 | ||
| ; CHECK-NEXT: csel x0, x8, x0, lo | ||
| ; CHECK-NEXT: ret | ||
| %add = add i64 %a, %b | ||
| %cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a) | ||
| ret i64 %cond | ||
| } |
156 changes: 156 additions & 0 deletions
156
llvm/test/CodeGen/X86/umin-sub-to-usubo-select-combine.ll
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,156 @@ | ||
| ; RUN: llc < %s -mtriple=x86_64 | FileCheck %s | ||
|
|
||
| ; Regression tests for GitHub issue #161036: the DAG combine that folds | ||
| ; (umin (sub a b) a) into (usubo a b) followed by (select usubo.1 a usubo.0). | ||
|
|
||
| ; Positive test : umin(sub(a,b),a) with scalar types should be folded. | ||
| ; Expected codegen is a subq whose flags feed cmovbq directly, with no | ||
| ; separate cmpq instruction. | ||
| define i64 @underflow_compare_fold_i64(i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i64: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movq %rdi, %rax | ||
| ; CHECK-NEXT: subq %rsi, %rax | ||
| ; CHECK-NEXT: cmovbq %rdi, %rax | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub i64 %a, %b | ||
| %cond = tail call i64 @llvm.umin.i64(i64 %sub, i64 %a) | ||
| ret i64 %cond | ||
| } | ||
|
|
||
| ; Positive test : umin(a,sub(a,b)) with scalar types should be folded. | ||
| ; Same as the non-commuted i64 case but with the umin operands swapped; the | ||
| ; expected instructions are identical. | ||
| define i64 @underflow_compare_fold_i64_commute(i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i64_commute: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movq %rdi, %rax | ||
| ; CHECK-NEXT: subq %rsi, %rax | ||
| ; CHECK-NEXT: cmovbq %rdi, %rax | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub i64 %a, %b | ||
| %cond = tail call i64 @llvm.umin.i64(i64 %a, i64 %sub) | ||
| ret i64 %cond | ||
| } | ||
|
|
||
| ; Positive test : multi-use is OK since the sub instruction still runs once. | ||
| ; The difference is both stored through %ptr and fed to umin; the expected | ||
| ; output keeps a single subq whose result is stored and then conditionally | ||
| ; replaced by cmovbq. | ||
| define i64 @underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i64_multi_use: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movq %rdi, %rax | ||
| ; CHECK-NEXT: subq %rsi, %rax | ||
| ; CHECK-NEXT: movq %rax, (%rdx) | ||
| ; CHECK-NEXT: cmovbq %rdi, %rax | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub i64 %a, %b | ||
| store i64 %sub, ptr addrspace(1) %ptr | ||
| %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a) | ||
| ret i64 %cond | ||
| } | ||
|
|
||
| ; Positive test : i32, umin(sub(a,b),a) should be folded. | ||
| ; Expected codegen mirrors the i64 case using 32-bit registers and the | ||
| ; l-suffixed instructions. | ||
| define i32 @underflow_compare_fold_i32(i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i32: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movl %edi, %eax | ||
| ; CHECK-NEXT: subl %esi, %eax | ||
| ; CHECK-NEXT: cmovbl %edi, %eax | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub i32 %a, %b | ||
| %cond = tail call i32 @llvm.umin.i32(i32 %sub, i32 %a) | ||
| ret i32 %cond | ||
| } | ||
|
|
||
| ; Positive test : i32, commuted operands umin(a,sub(a,b)) should be folded. | ||
| ; The expected instructions are identical to the non-commuted i32 case. | ||
| define i32 @underflow_compare_fold_i32_commute(i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i32_commute: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movl %edi, %eax | ||
| ; CHECK-NEXT: subl %esi, %eax | ||
| ; CHECK-NEXT: cmovbl %edi, %eax | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub i32 %a, %b | ||
| %cond = tail call i32 @llvm.umin.i32(i32 %a, i32 %sub) | ||
| ret i32 %cond | ||
| } | ||
|
|
||
| ; Positive test : i32, the sub result has a second use (it is stored to %ptr). | ||
| ; The expected output still contains a single subl; its result is stored and | ||
| ; then conditionally replaced by cmovbl. | ||
| define i32 @underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i32_multi_use: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movl %edi, %eax | ||
| ; CHECK-NEXT: subl %esi, %eax | ||
| ; CHECK-NEXT: movl %eax, (%rdx) | ||
| ; CHECK-NEXT: cmovbl %edi, %eax | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub i32 %a, %b | ||
| store i32 %sub, ptr addrspace(1) %ptr | ||
| %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a) | ||
| ret i32 %cond | ||
| } | ||
|
|
||
| ; Positive test : i16, umin(sub(a,b),a) should be folded. | ||
| ; The expected output uses a 16-bit subw whose flags feed a 32-bit cmovbl; the | ||
| ; "# kill" line is an llc register-liveness comment that appears in the output. | ||
| define i16 @underflow_compare_fold_i16(i16 %a, i16 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i16: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movl %edi, %eax | ||
| ; CHECK-NEXT: subw %si, %ax | ||
| ; CHECK-NEXT: cmovbl %edi, %eax | ||
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub i16 %a, %b | ||
| %cond = tail call i16 @llvm.umin.i16(i16 %sub, i16 %a) | ||
| ret i16 %cond | ||
| } | ||
|
|
||
| ; Positive test : i16, commuted operands umin(a,sub(a,b)) should be folded. | ||
| ; The expected instructions are identical to the non-commuted i16 case. | ||
| define i16 @underflow_compare_fold_i16_commute(i16 %a, i16 %b) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i16_commute: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movl %edi, %eax | ||
| ; CHECK-NEXT: subw %si, %ax | ||
| ; CHECK-NEXT: cmovbl %edi, %eax | ||
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub i16 %a, %b | ||
| %cond = tail call i16 @llvm.umin.i16(i16 %a, i16 %sub) | ||
| ret i16 %cond | ||
| } | ||
|
|
||
| ; Positive test : i16, the sub result has a second use (it is stored to %ptr). | ||
| ; The expected output still contains a single subw; its result is stored with | ||
| ; movw and then conditionally replaced by cmovbl. | ||
| define i16 @underflow_compare_fold_i16_multi_use(i16 %a, i16 %b, ptr addrspace(1) %ptr) { | ||
| ; CHECK-LABEL: underflow_compare_fold_i16_multi_use: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movl %edi, %eax | ||
| ; CHECK-NEXT: subw %si, %ax | ||
| ; CHECK-NEXT: movw %ax, (%rdx) | ||
| ; CHECK-NEXT: cmovbl %edi, %eax | ||
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub i16 %a, %b | ||
| store i16 %sub, ptr addrspace(1) %ptr | ||
| %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a) | ||
| ret i16 %cond | ||
| } | ||
|
|
||
|
|
||
| ; Negative test, vector types : umin(sub(a,b),a) but with vectors. | ||
| ; The expected output keeps the vector subtract (psubb) followed by a vector | ||
| ; unsigned min (pminub); no subtract-with-flags / cmov sequence is produced. | ||
| define <16 x i8> @underflow_compare_dontfold_vectors(<16 x i8> %a, <16 x i8> %b) { | ||
| ; CHECK-LABEL: underflow_compare_dontfold_vectors: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: movdqa %xmm0, %xmm2 | ||
| ; CHECK-NEXT: psubb %xmm1, %xmm2 | ||
| ; CHECK-NEXT: pminub %xmm2, %xmm0 | ||
| ; CHECK-NEXT: retq | ||
| %sub = sub <16 x i8> %a, %b | ||
| %cond = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %sub, <16 x i8> %a) | ||
| ret <16 x i8> %cond | ||
| } | ||
|
|
||
| ; Negative test, pattern mismatch : umin(add(a,b),a). | ||
| ; The first umin operand is an add rather than a sub, so the expected output | ||
| ; keeps the add (emitted as leaq) followed by an explicit cmpq + cmovaeq. | ||
| define i64 @umin_add(i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: umin_add: | ||
| ; CHECK: %bb.0: | ||
| ; CHECK-NEXT: leaq (%rsi,%rdi), %rax | ||
| ; CHECK-NEXT: cmpq %rdi, %rax | ||
| ; CHECK-NEXT: cmovaeq %rdi, %rax | ||
| ; CHECK-NEXT: retq | ||
| %add = add i64 %a, %b | ||
| %cond = tail call i64 @llvm.umin.i64(i64 %add, i64 %a) | ||
| ret i64 %cond | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.