-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[InstCombine] Optimize usub.sat pattern #151044
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 7 commits
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
82336b6
Optimize usub.sat fix for #79690
73f9b08
Merge branch 'main' into opt/79690
nimit25 f3c8b3a
Merge branch 'main' into opt/79690
nimit25 dbfd598
refactorization
408fbbe
Merge branch 'main' into opt/79690
nimit25 c572adb
Add more tests and change the condition for negation
945c0ca
Merge branch 'main' into opt/79690
nimit25 14f0289
Change to APInt for scalar and splat vector
5bc8ace
Merge branch 'main' into opt/79690
nimit25 fb6c736
apply suggestions from code review
b3bad04
test change
79ddd3d
Merge branch 'main' into opt/79690
nimit25 d246173
formatting
da63c85
Merge branch 'main' into opt/79690
nimit25 8553c8a
Merge branch 'main' into opt/79690
nimit25 2a15ef2
Merge branch 'main' into opt/79690
nimit25 501a574
Merge branch 'main' into opt/79690
nimit25 5843911
add multi use tests
1d0468e
Merge branch 'main' into opt/79690
nimit25 f57c871
remove useless header
8bc3293
remove
587ada3
Merge branch 'main' into opt/79690
nimit25 6bc3659
add oneUse checks for usub & or
7c5eeb9
Merge branch 'main' into opt/79690
nimit25 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
206 changes: 206 additions & 0 deletions
206
llvm/test/Transforms/InstCombine/usub_sat_to_msb_mask.ll
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,206 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
|
|
||
dtcxzyw marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| ; RUN: opt < %s -passes=instcombine -S | FileCheck %s | ||
|
|
||
| ; Saturating unsigned subtraction intrinsics used by the tests below, | ||
| ; including the <4 x i32> overload exercised by the vector tests. | ||
| declare i8 @llvm.usub.sat.i8(i8, i8) | ||
| declare i16 @llvm.usub.sat.i16(i16, i16) | ||
| declare i32 @llvm.usub.sat.i32(i32, i32) | ||
| declare i64 @llvm.usub.sat.i64(i64, i64) | ||
| declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) | ||
dtcxzyw marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| ; Positive test (i8, icmp eq form). Both usub.sat constants (223 = 0xDF and | ||
| ; 239 = 0xEF) have the sign bit set. The select between 0 and 128 is expected | ||
| ; to become an `and` with the sign-bit mask of the or'ed usub.sat results, | ||
| ; with the constants rewritten to 96 and 112. | ||
| define i8 @test_i8(i8 %a, i8 %b) { | ||
| ; CHECK-LABEL: define i8 @test_i8( | ||
| ; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 96) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 112) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]] | ||
| ; CHECK-NEXT: [[RES:%.*]] = and i8 [[TMP3]], -128 | ||
| ; CHECK-NEXT: ret i8 [[RES]] | ||
| ; | ||
|
|
||
| %a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223) | ||
| %b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239) | ||
| %or = or i8 %a_sub, %b_sub | ||
| %cmp = icmp eq i8 %or, 0 | ||
| %res = select i1 %cmp, i8 0, i8 128 | ||
| ret i8 %res | ||
| } | ||
|
|
||
| ; Positive test (i8, icmp ne form). Same inputs as @test_i8, but the compare | ||
| ; is `ne` and the select arms are swapped (128 when non-zero, 0 otherwise). | ||
| ; The expected output is the same as for the `eq` form. | ||
| define i8 @test_i8_ne(i8 %a, i8 %b) { | ||
| ; CHECK-LABEL: define i8 @test_i8_ne( | ||
| ; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 96) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 112) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]] | ||
| ; CHECK-NEXT: [[RES:%.*]] = and i8 [[TMP3]], -128 | ||
| ; CHECK-NEXT: ret i8 [[RES]] | ||
| ; | ||
|
|
||
| %a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223) | ||
| %b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239) | ||
| %or = or i8 %a_sub, %b_sub | ||
| %cmp = icmp ne i8 %or, 0 | ||
| %res = select i1 %cmp, i8 128, i8 0 | ||
| ret i8 %res | ||
| } | ||
|
|
||
| ; Positive test (i16). Constants 65409 (0xFF81) and 65423 (0xFF8F) have the | ||
| ; sign bit set; the expected output uses 32642 and 32656 and masks the or'ed | ||
| ; result with the i16 sign bit (-32768). | ||
| define i16 @test_i16(i16 %a, i16 %b) { | ||
| ; CHECK-LABEL: define i16 @test_i16( | ||
| ; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[A]], i16 32642) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[B]], i16 32656) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = or i16 [[TMP1]], [[TMP2]] | ||
| ; CHECK-NEXT: [[RES:%.*]] = and i16 [[TMP3]], -32768 | ||
| ; CHECK-NEXT: ret i16 [[RES]] | ||
| ; | ||
|
|
||
| %a_sub = call i16 @llvm.usub.sat.i16(i16 %a, i16 65409) | ||
| %b_sub = call i16 @llvm.usub.sat.i16(i16 %b, i16 65423) | ||
| %or = or i16 %a_sub, %b_sub | ||
| %cmp = icmp eq i16 %or, 0 | ||
| %res = select i1 %cmp, i16 0, i16 32768 | ||
| ret i16 %res | ||
| } | ||
|
|
||
| ; Positive test (i32). Constants 2147483871 (0x800000DF) and 2147483887 | ||
| ; (0x800000EF) have the sign bit set; the expected output uses 224 and 240 | ||
| ; and masks the or'ed result with the i32 sign bit. | ||
| define i32 @test_i32(i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: define i32 @test_i32( | ||
| ; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 224) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 240) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] | ||
| ; CHECK-NEXT: [[RES:%.*]] = and i32 [[TMP3]], -2147483648 | ||
| ; CHECK-NEXT: ret i32 [[RES]] | ||
| ; | ||
|
|
||
| %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 2147483871) | ||
| %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 2147483887) | ||
| %or = or i32 %a_sub, %b_sub | ||
| %cmp = icmp eq i32 %or, 0 | ||
| %res = select i1 %cmp, i32 0, i32 2147483648 | ||
| ret i32 %res | ||
| } | ||
|
|
||
| ; Positive test (i64). Constants 2^63 + 223 and 2^63 + 239 have the sign bit | ||
| ; set; the expected output uses 224 and 240 and masks the or'ed result with | ||
| ; the i64 sign bit. | ||
| define i64 @test_i64(i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: define i64 @test_i64( | ||
| ; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 224) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[B]], i64 240) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]] | ||
| ; CHECK-NEXT: [[RES:%.*]] = and i64 [[TMP3]], -9223372036854775808 | ||
| ; CHECK-NEXT: ret i64 [[RES]] | ||
| ; | ||
|
|
||
| %a_sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 9223372036854776031) | ||
| %b_sub = call i64 @llvm.usub.sat.i64(i64 %b, i64 9223372036854776047) | ||
| %or = or i64 %a_sub, %b_sub | ||
| %cmp = icmp eq i64 %or, 0 | ||
| %res = select i1 %cmp, i64 0, i64 9223372036854775808 | ||
| ret i64 %res | ||
| } | ||
|
|
||
| ; Negative test. The usub.sat constants (100 and 239) do not have the i32 | ||
| ; sign bit set, and the CHECK lines expect the usub.sat/or/icmp/select | ||
| ; sequence to be left as written. | ||
| define i32 @no_fold_due_to_small_K(i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: define i32 @no_fold_due_to_small_K( | ||
| ; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 100) | ||
| ; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 239) | ||
| ; CHECK-NEXT: [[OR:%.*]] = or i32 [[A_SUB]], [[B_SUB]] | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0 | ||
| ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648 | ||
| ; CHECK-NEXT: ret i32 [[RES]] | ||
| ; | ||
|
|
||
| %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 100) | ||
| %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 239) | ||
| %or = or i32 %a_sub, %b_sub | ||
| %cmp = icmp eq i32 %or, 0 | ||
| %res = select i1 %cmp, i32 0, i32 2147483648 | ||
| ret i32 %res | ||
| } | ||
|
|
||
| ; Negative test with the `or` operands in commuted order (%b_sub first). | ||
| ; The CHECK lines expect the IR to be left unchanged. | ||
| ; NOTE(review): the constants 239 and 223 do not have the sign bit set, so | ||
| ; this presumably fails to fold for the same reason as the small-K test | ||
| ; rather than because of the operand order -- confirm the intent. | ||
| define i32 @commuted_test_neg(i32 %a, i32 %b) { | ||
| ; CHECK-LABEL: define i32 @commuted_test_neg( | ||
| ; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 239) | ||
| ; CHECK-NEXT: [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 223) | ||
| ; CHECK-NEXT: [[OR:%.*]] = or i32 [[B_SUB]], [[A_SUB]] | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0 | ||
| ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648 | ||
| ; CHECK-NEXT: ret i32 [[RES]] | ||
| ; | ||
|
|
||
| %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 239) | ||
| %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 223) | ||
| %or = or i32 %b_sub, %a_sub | ||
| %cmp = icmp eq i32 %or, 0 | ||
| %res = select i1 %cmp, i32 0, i32 2147483648 | ||
| ret i32 %res | ||
| } | ||
| ; Positive test on <4 x i32> with splat constants. Every lane uses the same | ||
| ; sign-bit-set constant (2147483871 for %a, 2147483887 for %b); the expected | ||
| ; output uses splat 224 / splat 240 and masks with the splat sign bit. | ||
| define <4 x i32> @vector_test(<4 x i32> %a, <4 x i32> %b) { | ||
| ; CHECK-LABEL: define <4 x i32> @vector_test( | ||
| ; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> splat (i32 224)) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 240)) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] | ||
| ; CHECK-NEXT: [[RES:%.*]] = and <4 x i32> [[TMP3]], splat (i32 -2147483648) | ||
| ; CHECK-NEXT: ret <4 x i32> [[RES]] | ||
| ; | ||
|
|
||
|
|
||
| %a_sub = call <4 x i32> @llvm.usub.sat.v4i32( | ||
| <4 x i32> %a, | ||
| <4 x i32> <i32 2147483871, i32 2147483871, i32 2147483871, i32 2147483871>) | ||
| %b_sub = call <4 x i32> @llvm.usub.sat.v4i32( | ||
| <4 x i32> %b, | ||
| <4 x i32> <i32 2147483887, i32 2147483887, i32 2147483887, i32 2147483887>) | ||
| %or = or <4 x i32> %a_sub, %b_sub | ||
| %cmp = icmp eq <4 x i32> %or, zeroinitializer | ||
| %res = select <4 x i1> %cmp, <4 x i32> zeroinitializer, | ||
| <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> | ||
| ret <4 x i32> %res | ||
| } | ||
|
|
||
| ; Negative vector test. The %a constant is not a splat: lane 1 is 0 while | ||
| ; the other lanes are 2147483871. The CHECK lines expect the original | ||
| ; usub.sat/or/icmp/select sequence to be kept. | ||
| define <4 x i32> @vector_negative_test(<4 x i32> %a, <4 x i32> %b) { | ||
| ; CHECK-LABEL: define <4 x i32> @vector_negative_test( | ||
| ; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[A_SUB:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> <i32 -2147483425, i32 0, i32 -2147483425, i32 -2147483425>) | ||
| ; CHECK-NEXT: [[B_SUB:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 -2147483409)) | ||
| ; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[A_SUB]], [[B_SUB]] | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[OR]], zeroinitializer | ||
| ; CHECK-NEXT: [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 -2147483648) | ||
| ; CHECK-NEXT: ret <4 x i32> [[RES]] | ||
| ; | ||
| %a_sub = call <4 x i32> @llvm.usub.sat.v4i32( | ||
| <4 x i32> %a, | ||
| <4 x i32> <i32 2147483871, i32 0, i32 2147483871, i32 2147483871>) | ||
| %b_sub = call <4 x i32> @llvm.usub.sat.v4i32( | ||
| <4 x i32> %b, | ||
| <4 x i32> <i32 2147483887, i32 2147483887, i32 2147483887, i32 2147483887>) | ||
| %or = or <4 x i32> %a_sub, %b_sub | ||
| %cmp = icmp eq <4 x i32> %or, zeroinitializer | ||
| %res = select <4 x i1> %cmp, <4 x i32> zeroinitializer, | ||
| <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> | ||
| ret <4 x i32> %res | ||
| } | ||
|
|
||
| ; Positive test for the `icmp ne` form on <4 x i32> splat constants. The | ||
| ; compare is `ne` and the select arms are swapped (sign-bit splat when the | ||
| ; or'ed value is non-zero, zero otherwise), mirroring the scalar | ||
| ; @test_i8_ne. The expected folded output matches the `eq` vector test. | ||
| define <4 x i32> @vector_ne_test(<4 x i32> %a, <4 x i32> %b) { | ||
| ; CHECK-LABEL: define <4 x i32> @vector_ne_test( | ||
| ; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) { | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> splat (i32 224)) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 240)) | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] | ||
| ; CHECK-NEXT: [[RES:%.*]] = and <4 x i32> [[TMP3]], splat (i32 -2147483648) | ||
| ; CHECK-NEXT: ret <4 x i32> [[RES]] | ||
| ; | ||
|
|
||
| %a_sub = call <4 x i32> @llvm.usub.sat.v4i32( | ||
| <4 x i32> %a, | ||
| <4 x i32> <i32 2147483871, i32 2147483871, i32 2147483871, i32 2147483871>) | ||
| %b_sub = call <4 x i32> @llvm.usub.sat.v4i32( | ||
| <4 x i32> %b, | ||
| <4 x i32> <i32 2147483887, i32 2147483887, i32 2147483887, i32 2147483887>) | ||
| %or = or <4 x i32> %a_sub, %b_sub | ||
| %cmp = icmp ne <4 x i32> %or, zeroinitializer | ||
| %res = select <4 x i1> %cmp, | ||
| <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, | ||
| <4 x i32> zeroinitializer | ||
| ret <4 x i32> %res | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.