-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[NVPTX] Implement isTruncateFree and isZExtFree for i32/i64 Optimizations #114683
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f43171b
401f834
519453e
6550044
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: llc -march=nvptx64 < %s | FileCheck %s | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This test looks like it's checking that the correct PTX is generated for […]. The purpose of the test associated with this PR should be to ensure that your implementation of […]. I like to find a place where […]
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 to that. The test should have some code which has to choose between free ops vs. an alternative which would be used otherwise (e.g. […]) |
||
| ; Test for truncation from i64 to i32 | ||
| ; | ||
| ; The function takes a single i64 argument, truncates it to i32 and returns | ||
| ; the result. The expected PTX below contains no separate conversion | ||
| ; instruction: the parameter is read with a 32-bit load (ld.param.u32) into | ||
| ; a .b32 register and stored straight to the return slot. | ||
| ; | ||
| ; NOTE(review): this pins the PTX emitted for a plain trunc; whether it | ||
| ; actually exercises the isTruncateFree hook is questioned in the PR | ||
| ; discussion -- confirm before relying on it as coverage for that hook. | ||
| define i32 @test_trunc_i64_to_i32(i64 %val) { | ||
| ; CHECK-LABEL: test_trunc_i64_to_i32( | ||
| ; CHECK: { | ||
| ; CHECK-NEXT: .reg .b32 %r<2>; | ||
| ; CHECK-EMPTY: | ||
| ; CHECK-NEXT: // %bb.0: | ||
| ; CHECK-NEXT: ld.param.u32 %r1, [test_trunc_i64_to_i32_param_0]; | ||
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r1; | ||
| ; CHECK-NEXT: ret; | ||
| %trunc = trunc i64 %val to i32 | ||
| ret i32 %trunc | ||
| } | ||
|
|
||
| ; Test for zero-extension from i32 to i64 | ||
| ; | ||
| ; The function takes a single i32 argument, zero-extends it to i64 and | ||
| ; returns the result. The expected PTX below reads the parameter with | ||
| ; ld.param.u32 directly into a .b64 register (%rd1) and stores it with | ||
| ; st.param.b64; no separate extension instruction appears between the two. | ||
| ; | ||
| ; NOTE(review): this pins the PTX emitted for a plain zext; whether it | ||
| ; actually exercises the isZExtFree hook is questioned in the PR | ||
| ; discussion -- confirm before relying on it as coverage for that hook. | ||
| define i64 @test_zext_i32_to_i64(i32 %val) { | ||
| ; CHECK-LABEL: test_zext_i32_to_i64( | ||
| ; CHECK: { | ||
| ; CHECK-NEXT: .reg .b64 %rd<2>; | ||
| ; CHECK-EMPTY: | ||
| ; CHECK-NEXT: // %bb.0: | ||
| ; CHECK-NEXT: ld.param.u32 %rd1, [test_zext_i32_to_i64_param_0]; | ||
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; | ||
| ; CHECK-NEXT: ret; | ||
| %zext = zext i32 %val to i64 | ||
| ret i64 %zext | ||
| } | ||
|
|
||
| ; Test for operand truncation before select | ||
| ; | ||
| ; Both i64 arguments are truncated to i32 and the i1 condition selects | ||
| ; between the two truncated values. In the expected PTX below: | ||
| ;   - the condition is loaded as a byte, masked with 1 and compared against | ||
| ;     1 to form the predicate %p1; | ||
| ;   - each i64 parameter is read with a 32-bit load (ld.param.u32), so no | ||
| ;     separate truncation instruction appears; | ||
| ;   - the select is performed on 32-bit registers (selp.b32). | ||
| define i32 @test_select_truncate_free(i1 %cond, i64 %a, i64 %b) { | ||
| ; CHECK-LABEL: test_select_truncate_free( | ||
| ; CHECK: { | ||
| ; CHECK-NEXT: .reg .pred %p<2>; | ||
| ; CHECK-NEXT: .reg .b16 %rs<3>; | ||
| ; CHECK-NEXT: .reg .b32 %r<4>; | ||
| ; CHECK-EMPTY: | ||
| ; CHECK-NEXT: // %bb.0: | ||
| ; CHECK-NEXT: ld.param.u8 %rs1, [test_select_truncate_free_param_0]; | ||
| ; CHECK-NEXT: and.b16 %rs2, %rs1, 1; | ||
| ; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1; | ||
| ; CHECK-NEXT: ld.param.u32 %r1, [test_select_truncate_free_param_1]; | ||
| ; CHECK-NEXT: ld.param.u32 %r2, [test_select_truncate_free_param_2]; | ||
| ; CHECK-NEXT: selp.b32 %r3, %r1, %r2, %p1; | ||
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; | ||
| ; CHECK-NEXT: ret; | ||
|
|
||
| ; Truncate both operands first, then select between the narrowed values. | ||
| %trunc_a = trunc i64 %a to i32 | ||
| %trunc_b = trunc i64 %b to i32 | ||
| %result = select i1 %cond, i32 %trunc_a, i32 %trunc_b | ||
| ret i32 %result | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You may want to use `llvm/utils/update_llc_test_checks.py` to generate the checks.