-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AMDGPU] Extended vector promotion to aggregate types. #143784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,263 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca-to-vector -amdgpu-promote-alloca-to-vector-limit=512 -amdgpu-promote-alloca-to-vector-max-regs=32 %s | FileCheck %s | ||
zGoldthorpe marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| declare void @clobber_i8(i8) | ||
|
|
||
| define void @test_v4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_v4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca <4 x i8>, align 4, addrspace(5) | ||
zGoldthorpe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_a4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_a4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca [4 x i8], align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_a2v4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_a2v4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <8 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca [2 x <4 x i8>], align 4, addrspace(5) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought SROA already tried to flatten aggregates into simple arrays. Why do we need to do this? We don't need to handle all IR optimally, just post-optimization IR.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This behaviour is actually already implemented in the AMDGPUPromoteAllocaToVector pass. I put the test in to ensure this behaviour didn't change with this PR. If I understand correctly, the problem with SROA is that it tends to prefer promoting types to scalar registers rather than vectors, which is why the AMDGPUPromoteAllocaToVector pass is scheduled before SROA in the pipeline.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If SROA can break the value up, that is better; we should be running this after SROA. |
||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_a2v3i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_a2v3i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <8 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca [2 x <3 x i8>], align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_a2a4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_a2a4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <8 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca [2 x [4 x i8]], align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_a2a3i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_a2a3i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <6 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <6 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca [2 x [3 x i8]], align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_s1v4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_s1v4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {<4 x i8>}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_s1a4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_s1a4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {[4 x i8]}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_s4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_s4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {i8, i8, i8, i8}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_s2v4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_s2v4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <8 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {<4 x i8>, <4 x i8>}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_s2v2i8v4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_s2v2i8v4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <8 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {<2 x i8>, <4 x i8>}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_s2v2i8v3i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_s2v2i8v3i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <8 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {<2 x i8>, <3 x i8>}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_s2s2i8s4i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_s2s2i8s4i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <6 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <6 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {{i8, i8}, {i8, i8, i8, i8}}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_s2s2i8s3i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_s2s2i8s3i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <5 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <5 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {{i8, i8}, {i8, i8, i8}}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| define void @test_s3i8s1i8v2i8(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_s3i8s1i8v2i8( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i8> poison | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {i8, {i8}, <2 x i8>}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| ; Heterogeneous element types (here i8 mixed with i16) are not supported, so the alloca is expected to be left unpromoted. | ||
| define void @test_heterogeneous(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_heterogeneous( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = alloca { i8, i8, i16 }, align 4, addrspace(5) | ||
| ; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr addrspace(5) [[PTR]], align 1 | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[VAL]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {i8, i8, i16}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
|
|
||
| ; Structs with an empty struct member are not supported, so the alloca is expected to be left unpromoted. | ||
| define void @test_empty(i64 %idx) { | ||
| ; CHECK-LABEL: define void @test_empty( | ||
| ; CHECK-SAME: i64 [[IDX:%.*]]) { | ||
| ; CHECK-NEXT: [[STACK:%.*]] = alloca { i8, {} }, align 4, addrspace(5) | ||
| ; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[STACK]], i64 [[IDX]] | ||
| ; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr addrspace(5) [[PTR]], align 1 | ||
| ; CHECK-NEXT: call void @clobber_i8(i8 [[VAL]]) | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
| %stack = alloca {i8, {}}, align 4, addrspace(5) | ||
| %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx | ||
| %val = load i8, ptr addrspace(5) %ptr, align 1 | ||
| call void @clobber_i8(i8 %val) | ||
| ret void | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.