-
Notifications
You must be signed in to change notification settings - Fork 15.1k
AMDGPU: Implement tensor load and store instructions for gfx1250 #146636
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py | ||
| // REQUIRES: amdgpu-registered-target | ||
| // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250 | ||
|
|
||
| typedef int v4i __attribute__((ext_vector_type(4))); | ||
| typedef int v8i __attribute__((ext_vector_type(8))); | ||
|
|
||
| // CHECK-GFX1250-LABEL: @test_amdgcn_tensor_load_to_lds( | ||
| // CHECK-GFX1250-NEXT: entry: | ||
| // CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], i32 0) | ||
| // CHECK-GFX1250-NEXT: ret void | ||
| // | ||
| // Codegen test: forwards the four vector arguments plus the constant 0 to | ||
| // __builtin_amdgcn_tensor_load_to_lds. The CHECK lines above expect this to | ||
| // become one call to @llvm.amdgcn.tensor.load.to.lds ending in `i32 0`. | ||
| void test_amdgcn_tensor_load_to_lds(v4i sg0, v8i sg1, v4i sg2, v4i sg3) | ||
| { | ||
| __builtin_amdgcn_tensor_load_to_lds(sg0, sg1, sg2, sg3, 0); | ||
| } | ||
|
|
||
| // CHECK-GFX1250-LABEL: @test_amdgcn_tensor_load_to_lds_d2( | ||
| // CHECK-GFX1250-NEXT: entry: | ||
| // CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.load.to.lds.d2(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], i32 27) | ||
| // CHECK-GFX1250-NEXT: ret void | ||
| // | ||
| // Codegen test for the two-vector "_d2" form: forwards sg0, sg1 and the | ||
| // constant 27 to __builtin_amdgcn_tensor_load_to_lds_d2. The CHECK lines above | ||
| // expect one call to @llvm.amdgcn.tensor.load.to.lds.d2 ending in `i32 27`. | ||
| void test_amdgcn_tensor_load_to_lds_d2(v4i sg0, v8i sg1) | ||
| { | ||
| __builtin_amdgcn_tensor_load_to_lds_d2(sg0, sg1, 27); | ||
| } | ||
|
|
||
| // CHECK-GFX1250-LABEL: @test_amdgcn_tensor_store_from_lds( | ||
| // CHECK-GFX1250-NEXT: entry: | ||
| // CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], <4 x i32> [[SG2:%.*]], <4 x i32> [[SG3:%.*]], i32 22) | ||
| // CHECK-GFX1250-NEXT: ret void | ||
| // | ||
| // Codegen test: forwards the four vector arguments plus the constant 22 to | ||
| // __builtin_amdgcn_tensor_store_from_lds. The CHECK lines above expect this to | ||
| // become one call to @llvm.amdgcn.tensor.store.from.lds ending in `i32 22`. | ||
| void test_amdgcn_tensor_store_from_lds(v4i sg0, v8i sg1, v4i sg2, v4i sg3) | ||
| { | ||
| __builtin_amdgcn_tensor_store_from_lds(sg0, sg1, sg2, sg3, 22); | ||
| } | ||
|
|
||
| // CHECK-GFX1250-LABEL: @test_amdgcn_tensor_store_from_lds_d2( | ||
| // CHECK-GFX1250-NEXT: entry: | ||
| // CHECK-GFX1250-NEXT: tail call void @llvm.amdgcn.tensor.store.from.lds.d2(<4 x i32> [[SG0:%.*]], <8 x i32> [[SG1:%.*]], i32 0) | ||
| // CHECK-GFX1250-NEXT: ret void | ||
| // | ||
| // Codegen test for the two-vector "_d2" form: forwards sg0, sg1 and the | ||
| // constant 0 to __builtin_amdgcn_tensor_store_from_lds_d2. The CHECK lines | ||
| // above expect one call to @llvm.amdgcn.tensor.store.from.lds.d2 ending in `i32 0`. | ||
| void test_amdgcn_tensor_store_from_lds_d2(v4i sg0, v8i sg1) | ||
| { | ||
| __builtin_amdgcn_tensor_store_from_lds_d2(sg0, sg1, 0); | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -3348,6 +3348,20 @@ void AMDGPURegisterBankInfo::applyMappingImpl( | |||||||||||||||||||||||||||||
| MI.eraseFromParent(); | ||||||||||||||||||||||||||||||
| return; | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_tensor_load_to_lds: | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_tensor_store_from_lds: { | ||||||||||||||||||||||||||||||
| constrainOpWithReadfirstlane(B, MI, 1); | ||||||||||||||||||||||||||||||
| constrainOpWithReadfirstlane(B, MI, 2); | ||||||||||||||||||||||||||||||
| constrainOpWithReadfirstlane(B, MI, 3); | ||||||||||||||||||||||||||||||
| constrainOpWithReadfirstlane(B, MI, 4); | ||||||||||||||||||||||||||||||
| return; | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_tensor_load_to_lds_d2: | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_tensor_store_from_lds_d2: { | ||||||||||||||||||||||||||||||
| constrainOpWithReadfirstlane(B, MI, 1); | ||||||||||||||||||||||||||||||
| constrainOpWithReadfirstlane(B, MI, 2); | ||||||||||||||||||||||||||||||
| return; | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| default: { | ||||||||||||||||||||||||||||||
| if (const AMDGPU::RsrcIntrinsic *RSrcIntrin = | ||||||||||||||||||||||||||||||
| AMDGPU::lookupRsrcIntrinsic(IntrID)) { | ||||||||||||||||||||||||||||||
|
|
@@ -5354,6 +5368,22 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { | |||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_pops_exiting_wave_id: | ||||||||||||||||||||||||||||||
| return getDefaultMappingSOP(MI); | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_tensor_load_to_lds_d2: | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_tensor_store_from_lds_d2: | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_tensor_load_to_lds: | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_tensor_store_from_lds: { | ||||||||||||||||||||||||||||||
| // Lie and claim everything is legal, even though all operands need to be | ||||||||||||||||||||||||||||||
| // SGPRs. applyMapping will have to deal with it using readfirstlane. | ||||||||||||||||||||||||||||||
| for (unsigned I = 1; I < MI.getNumOperands(); ++I) { | ||||||||||||||||||||||||||||||
| if (MI.getOperand(I).isReg()) { | ||||||||||||||||||||||||||||||
| Register Reg = MI.getOperand(I).getReg(); | ||||||||||||||||||||||||||||||
| auto OpBank = getRegBankID(Reg, MRI); | ||||||||||||||||||||||||||||||
| unsigned Size = getSizeInBits(Reg, MRI, *TRI); | ||||||||||||||||||||||||||||||
| OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size); | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
|
Comment on lines
+5377
to
+5384
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The suggested code does not compile. Thanks.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Sorry. Updated the code accordingly.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
What is the value of "I" then? We need a way to map the operand back to the index. |
||||||||||||||||||||||||||||||
| break; | ||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||
| case Intrinsic::amdgcn_s_prefetch_data: { | ||||||||||||||||||||||||||||||
| OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI); | ||||||||||||||||||||||||||||||
| OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); | ||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||
Uh oh!
There was an error while loading. Please reload this page.