-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[Attributor] Pack out arguments into a struct #119267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: None (elhewaty) Changes
Full diff: https://github.com/llvm/llvm-project/pull/119267.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 116f419129a239..90eecf45892ee6 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2963,6 +2963,119 @@ bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) {
return Result;
}
+// For now: argument can be put in the struct if it's write only and
+// has no aliases.
+static bool canBeComapctedInAStruct(const Argument &Arg, Attributor &A, const AbstractAttribute &QueryingAA) {
+ IRPosition ArgPosition = IRPosition::argument(Arg);
+ // Check if Arg has no alias.
+ auto *AAliasInfo = A.getAAFor<AANoAlias>(QueryingAA, ArgPosition, DepClassTy::NONE);
+ if (!AAliasInfo || !AAliasInfo->isKnownNoAlias())
+ return false;
+
+ // Check if Arg is write-only.
+ const auto *MemBehaviorAA =
+ A.getAAFor<AAMemoryBehavior>(QueryingAA, ArgPosition, DepClassTy::NONE);
+ if (!MemBehaviorAA || !MemBehaviorAA->isKnownWriteOnly())
+ return false;
+
+ return true;
+}
+
+static void replaceArgRetWithStructRetCalls(Function &OldFunction, Function &NewFunction) {
+ for (auto UseItr = OldFunction.use_begin(); UseItr != OldFunction.use_end(); ++UseItr) {
+ CallBase *Call = dyn_cast<CallBase>(UseItr->getUser());
+ if (!Call)
+ continue;
+
+ IRBuilder<> Builder(Call);
+ SmallVector<Value *, 8> NewArgs;
+ for (unsigned ArgIdx = 0; ArgIdx < Call->arg_size(); ++ArgIdx)
+ if (std::find_if(OldFunction.arg_begin(), OldFunction.arg_end(),
+ [&](Argument &Arg) { return &Arg == Call->getArgOperand(ArgIdx); }) == OldFunction.arg_end())
+ NewArgs.push_back(Call->getArgOperand(ArgIdx));
+
+ CallInst *NewCall = Builder.CreateCall(&NewFunction, NewArgs);
+ Call->replaceAllUsesWith(NewCall);
+ Call->eraseFromParent();
+ }
+}
+
+static bool convertOutArgsToRetStruct(Function &F, Attributor &A, AbstractAttribute &QueryingAA) {
+ // Get valid ptr args.
+ DenseMap<Argument *, Type *> PtrToType;
+ for (unsigned ArgIdx = 0; ArgIdx < F.arg_size(); ++ArgIdx) {
+ Argument *Arg = F.getArg(ArgIdx);
+ if (Arg->getType()->isPointerTy() && canBeComapctedInAStruct(*Arg, A, QueryingAA)) {
+ // Get the the type of the pointer through its users
+ for (auto UseItr = Arg->use_begin(); UseItr != Arg->use_end(); ++UseItr) {
+ auto *Store = dyn_cast<StoreInst>(UseItr->getUser());
+ if (Store)
+ PtrToType[Arg] = Store->getValueOperand()->getType();
+ }
+ }
+ }
+
+ // If there is no valid candidates then return false.
+ if (PtrToType.empty())
+ return false;
+
+ // Create the new struct return type.
+ SmallVector<Type *, 4> OutStructElements;
+ if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy())
+ OutStructElements.push_back(OriginalFuncTy);
+
+ for (const auto &[Arg, Type] : PtrToType)
+ OutStructElements.push_back(Type);
+
+ auto *ReturnStructType = StructType::create(F.getContext(), OutStructElements, (F.getName() + "Out").str());
+
+ // Get the new Args.
+ SmallVector<Type *, 4> NewParamTypes;
+ for (unsigned ArgIdx = 0; ArgIdx < F.arg_size(); ++ArgIdx)
+ if (!PtrToType.count(F.getArg(ArgIdx)))
+ NewParamTypes.push_back(F.getArg(ArgIdx)->getType());
+
+ auto *NewFunctionType = FunctionType::get(ReturnStructType, NewParamTypes, F.isVarArg());
+ auto *NewFunction = Function::Create(NewFunctionType, F.getLinkage(), F.getAddressSpace(), F.getName());
+
+ // Map old args to new args.
+ ValueToValueMapTy VMap;
+ auto *NewArgIt = NewFunction->arg_begin();
+ for (Argument &OldArg : F.args())
+ if (!PtrToType.count(F.getArg(OldArg.getArgNo())))
+ VMap[&OldArg] = &(*NewArgIt++);
+
+ // Clone the old function into the new one.
+ SmallVector<ReturnInst *, 8> Returns;
+ CloneFunctionInto(NewFunction, &F, VMap, CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+ // Update the return values (make it struct).
+ for (ReturnInst *Ret : Returns) {
+ IRBuilder<> Builder(Ret);
+ SmallVector<Value *, 4> StructValues;
+ // Include original return type, if any
+ if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy())
+ StructValues.push_back(Ret->getReturnValue());
+
+ // Create a load instruction to fill the struct element.
+ for (const auto &[Arg, Ty] : PtrToType) {
+ Value *OutVal = Builder.CreateLoad(Ty, VMap[Arg]);
+ StructValues.push_back(OutVal);
+ }
+
+ // Build the return struct incrementally.
+ Value *StructRetVal = UndefValue::get(ReturnStructType);
+ for (unsigned i = 0; i < StructValues.size(); ++i)
+ StructRetVal = Builder.CreateInsertValue(StructRetVal, StructValues[i], i);
+
+ Builder.CreateRet(StructRetVal);
+ Ret->eraseFromParent();
+ }
+
+ replaceArgRetWithStructRetCalls(F, *NewFunction);
+ F.eraseFromParent();
+}
+
ChangeStatus Attributor::rewriteFunctionSignatures(
SmallSetVector<Function *, 8> &ModifiedFns) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
diff --git a/llvm/test/Transforms/Attributor/remove_out_args.ll b/llvm/test/Transforms/Attributor/remove_out_args.ll
new file mode 100644
index 00000000000000..bd52bf5d80656c
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/remove_out_args.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=attributor < %s | FileCheck %s
+
+
+
+define i1 @foo(ptr %dst) {
+; CHECK-LABEL: define noundef i1 @foo(
+; CHECK-SAME: ptr nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: store i32 42, ptr [[DST]], align 4
+; CHECK-NEXT: ret i1 true
+;
+entry:
+ store i32 42, ptr %dst
+ ret i1 1
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
1ab8d0c to
8e88711
Compare
|
I know it's not complete. but I wonder if I am on the right track as I am confused. |
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: opt -S -passes=attributor < %s | FileCheck %s | ||
|
|
||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This needs about 100 more tests llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll and llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll would be better starting points
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will. BTW, I don't have an AMD GPU, I have a nvidia one, should J add the tests in NVPTX?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't matter what GPU you have, it doesn't matter when working on the compiler. NVPTX will be less useful for IR testing since it doesn't use a non-0 address space for stack objects
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still need all of these tests, using AMDGPU
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm, I will test using AMDGPU, but I don't know why my changes don't change this simple test? Do I need to call the some function somewhere? the optimizer dosn't see my changes or do my changes don't affect the input?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is an externally visible function, so it's not allowed to change the signature. It would need to be be internal or similar linkage, with a call use
8e88711 to
461834b
Compare
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' e8e75e08c9214fe25b56535fc26f5435a875a137 479165962e72bb5a9e5ac31154f092f2f08bbd4e llvm/test/Transforms/Attributor/remove_out_args.ll llvm/include/llvm/Transforms/IPO/Attributor.h llvm/lib/Transforms/IPO/Attributor.cpp llvm/lib/Transforms/IPO/AttributorAttributes.cppThe following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
}Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
}Please refer to the Undefined Behavior Manual for more information. |
|
@arsenm @shiltian @vidsinghal @jdoerfert @jhuber6 But I need you to share your opinions so far, I will write tests to cover all possible cases, but at least I need this to work with the simple test function written before. What did I mess up while adding the attributor? |
You didn't implement a virtual method of the base class. If you look further along in the error message it should tell you which ones |
461834b to
4faf50a
Compare
@arsenm, Thanks. |
| const IRPosition &ArgPos = IRPosition::argument(Arg); | ||
| auto *AAMem = A.getAAFor<AAMemoryBehavior>(AA, ArgPos, DepClassTy::NONE); | ||
|
|
||
| return Arg.hasNoAliasAttr() && AAMem && AAMem->isKnownWriteOnly() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be querying from AANoAlias instead of directly checking the attribute is present on the argument?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm, in the following example does ptr %dts have any aliases in @foo? as AANoAlias->isKnownNoAlias() returns false.
define internal i1 @foo(ptr %dst) {
entry:
store i32 42, ptr %dst
ret i1 true
}
define i1 @fee(i32 %x, i32 %y) {
%ptr = alloca i32
%a = call i1 @foo(ptr %ptr, i32 %y)
%b = load i32, ptr %ptr
%c = icmp sle i32 %b, %x
%xor = xor i1 %a, %c
ret i1 %xor
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This callsite has the wrong type, so this test is UB and it probably doesn't bother trying to optimize it correctly
| // Every function can track active assumptions. | ||
| getOrCreateAAFor<AAAssumptionInfo>(FPos); | ||
|
|
||
| // Every function can have out arguments. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only functions with pointer arguments, not sure if it's worth checking if there are any
|
|
||
| bool hasCandidateArg = false; | ||
| for (const Argument &Arg : F->args()) | ||
| if (Arg.getType()->isPointerTy() && isEligibleArgument(Arg, A, *this)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you're going to introduce isEligibleArgumentType, use it consistently. You also need to guard against sret arguments
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't hasPointeeInMemoryValueAttr() do this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes
| Argument *Arg = F.getArg(argIdx); | ||
| if (isEligibleArgument(*Arg, A, *this)) { | ||
| CandidateArgs.push_back(Arg); | ||
| for (auto UseItr = Arg->use_begin(); UseItr != Arg->use_end(); ++UseItr) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
range loop, probably needs to be early_inc_range
| SmallVector<Argument *, 4> CandidateArgs; | ||
| for (unsigned argIdx = 0; argIdx < F.arg_size(); ++argIdx) { | ||
| Argument *Arg = F.getArg(argIdx); | ||
| if (isEligibleArgument(*Arg, A, *this)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Early continue and reduce indent
| // If there is no valid candidates then return false. | ||
| if (PtrToType.empty()) | ||
| return ChangeStatus::UNCHANGED; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't reach the manifest stage if it's not possible to do anything
| if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy()) | ||
| OutStructElementsTypes.push_back(OriginalFuncTy); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the function already has a struct return, you could flatten and insert directly into the struct type. It may be a little more effort to emit though.
You also should check whether TargetLowering::CanLowerReturn succeeds, you should avoid over-committing return values if they're going to be forced to the stack. As written out arguments may be in any address space, not just allocas
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd say we get the functionality in and then we add the heuristic on top. It should be easier to test the "maximal" approach first.
|
|
||
| // Redirect all uses of the old call to the new call. | ||
| for (auto &Use : CB->uses()) | ||
| Use.set(NewCall); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should assert that this is the callee use, and not a data operand or other type of non-call user
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: opt -S -passes=attributor < %s | FileCheck %s | ||
|
|
||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still need all of these tests, using AMDGPU
4faf50a to
7e34621
Compare
|
@arsenm, @jdoerfert @shiltian @vidsinghal I run into this crash, any help? |
It seems like the assertion |
|
What trying to do here, is creating a new variables instead of old argument args using allocas. here's the stacktrace |
You're not using the correct address space for the alloca. Regardless of that, I think the crash is probably because PtrToType[&OldArg] is nullptr |
The wrong address space shouldn't cause this crash |
7e34621 to
e4be7f0
Compare
e4be7f0 to
4791659
Compare
|
@jdoerfert thanks. |
jdoerfert
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I left some comments and pointers to APIs that you should look into/use. It might help to list the steps explicitly here:
- Update: Check for each argument if we can rewrite it:
- NoAlias + AAPointerInfo only has stores and they are all at known offsets.
- isValidFunctionSignature/ReturnRewrite succeeds, thus all call sites are known and can be rewritten (more on that below).
- Manifest:
- Remove the arguments with the rewrite API.
- Add a stack allocation into the caller with a size to allow all AAPointerInfo tracked accesses.
- Register rewrites of all the store pointers in AAPointerInfo to the stack allocation.
- Introduce loads of all the values, or an approximation thereof (e.g., one i64 instead of two adjacent i32's) at each return of the function.
- Register rewrites of the return value to be a struct of the loaded values and the original return value, potentially flattened into one struct.
- Register the rewrite of the return type based on the new return type. The rewrite callback has to introduce stores of the returned values back into the originally passed pointer and the offsets in AAPointerInfo. e.g.,
void foo(int *out_2_ints);
becomes
{int, int} R = foo();
out[0] = get<0>(R);
out[1] = get<1>(R);
| if (!markedAsAAConvertArgument) { | ||
| getOrCreateAAFor<AAConvertOutArgument>(FPos); | ||
| markedAsAAConvertArgument = true; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should just be with the other FPos attributes before the argument loop.
You can put an if (F.arg_size()) around it or sth.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So every function with at least one argument is a candidate to have out argument?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. I don't know how aggressive the filters here should try to be, but it could more specifically be there is at least one pointer argument that is writable
| /// ----------- AAConvertOutArgument ---------- | ||
| namespace { | ||
| static bool isEligibleArgument(const Argument &Arg, Attributor &A, | ||
| const AbstractAttribute &AA) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Make this a (static) member function, no need for the global namespace. If it's a regular member you don't need to pass AA.
| auto *NoAlias = A.getAAFor<AANoAlias>(AA, ArgPos, DepClassTy::OPTIONAL); | ||
|
|
||
| return AAMem && NoAlias && AAMem->isAssumedWriteOnly() && | ||
| NoAlias->isAssumedNoAlias() && !Arg.hasPointeeInMemoryValueAttr(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check the hasPointeeInMemoryValueAttr stuff first, with the type above.
| ChangeStatus updateImpl(Attributor &A) override { | ||
| const Function *F = getAssociatedFunction(); | ||
| if (!F || F->isDeclaration()) | ||
| return indicatePessimisticFixpoint(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need for this check, you checked in initialize.
| AAConvertOutArgumentFunction(const IRPosition &IRP, Attributor &A) | ||
| : AAConvertOutArgument(IRP, A) {} | ||
|
|
||
| SmallVector<bool> ArgumentsStates; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Make it a bitvector/bitset. I think ADT has one of those as well.
| for (unsigned ArgIdx = 0; ArgIdx < F.arg_size(); ++ArgIdx) { | ||
| Argument *Arg = F.getArg(ArgIdx); | ||
| if (!isEligibleArgument(*Arg, A, *this)) | ||
| continue; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No need for this check, just look into the AssumedState.
| // AAPointerInfo on args | ||
| for (auto &Use : Arg->uses()) | ||
| if (auto *Store = dyn_cast<StoreInst>(Use.getUser())) | ||
| PtrToType[Arg] = Store->getValueOperand()->getType(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You need to handle more than just stores users here. What if the user is a gep, or some other instruction. You can use AAPointerInfo instead of AAMemoryBehavior and then iterate over all the instructions that actually modify the pointer. It will also tell you the offsets, which you'll likely need.
| if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy()) | ||
| OutStructElementsTypes.push_back(OriginalFuncTy); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd say we get the functionality in and then we add the heuristic on top. It should be easier to test the "maximal" approach first.
| const Function *F = getAssociatedFunction(); | ||
| if (!F || F->isDeclaration()) | ||
| return indicatePessimisticFixpoint(); | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since you have to rewrite all the call sites, you need to check if they are amenable. Use Attributor::checkForAllCallSites to do so, and verify (1) you see them all, and (2) you can properly rewrite them. For (2) I don't have all of the conditions at hand but setting up the check will allow us to easily fill them. In manifest you can use the same API to iterate over all call sites again, create new ones and new code that sets up the return values into the originally passed pointers.
| FunctionType::get(ReturnStructType, NewParamTypes, F.isVarArg()); | ||
| auto *NewFunction = | ||
| Function::Create(NewFunctionType, F.getLinkage(), F.getAddressSpace(), | ||
| F.getName() + ".converted"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check out
Attributor::isValidFunctionSignatureRewrite
I believe it will even check the call sites for you so you don't have to do it yourself (basically ignore what I wrote above).
Attributor::registerFunctionSignatureRewrite
And this will do the function cloning and other things for you.
You might need to add a new pair of APIs though:
Attributor::isValidFunctionReturnRewrite
and
Attributor::registerFunctionReturnRewrite
PR Summary:
Transforms functions with write-only pointer arguments into functions that return a struct containing the original return value and pointer-written values, improving GPU performance, and maybe x86 too.
Example:
we need to convert this:
into