-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[Attributor] Pack out arguments into a struct #119267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -68,6 +68,7 @@ | |
| #include "llvm/Support/raw_ostream.h" | ||
| #include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||
| #include "llvm/Transforms/Utils/CallPromotionUtils.h" | ||
| #include "llvm/Transforms/Utils/Cloning.h" | ||
| #include "llvm/Transforms/Utils/Local.h" | ||
| #include "llvm/Transforms/Utils/ValueMapper.h" | ||
| #include <cassert> | ||
|
|
@@ -197,6 +198,7 @@ PIPE_OPERATOR(AAAllocationInfo) | |
| PIPE_OPERATOR(AAIndirectCallInfo) | ||
| PIPE_OPERATOR(AAGlobalValueInfo) | ||
| PIPE_OPERATOR(AADenormalFPMath) | ||
| PIPE_OPERATOR(AAConvertOutArgument) | ||
|
|
||
| #undef PIPE_OPERATOR | ||
|
|
||
|
|
@@ -12987,6 +12989,225 @@ struct AAAllocationInfoCallSiteArgument : AAAllocationInfoImpl { | |
| }; | ||
| } // namespace | ||
|
|
||
| /// ----------- AAConvertOutArgument ---------- | ||
| namespace { | ||
| static bool isEligibleArgument(const Argument &Arg, Attributor &A, | ||
| const AbstractAttribute &AA) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make this a (static) member function, no need for the global namespace. If it's a regular member you don't need to pass AA. |
||
| if (!Arg.getType()->isPointerTy()) | ||
| return false; | ||
|
|
||
| const IRPosition &ArgPos = IRPosition::argument(Arg); | ||
| auto *AAMem = A.getAAFor<AAMemoryBehavior>(AA, ArgPos, DepClassTy::OPTIONAL); | ||
| auto *NoAlias = A.getAAFor<AANoAlias>(AA, ArgPos, DepClassTy::OPTIONAL); | ||
|
|
||
| return AAMem && NoAlias && AAMem->isAssumedWriteOnly() && | ||
| NoAlias->isAssumedNoAlias() && !Arg.hasPointeeInMemoryValueAttr(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Check the hasPointeeInMemoryValueAttr stuff first, with the type above. |
||
| } | ||
|
|
||
| struct AAConvertOutArgumentFunction final : AAConvertOutArgument { | ||
| AAConvertOutArgumentFunction(const IRPosition &IRP, Attributor &A) | ||
| : AAConvertOutArgument(IRP, A) {} | ||
|
|
||
| SmallVector<bool> ArgumentsStates; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make it a bitvector/bitset. I think ADT has one of those as well. |
||
|
|
||
| /// See AbstractAttribute::updateImpl(...). | ||
| void initialize(Attributor &A) override { | ||
| const Function *F = getAssociatedFunction(); | ||
| if (!F || F->isDeclaration()) | ||
| return; | ||
|
|
||
| // Assume that all args are convertable at the begining. | ||
| ArgumentsStates.resize(F->arg_size(), true); | ||
| } | ||
|
|
||
| /// See AbstractAttribute::updateImpl(...). | ||
| ChangeStatus updateImpl(Attributor &A) override { | ||
| const Function *F = getAssociatedFunction(); | ||
| if (!F || F->isDeclaration()) | ||
| return indicatePessimisticFixpoint(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for this check, you checked in initialize. |
||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since you have to rewrite all the call sites, you need to check if they are amenable. Use Attributor::checkForAllCallSites to do so, and verify (1) you see them all, and (2) you can properly rewrite them. For (2) I don't have all of the conditions at hand but setting up the check will allow us to easily fill them. In manifest you can use the same API to iterate over all call sites again, create new ones and new code that sets up the return values into the originally passed pointers. |
||
| auto NewStates = ArgumentsStates; | ||
| for (unsigned ArgIdx = 0; ArgIdx < F->arg_size(); ++ArgIdx) | ||
| if (!isEligibleArgument(*F->getArg(ArgIdx), A, *this)) | ||
| NewStates[ArgIdx] = false; | ||
|
|
||
| bool Changed = NewStates == ArgumentsStates; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't need to copy the state, you can set the Changed flag as part of the assignment above if you check first that it currently is set to true. |
||
| ArgumentsStates = NewStates; | ||
| return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; | ||
| } | ||
|
|
||
| /// See AbstractAttribute::manifest(...). | ||
| ChangeStatus manifest(Attributor &A) override { | ||
| Function &F = *getAssociatedFunction(); | ||
| DenseMap<Argument *, Type *> PtrToType; | ||
| SmallVector<Argument *, 4> CandidateArgs; | ||
|
|
||
| for (unsigned ArgIdx = 0; ArgIdx < F.arg_size(); ++ArgIdx) { | ||
| Argument *Arg = F.getArg(ArgIdx); | ||
| if (!isEligibleArgument(*Arg, A, *this)) | ||
| continue; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No need for this check, just look into the AssumedState. |
||
|
|
||
| CandidateArgs.push_back(Arg); | ||
| // AAPointerInfo on args | ||
| for (auto &Use : Arg->uses()) | ||
| if (auto *Store = dyn_cast<StoreInst>(Use.getUser())) | ||
| PtrToType[Arg] = Store->getValueOperand()->getType(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need to handle more than just stores users here. What if the user is a gep, or some other instruction. You can use AAPointerInfo instead of AAMemoryBehavior and then iterate over all the instructions that actually modify the pointer. It will also tell you the offsets, which you'll likely need. |
||
| } | ||
|
|
||
| // If there is no valid candidates then return false. | ||
| if (PtrToType.empty()) | ||
| return indicatePessimisticFixpoint(); | ||
|
|
||
| // Create the new struct return type. | ||
| SmallVector<Type *, 4> OutStructElementsTypes; | ||
| if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy()) | ||
| OutStructElementsTypes.push_back(OriginalFuncTy); | ||
|
Comment on lines
+13063
to
+13064
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the function already has a struct return, you could flatten and insert directly into the struct type. It may be a little more effort to emit though. You also should check whether TargetLowering::CanLowerReturn succeeds, you should avoid over-committing return values if they're going to be forced to the stack. As written out arguments may be in any address space, not just allocas
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd say we get the functionality in and then we add the heuristic on top. It should be easier to test the "maximal" approach first. |
||
|
|
||
| for (auto *Arg : CandidateArgs) | ||
| OutStructElementsTypes.push_back(PtrToType[Arg]); | ||
|
|
||
| auto *ReturnStructType = StructType::create( | ||
| F.getContext(), OutStructElementsTypes, (F.getName() + "_out").str()); | ||
|
|
||
| // Get the new Args. | ||
| SmallVector<Type *, 4> NewParamTypes; | ||
| for (auto &Arg : F.args()) | ||
| if (!PtrToType.count(&Arg)) | ||
| NewParamTypes.push_back(Arg.getType()); | ||
|
|
||
| auto *NewFunctionType = | ||
| FunctionType::get(ReturnStructType, NewParamTypes, F.isVarArg()); | ||
| auto *NewFunction = | ||
| Function::Create(NewFunctionType, F.getLinkage(), F.getAddressSpace(), | ||
| F.getName() + ".converted"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Check out I believe it will even check the call sites for you so you don't have to do it yourself (basically ignore what I wrote above). And this will do the function cloning and other things for you. You might need to add a new pair of APIs though: and |
||
|
|
||
| // Map old arguments to new ones, And also map the old arguments to struct | ||
| // elements. | ||
| ValueToValueMapTy VMap; | ||
| auto NewArgIt = NewFunction->arg_begin(); | ||
| BasicBlock *EntryBlock = | ||
| BasicBlock::Create(NewFunction->getContext(), "entry", NewFunction); | ||
|
|
||
| IRBuilder<> EntryBuilder(EntryBlock); | ||
| for (auto &OldArg : F.args()) { | ||
| if (PtrToType.count(&OldArg)) { | ||
| dbgs() << "OldArg: " << OldArg | ||
| << " ======> Type: " << *PtrToType[&OldArg] << "\n"; | ||
| AllocaInst *Alloca = EntryBuilder.CreateAlloca( | ||
| PtrToType[&OldArg], nullptr, OldArg.getName() + "_"); | ||
| VMap[&OldArg] = Alloca; | ||
| } else | ||
| VMap[&OldArg] = &(*NewArgIt++); | ||
| } | ||
|
|
||
| // Clone the old function into the new one. | ||
| SmallVector<ReturnInst *, 8> Returns; | ||
| CloneFunctionInto(NewFunction, &F, VMap, | ||
| CloneFunctionChangeType::LocalChangesOnly, Returns); | ||
|
|
||
| // Update the return values (make it struct). | ||
| for (ReturnInst *Ret : Returns) { | ||
| IRBuilder<> Builder(Ret); | ||
| SmallVector<Value *, 4> StructValues; | ||
| // Include original return type, if any | ||
| if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy()) | ||
| StructValues.push_back(Ret->getReturnValue()); | ||
|
|
||
| // Create a load instruction to fill the struct element. | ||
| for (auto *Arg : CandidateArgs) { | ||
| Value *OutVal = Builder.CreateLoad(PtrToType[Arg], VMap[Arg]); | ||
| StructValues.push_back(OutVal); | ||
| } | ||
|
|
||
| // Build the return struct incrementally. | ||
| Value *StructRetVal = UndefValue::get(ReturnStructType); | ||
| for (unsigned i = 0; i < StructValues.size(); ++i) | ||
| StructRetVal = | ||
| Builder.CreateInsertValue(StructRetVal, StructValues[i], i); | ||
|
|
||
| Builder.CreateRet(StructRetVal); | ||
| A.deleteAfterManifest(*Ret); | ||
| } | ||
| return ChangeStatus::CHANGED; | ||
| } | ||
|
|
||
| /// See AbstractAttribute::getAsStr(...). | ||
| const std::string getAsStr(Attributor *A) const override { | ||
| return "AAConvertOutArgumentFunction"; | ||
| } | ||
|
|
||
| /// See AbstractAttribute::trackStatistics() | ||
| void trackStatistics() const override {} | ||
| }; | ||
|
|
||
| struct AAConvertOutArgumentCallSite final : AAConvertOutArgument { | ||
| AAConvertOutArgumentCallSite(const IRPosition &IRP, Attributor &A) | ||
| : AAConvertOutArgument(IRP, A) {} | ||
|
|
||
| /// See AbstractAttribute::updateImpl(...). | ||
| ChangeStatus updateImpl(Attributor &A) override { | ||
| CallBase *CB = cast<CallBase>(getCtxI()); | ||
| Function *F = CB->getCalledFunction(); | ||
| if (!F) | ||
| return indicatePessimisticFixpoint(); | ||
|
|
||
| // Get convert attribute. | ||
| auto *ConvertAA = A.getAAFor<AAConvertOutArgument>( | ||
| *this, IRPosition::function(*F), DepClassTy::REQUIRED); | ||
|
|
||
| // If function will be transformed, mark this call site for update | ||
| if (!ConvertAA || ConvertAA->isAssumedConvertible()) | ||
| return ChangeStatus::CHANGED; | ||
|
|
||
| return ChangeStatus::UNCHANGED; | ||
| } | ||
|
|
||
| /// See AbstractAttribute::manifest(...). | ||
| ChangeStatus manifest(Attributor &A) override { | ||
| CallBase *CB = cast<CallBase>(getCtxI()); | ||
| Function *F = CB->getCalledFunction(); | ||
| if (!F) | ||
| return ChangeStatus::UNCHANGED; | ||
|
|
||
| IRBuilder<> Builder(CB); | ||
| // Create args for new call. | ||
| SmallVector<Value *, 4> NewArgs; | ||
| for (unsigned ArgIdx = 0; ArgIdx < CB->arg_size(); ++ArgIdx) { | ||
| Value *Arg = CB->getArgOperand(ArgIdx); | ||
| Argument *ParamArg = F->getArg(ArgIdx); | ||
| if (!isEligibleArgument(*ParamArg, A, *this)) | ||
| NewArgs.push_back(Arg); | ||
| } | ||
|
|
||
| Module *M = F->getParent(); | ||
| auto *NewF = M->getFunction((F->getName() + ".converted").str()); | ||
| if (!NewF) | ||
| return ChangeStatus::UNCHANGED; | ||
|
|
||
| FunctionCallee NewCallee(NewF->getFunctionType(), NewF); | ||
| Instruction *NewCall = | ||
| CallInst::Create(NewCallee, NewArgs, CB->getName() + ".converted", CB); | ||
| IRPosition ReturnPos = IRPosition::callsite_returned(*CB); | ||
| A.changeAfterManifest(ReturnPos, *NewCall); | ||
|
|
||
| // Redirect all uses of the old call to the new call. | ||
| for (auto &Use : CB->uses()) | ||
| Use.set(NewCall); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should assert that this is the callee use, and not a data operand or other type of non-call user |
||
|
|
||
| A.deleteAfterManifest(*CB); | ||
| return ChangeStatus::CHANGED; | ||
| } | ||
|
|
||
| /// See AbstractAttribute::getAsStr(...). | ||
| const std::string getAsStr(Attributor *A) const override { | ||
| return "AAConvertOutArgumentCallSite"; | ||
| } | ||
|
|
||
| /// See AbstractAttribute::trackStatistics() | ||
| void trackStatistics() const override {} | ||
| }; | ||
| } // namespace | ||
|
|
||
| const char AANoUnwind::ID = 0; | ||
| const char AANoSync::ID = 0; | ||
| const char AANoFree::ID = 0; | ||
|
|
@@ -13024,6 +13245,7 @@ const char AAAllocationInfo::ID = 0; | |
| const char AAIndirectCallInfo::ID = 0; | ||
| const char AAGlobalValueInfo::ID = 0; | ||
| const char AADenormalFPMath::ID = 0; | ||
| const char AAConvertOutArgument::ID = 0; | ||
|
|
||
| // Macro magic to create the static generator function for attributes that | ||
| // follow the naming scheme. | ||
|
|
@@ -13139,6 +13361,7 @@ CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryLocation) | |
| CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges) | ||
| CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAssumptionInfo) | ||
| CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMustProgress) | ||
| CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAConvertOutArgument) | ||
|
|
||
| CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull) | ||
| CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 | ||
| ; RUN: opt -S -passes=attributor < %s | FileCheck %s | ||
|
|
||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This needs about 100 more tests llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll and llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll would be better starting points
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will. BTW, I don't have an AMD GPU, I have a nvidia one, should J add the tests in NVPTX?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't matter what GPU you have, it doesn't matter when working on the compiler. NVPTX will be less useful for IR testing since it doesn't use a non-0 address space for stack objects
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still need all of these tests, using AMDGPU
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @arsenm, I will test using AMDGPU, but I don't know why my changes don't change this simple test? Do I need to call the some function somewhere? the optimizer dosn't see my changes or do my changes don't affect the input?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an externally visible function, so it's not allowed to change the signature. It would need to be be internal or similar linkage, with a call use |
||
|
|
||
| define internal i1 @foo(ptr %dst, i32 %a, i32 %b) { | ||
| entry: | ||
| %x = xor i32 %a, 13 | ||
| %y = add i32 %b, 5 | ||
| %z = icmp sle i32 %x, %y | ||
| br i1 %z, label %if, label %else | ||
|
|
||
| if: | ||
| store i32 %x, ptr %dst, align 4 | ||
| br label %end | ||
|
|
||
| else: | ||
| store i32 %y, ptr %dst, align 4 | ||
| br label %end | ||
|
|
||
| end: | ||
| %t = mul i32 %x, %y | ||
| %tt = xor i32 %x, %y | ||
| %result = icmp eq i32 %t, %tt | ||
| ret i1 %result | ||
| } | ||
|
|
||
|
|
||
| define i1 @fee(i32 %x, i32 %y, i32 %z) { | ||
| ; CHECK-LABEL: define i1 @fee( | ||
| ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) #[[ATTR0:[0-9]+]] { | ||
| ; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4 | ||
| ; CHECK-NEXT: [[A:%.*]] = call i1 @foo.converted(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32 [[Y]], i32 [[Z]]) #[[ATTR1:[0-9]+]] | ||
| ; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[PTR]], align 4 | ||
| ; CHECK-NEXT: [[C:%.*]] = icmp sle i32 [[B]], [[X]] | ||
| ; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[A]], [[C]] | ||
| ; CHECK-NEXT: ret i1 [[XOR]] | ||
| ; | ||
| %ptr = alloca i32 | ||
| %a = call i1 @foo(ptr %ptr, i32 %y, i32 %z) | ||
| %b = load i32, ptr %ptr | ||
| %c = icmp sle i32 %b, %x | ||
| %xor = xor i1 %a, %c | ||
| ret i1 %xor | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should just be with the other FPos attributes before the argument loop.
You can put an if (F.arg_size()) around it or sth.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So every function with at least one argument is a candidate to have out argument?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. I don't know how aggressive the filters here should try to be, but it could more specifically be there is at least one pointer argument that is writable