Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions llvm/include/llvm/Transforms/IPO/Attributor.h
Original file line number Diff line number Diff line change
Expand Up @@ -6477,6 +6477,39 @@ struct AADenormalFPMath
static const char ID;
};

/// An abstract attribute for converting out arguments into struct elements.
struct AAConvertOutArgument
: public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;

AAConvertOutArgument(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// Create an abstract attribute view for the position \p IRP.
static AAConvertOutArgument &createForPosition(const IRPosition &IRP,
Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAConvertOutArgument"; }

/// Return true if convertible is assumed.
bool isAssumedConvertible() const { return getAssumed(); }

/// Return true if convertible is known.
bool isKnownConvertible() const { return getKnown(); }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AADenormalFPMath.
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
};

raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);

/// Run options, used by the pass manager.
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/IPO/Attributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3459,6 +3459,7 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
}
}

bool markedAsAAConvertArgument = false;
for (Argument &Arg : F.args()) {
IRPosition ArgPos = IRPosition::argument(Arg);
auto ArgNo = Arg.getArgNo();
Expand Down Expand Up @@ -3510,6 +3511,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every argument with pointer type might be privatizable (or
// promotable)
getOrCreateAAFor<AAPrivatizablePtr>(ArgPos);

// Every function with pointer argument type can have out arguments.
if (!markedAsAAConvertArgument) {
getOrCreateAAFor<AAConvertOutArgument>(FPos);
markedAsAAConvertArgument = true;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should just be with the other FPos attributes before the argument loop.
You can put an if (F.arg_size()) around it or sth.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So every function with at least one argument is a candidate to have out argument?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I don't know how aggressive the filters here should try to be, but it could more specifically be there is at least one pointer argument that is writable

} else if (AttributeFuncs::isNoFPClassCompatibleType(Arg.getType())) {
getOrCreateAAFor<AANoFPClass>(ArgPos);
}
Expand Down
223 changes: 223 additions & 0 deletions llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
Expand Down Expand Up @@ -197,6 +198,7 @@ PIPE_OPERATOR(AAAllocationInfo)
PIPE_OPERATOR(AAIndirectCallInfo)
PIPE_OPERATOR(AAGlobalValueInfo)
PIPE_OPERATOR(AADenormalFPMath)
PIPE_OPERATOR(AAConvertOutArgument)

#undef PIPE_OPERATOR

Expand Down Expand Up @@ -12987,6 +12989,225 @@ struct AAAllocationInfoCallSiteArgument : AAAllocationInfoImpl {
};
} // namespace

/// ----------- AAConvertOutArgument ----------
namespace {
static bool isEligibleArgument(const Argument &Arg, Attributor &A,
const AbstractAttribute &AA) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make this a (static) member function, no need for the global namespace. If it's a regular member you don't need to pass AA.

if (!Arg.getType()->isPointerTy())
return false;

const IRPosition &ArgPos = IRPosition::argument(Arg);
auto *AAMem = A.getAAFor<AAMemoryBehavior>(AA, ArgPos, DepClassTy::OPTIONAL);
auto *NoAlias = A.getAAFor<AANoAlias>(AA, ArgPos, DepClassTy::OPTIONAL);

return AAMem && NoAlias && AAMem->isAssumedWriteOnly() &&
NoAlias->isAssumedNoAlias() && !Arg.hasPointeeInMemoryValueAttr();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check the hasPointeeInMemoryValueAttr stuff first, with the type above.

}

struct AAConvertOutArgumentFunction final : AAConvertOutArgument {
AAConvertOutArgumentFunction(const IRPosition &IRP, Attributor &A)
: AAConvertOutArgument(IRP, A) {}

SmallVector<bool> ArgumentsStates;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make it a bitvector/bitset. I think ADT has one of those as well.


/// See AbstractAttribute::updateImpl(...).
void initialize(Attributor &A) override {
const Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
return;

// Assume that all args are convertable at the begining.
ArgumentsStates.resize(F->arg_size(), true);
}

/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
return indicatePessimisticFixpoint();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for this check, you checked in initialize.


Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you have to rewrite all the call sites, you need to check if they are amenable. Use Attributor::checkForAllCallSites to do so, and verify (1) you see them all, and (2) you can properly rewrite them. For (2) I don't have all of the conditions at hand but setting up the check will allow us to easily fill them. In manifest you can use the same API to iterate over all call sites again, create new ones and new code that sets up the return values into the originally passed pointers.

auto NewStates = ArgumentsStates;
for (unsigned ArgIdx = 0; ArgIdx < F->arg_size(); ++ArgIdx)
if (!isEligibleArgument(*F->getArg(ArgIdx), A, *this))
NewStates[ArgIdx] = false;

bool Changed = NewStates == ArgumentsStates;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need to copy the state, you can set the Changed flag as part of the assignment above if you check first that it currently is set to true.

ArgumentsStates = NewStates;
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}

/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
Function &F = *getAssociatedFunction();
DenseMap<Argument *, Type *> PtrToType;
SmallVector<Argument *, 4> CandidateArgs;

for (unsigned ArgIdx = 0; ArgIdx < F.arg_size(); ++ArgIdx) {
Argument *Arg = F.getArg(ArgIdx);
if (!isEligibleArgument(*Arg, A, *this))
continue;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for this check, just look into the AssumedState.


CandidateArgs.push_back(Arg);
// AAPointerInfo on args
for (auto &Use : Arg->uses())
if (auto *Store = dyn_cast<StoreInst>(Use.getUser()))
PtrToType[Arg] = Store->getValueOperand()->getType();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You need to handle more than just stores users here. What if the user is a gep, or some other instruction. You can use AAPointerInfo instead of AAMemoryBehavior and then iterate over all the instructions that actually modify the pointer. It will also tell you the offsets, which you'll likely need.

}

// If there is no valid candidates then return false.
if (PtrToType.empty())
return indicatePessimisticFixpoint();

// Create the new struct return type.
SmallVector<Type *, 4> OutStructElementsTypes;
if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy())
OutStructElementsTypes.push_back(OriginalFuncTy);
Comment on lines +13063 to +13064
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the function already has a struct return, you could flatten and insert directly into the struct type. It may be a little more effort to emit though.

You also should check whether TargetLowering::CanLowerReturn succeeds, you should avoid over-committing return values if they're going to be forced to the stack. As written out arguments may be in any address space, not just allocas

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd say we get the functionality in and then we add the heuristic on top. It should be easier to test the "maximal" approach first.


for (auto *Arg : CandidateArgs)
OutStructElementsTypes.push_back(PtrToType[Arg]);

auto *ReturnStructType = StructType::create(
F.getContext(), OutStructElementsTypes, (F.getName() + "_out").str());

// Get the new Args.
SmallVector<Type *, 4> NewParamTypes;
for (auto &Arg : F.args())
if (!PtrToType.count(&Arg))
NewParamTypes.push_back(Arg.getType());

auto *NewFunctionType =
FunctionType::get(ReturnStructType, NewParamTypes, F.isVarArg());
auto *NewFunction =
Function::Create(NewFunctionType, F.getLinkage(), F.getAddressSpace(),
F.getName() + ".converted");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check out

Attributor::isValidFunctionSignatureRewrite

I believe it will even check the call sites for you so you don't have to do it yourself (basically ignore what I wrote above).

Attributor::registerFunctionSignatureRewrite 

And this will do the function cloning and other things for you.

You might need to add a new pair of APIs though:

Attributor::isValidFunctionReturnRewrite

and

Attributor::registerFunctionReturnRewrite


// Map old arguments to new ones, And also map the old arguments to struct
// elements.
ValueToValueMapTy VMap;
auto NewArgIt = NewFunction->arg_begin();
BasicBlock *EntryBlock =
BasicBlock::Create(NewFunction->getContext(), "entry", NewFunction);

IRBuilder<> EntryBuilder(EntryBlock);
for (auto &OldArg : F.args()) {
if (PtrToType.count(&OldArg)) {
dbgs() << "OldArg: " << OldArg
<< " ======> Type: " << *PtrToType[&OldArg] << "\n";
AllocaInst *Alloca = EntryBuilder.CreateAlloca(
PtrToType[&OldArg], nullptr, OldArg.getName() + "_");
VMap[&OldArg] = Alloca;
} else
VMap[&OldArg] = &(*NewArgIt++);
}

// Clone the old function into the new one.
SmallVector<ReturnInst *, 8> Returns;
CloneFunctionInto(NewFunction, &F, VMap,
CloneFunctionChangeType::LocalChangesOnly, Returns);

// Update the return values (make it struct).
for (ReturnInst *Ret : Returns) {
IRBuilder<> Builder(Ret);
SmallVector<Value *, 4> StructValues;
// Include original return type, if any
if (auto *OriginalFuncTy = F.getReturnType(); !OriginalFuncTy->isVoidTy())
StructValues.push_back(Ret->getReturnValue());

// Create a load instruction to fill the struct element.
for (auto *Arg : CandidateArgs) {
Value *OutVal = Builder.CreateLoad(PtrToType[Arg], VMap[Arg]);
StructValues.push_back(OutVal);
}

// Build the return struct incrementally.
Value *StructRetVal = UndefValue::get(ReturnStructType);
for (unsigned i = 0; i < StructValues.size(); ++i)
StructRetVal =
Builder.CreateInsertValue(StructRetVal, StructValues[i], i);

Builder.CreateRet(StructRetVal);
A.deleteAfterManifest(*Ret);
}
return ChangeStatus::CHANGED;
}

/// See AbstractAttribute::getAsStr(...).
const std::string getAsStr(Attributor *A) const override {
return "AAConvertOutArgumentFunction";
}

/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};

struct AAConvertOutArgumentCallSite final : AAConvertOutArgument {
AAConvertOutArgumentCallSite(const IRPosition &IRP, Attributor &A)
: AAConvertOutArgument(IRP, A) {}

/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
CallBase *CB = cast<CallBase>(getCtxI());
Function *F = CB->getCalledFunction();
if (!F)
return indicatePessimisticFixpoint();

// Get convert attribute.
auto *ConvertAA = A.getAAFor<AAConvertOutArgument>(
*this, IRPosition::function(*F), DepClassTy::REQUIRED);

// If function will be transformed, mark this call site for update
if (!ConvertAA || ConvertAA->isAssumedConvertible())
return ChangeStatus::CHANGED;

return ChangeStatus::UNCHANGED;
}

/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
CallBase *CB = cast<CallBase>(getCtxI());
Function *F = CB->getCalledFunction();
if (!F)
return ChangeStatus::UNCHANGED;

IRBuilder<> Builder(CB);
// Create args for new call.
SmallVector<Value *, 4> NewArgs;
for (unsigned ArgIdx = 0; ArgIdx < CB->arg_size(); ++ArgIdx) {
Value *Arg = CB->getArgOperand(ArgIdx);
Argument *ParamArg = F->getArg(ArgIdx);
if (!isEligibleArgument(*ParamArg, A, *this))
NewArgs.push_back(Arg);
}

Module *M = F->getParent();
auto *NewF = M->getFunction((F->getName() + ".converted").str());
if (!NewF)
return ChangeStatus::UNCHANGED;

FunctionCallee NewCallee(NewF->getFunctionType(), NewF);
Instruction *NewCall =
CallInst::Create(NewCallee, NewArgs, CB->getName() + ".converted", CB);
IRPosition ReturnPos = IRPosition::callsite_returned(*CB);
A.changeAfterManifest(ReturnPos, *NewCall);

// Redirect all uses of the old call to the new call.
for (auto &Use : CB->uses())
Use.set(NewCall);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should assert that this is the callee use, and not a data operand or other type of non-call user


A.deleteAfterManifest(*CB);
return ChangeStatus::CHANGED;
}

/// See AbstractAttribute::getAsStr(...).
const std::string getAsStr(Attributor *A) const override {
return "AAConvertOutArgumentCallSite";
}

/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
} // namespace

const char AANoUnwind::ID = 0;
const char AANoSync::ID = 0;
const char AANoFree::ID = 0;
Expand Down Expand Up @@ -13024,6 +13245,7 @@ const char AAAllocationInfo::ID = 0;
const char AAIndirectCallInfo::ID = 0;
const char AAGlobalValueInfo::ID = 0;
const char AADenormalFPMath::ID = 0;
const char AAConvertOutArgument::ID = 0;

// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
Expand Down Expand Up @@ -13139,6 +13361,7 @@ CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryLocation)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAssumptionInfo)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMustProgress)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAConvertOutArgument)

CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/Transforms/Attributor/remove_out_args.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=attributor < %s | FileCheck %s


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs about 100 more tests llvm/test/CodeGen/AMDGPU/rewrite-out-arguments.ll and llvm/test/CodeGen/AMDGPU/rewrite-out-arguments-address-space.ll would be better starting points

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will. BTW, I don't have an AMD GPU, I have a nvidia one, should J add the tests in NVPTX?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't matter what GPU you have, it doesn't matter when working on the compiler. NVPTX will be less useful for IR testing since it doesn't use a non-0 address space for stack objects

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still need all of these tests, using AMDGPU

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arsenm, I will test using AMDGPU, but I don't know why my changes don't change this simple test? Do I need to call the some function somewhere? the optimizer dosn't see my changes or do my changes don't affect the input?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an externally visible function, so it's not allowed to change the signature. It would need to be be internal or similar linkage, with a call use


define internal i1 @foo(ptr %dst, i32 %a, i32 %b) {
entry:
%x = xor i32 %a, 13
%y = add i32 %b, 5
%z = icmp sle i32 %x, %y
br i1 %z, label %if, label %else

if:
store i32 %x, ptr %dst, align 4
br label %end

else:
store i32 %y, ptr %dst, align 4
br label %end

end:
%t = mul i32 %x, %y
%tt = xor i32 %x, %y
%result = icmp eq i32 %t, %tt
ret i1 %result
}


define i1 @fee(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: define i1 @fee(
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[PTR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A:%.*]] = call i1 @foo.converted(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32 [[Y]], i32 [[Z]]) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[PTR]], align 4
; CHECK-NEXT: [[C:%.*]] = icmp sle i32 [[B]], [[X]]
; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[A]], [[C]]
; CHECK-NEXT: ret i1 [[XOR]]
;
%ptr = alloca i32
%a = call i1 @foo(ptr %ptr, i32 %y, i32 %z)
%b = load i32, ptr %ptr
%c = icmp sle i32 %b, %x
%xor = xor i1 %a, %c
ret i1 %xor
}

Loading