Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,53 @@ bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
// the callers'.
bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
(CalleeBits & InlineFeaturesAllowed);

LLVM_DEBUG({
dbgs() << "=== Inline compatibility debug ===\n";
dbgs() << "Caller: " << Caller->getName() << "\n";
dbgs() << "Callee: " << Callee->getName() << "\n";

// Bit diffs
FeatureBitset MissingInCaller = CalleeBits & ~CallerBits; // callee-only
FeatureBitset ExtraInCaller = CallerBits & ~CalleeBits; // caller-only

// Counts
dbgs() << "Only-in-caller bit count: " << ExtraInCaller.count() << "\n";
dbgs() << "Only-in-callee bit count: " << MissingInCaller.count() << "\n";

dbgs() << "Only-in-caller feature indices [";
{
bool First = true;
for (size_t I = 0, E = ExtraInCaller.size(); I < E; ++I) {
if (ExtraInCaller.test(I)) {
if (!First)
dbgs() << ", ";
dbgs() << I;
First = false;
}
}
}
dbgs() << "]\n";

dbgs() << "Only-in-callee feature indices [";
{
bool First = true;
for (size_t I = 0, E = MissingInCaller.size(); I < E; ++I) {
if (MissingInCaller.test(I)) {
if (!First)
dbgs() << ", ";
dbgs() << I;
First = false;
}
}
}
dbgs() << "]\n";

// Indices map to features as found in
// llvm-project/(your_build)/lib/Target/ARM/ARMGenSubtargetInfo.inc
dbgs() << "MatchExact=" << (MatchExact ? "true" : "false")
<< " MatchSubset=" << (MatchSubset ? "true" : "false") << "\n";
});
return MatchExact && MatchSubset;
}

Expand Down
138 changes: 105 additions & 33 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ class Type;
class Value;

namespace TailPredication {
enum Mode {
Disabled = 0,
EnabledNoReductions,
Enabled,
ForceEnabledNoReductions,
ForceEnabled
};
enum Mode {
Disabled = 0,
EnabledNoReductions,
Enabled,
ForceEnabledNoReductions,
ForceEnabled
};
}

// For controlling conversion of memcpy into Tail Predicated loop.
Expand All @@ -64,37 +64,109 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
const ARMTargetLowering *TLI;

// Currently the following features are excluded from InlineFeaturesAllowed.
// ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
// ModeThumb, FeatureNoARM, ModeSoftFloat.
// Depending on whether they are set or unset, different
// instructions/registers are available. For example, inlining a callee with
// -thumb-mode in a caller with +thumb-mode, may cause the assembler to
// fail if the callee uses ARM only instructions, e.g. in inline asm.
const FeatureBitset InlineFeaturesAllowed = {
ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
ARM::FeatureAClass, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
ARM::FeatureNoNegativeImmediates
};
ARM::FeatureD32,
ARM::FeatureFPRegs64,
ARM::FeatureFPRegs16,
ARM::FeatureFPRegs,
ARM::FeatureAES,
ARM::FeatureVFP2_SP,
ARM::FeatureSHA2,
ARM::HasV5TEOps,
ARM::HasV6Ops,
ARM::HasV6KOps,
ARM::HasV6T2Ops,
ARM::HasV7Ops,
ARM::HasV5TOps,
ARM::HasV6MOps,
ARM::HasV8MBaselineOps,
ARM::HasV8MMainlineOps,
ARM::HasV8_1aOps,
ARM::HasV8_2aOps,
ARM::HasV8_3aOps,
ARM::HasV8_4aOps,
ARM::HasV8_5aOps,
ARM::HasV8_6aOps,
ARM::HasV8_7aOps,
ARM::HasV8_8aOps,
ARM::HasV8_9aOps,
ARM::HasV9_0aOps,
ARM::HasV9_1aOps,
ARM::HasV9_2aOps,
ARM::HasV9_3aOps,
ARM::HasV9_4aOps,
ARM::HasV9_5aOps,
ARM::HasV9_6aOps,
ARM::HasV9_7aOps,
ARM::HasV8_1MMainlineOps,
ARM::FeatureDotProd,
ARM::HasV8Ops,
ARM::FeatureSB,
ARM::FeatureBF16,
ARM::FeatureVFP2,
ARM::FeatureVFP3,
ARM::FeatureNEON,
ARM::FeatureThumb2,
ARM::FeatureFP16,
ARM::FeatureVFP4,
ARM::FeatureFPARMv8,
ARM::FeatureFullFP16,
ARM::FeatureFP16FML,
ARM::FeatureHWDivThumb,
ARM::FeatureHWDivARM,
ARM::FeatureDB,
ARM::FeatureV7Clrex,
ARM::FeatureAcquireRelease,
ARM::FeatureSlowFPBrcc,
ARM::FeaturePerfMon,
ARM::FeatureTrustZone,
ARM::Feature8MSecExt,
ARM::FeatureCrypto,
ARM::FeatureCRC,
ARM::FeatureRAS,
ARM::FeatureFPAO,
ARM::FeatureFuseAES,
ARM::FeatureZCZeroing,
ARM::FeatureProfUnpredicate,
ARM::FeatureSlowVGETLNi32,
ARM::FeatureSlowVDUP32,
ARM::FeaturePreferVMOVSR,
ARM::FeaturePrefISHSTBarrier,
ARM::FeatureMuxedUnits,
ARM::FeatureSlowOddRegister,
ARM::FeatureSlowLoadDSubreg,
ARM::FeatureDontWidenVMOVS,
ARM::FeatureExpandMLx,
ARM::FeatureHasVMLxHazards,
ARM::FeatureNEONForFPMovs,
ARM::FeatureNEONForFP,
ARM::FeatureCheckVLDnAlign,
ARM::FeatureHasSlowFPVMLx,
ARM::FeatureHasSlowFPVFMx,
ARM::FeatureVMLxForwarding,
ARM::FeaturePref32BitThumb,
ARM::FeatureAvoidPartialCPSR,
ARM::FeatureCheapPredicableCPSR,
ARM::FeatureAvoidMOVsShOp,
ARM::FeatureHasRetAddrStack,
ARM::FeatureHasNoBranchPredictor,
ARM::FeatureDSP,
ARM::FeatureMP,
ARM::FeatureVirtualization,
ARM::FeatureMClass,
ARM::FeatureRClass,
ARM::FeatureAClass,
ARM::FeatureStrictAlign,
ARM::FeatureLongCalls,
ARM::FeatureExecuteOnly,
ARM::FeatureReserveR9,
ARM::FeatureNoMovt,
ARM::FeatureNoNegativeImmediates};

const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
Expand Down
35 changes: 35 additions & 0 deletions llvm/test/Transforms/Inline/ARM/inline-dotprod.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes=inline | FileCheck %s
; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s

declare i32 @foo(...) #0

; Baseline callee: uses attribute group #0 (+dsp,+neon, without dotprod) and
; just forwards to the external varargs function @foo, returning its result.
define i32 @callee() #0 {
entry:
%call = call i32 (...) @foo()
ret i32 %call
}

; Same body as @callee, but uses attribute group #1, which additionally
; enables +dotprod.
define i32 @dotcallee() #1 {
entry:
%call = call i32 (...) @foo()
ret i32 %call
}

; Caller with +dotprod (group #1) calling a callee without it (group #0).
; The FileCheck directives at the end of this function expect a direct call
; to @foo to show up here, i.e. the call to @callee has been inlined.
define i32 @dotcaller() #1 {
entry:
%call = call i32 @callee()
ret i32 %call
; CHECK-LABEL: dotcaller
; CHECK: call i32 (...) @foo()
}

; Caller without +dotprod (group #0) calling a callee that has it (group #1).
; The FileCheck directives at the end of this function expect the call to
; @dotcallee to still be present, i.e. the callee must not be inlined.
define i32 @caller() #0 {
entry:
%call = call i32 @dotcallee()
ret i32 %call
; CHECK-LABEL: caller
; CHECK: call i32 @dotcallee()
}

attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" }
attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+dotprod" }