Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 4a1ad50

Browse files
authored
Merge pull request #6037 from sejongoh/linux_struct_arg
Unecessary Linux stack argument copy
2 parents 63e59cc + 68d5832 commit 4a1ad50

File tree

1 file changed

+70
-22
lines changed

1 file changed

+70
-22
lines changed

src/jit/morph.cpp

100644100755
Lines changed: 70 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1366,22 +1366,6 @@ void fgArgInfo::ArgsComplete()
13661366
assert(curArgTabEntry != NULL);
13671367
GenTreePtr argx = curArgTabEntry->node;
13681368

1369-
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1370-
// If this is a struct, mark it for needing a tempVar.
1371-
// In the copyblk and store this should have minimal perf impact since
1372-
// the local vars where we copy/store to already exist and the logic for temp
1373-
// var will not create a new one if it creates a tempVar from another tempVar.
1374-
// (Debugging through the code, there was no new copy of data created, neither a new tempVar.)
1375-
// The need for this arise from Lower::LowerArg.
1376-
// In case of copyblk and store operation, the NewPutArg method will
1377-
// not be invoked and the struct will not be loaded to be passed in
1378-
// registers or by value on the stack.
1379-
if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
1380-
{
1381-
curArgTabEntry->needTmp = true;
1382-
}
1383-
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1384-
13851369
if (curArgTabEntry->regNum == REG_STK)
13861370
{
13871371
hasStackArgs = true;
@@ -2598,6 +2582,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
25982582
bool callIsVararg = call->IsVarargs();
25992583
#endif
26002584

2585+
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2586+
// If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2587+
// to make sure to call EvalArgsToTemp. fgMakeOutgoingStructArgCopy just marks the argument
2588+
// to need a temp variable, and EvalArgsToTemp actually creates the temp variable node.
2589+
bool hasStackArgCopy = false;
2590+
#endif
2591+
26012592
#ifndef LEGACY_BACKEND
26022593
// Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
26032594
// following the normal calling convention or in the normal argument registers. We either mark existing
@@ -3312,15 +3303,64 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
33123303
#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
33133304
if (!structDesc.passedInRegisters)
33143305
{
3306+
GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3307+
bool needCpyBlk = false;
3308+
if (lclVar != nullptr)
3309+
{
3310+
// If the struct is promoted to registers, it has to be materialized
3311+
// on stack. We may want to support promoted structures in
3312+
// codegening pugarg_stk instead of creating a copy here.
3313+
LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3314+
needCpyBlk = varDsc->lvPromoted;
3315+
}
3316+
else
3317+
{
3318+
// If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3319+
// sets structDesc.passedInRegisters to be false.
3320+
//
3321+
// GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3322+
// by rationalizer. For now we will let SIMD struct arg to be copied to
3323+
// a local. As part of cpblk rewrite, rationalizer will handle GT_ADDR(GT_SIMD)
3324+
//
3325+
// +--* obj simd16
3326+
// | \--* addr byref
3327+
// | | /--* lclVar simd16 V05 loc4
3328+
// | \--* simd simd16 int -
3329+
// | \--* lclVar simd16 V08 tmp1
3330+
//
3331+
// TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3332+
// so that we don't need to generate a copy here.
3333+
GenTree* addr = argObj->gtOp.gtOp1;
3334+
if (addr->OperGet() == GT_ADDR)
3335+
{
3336+
GenTree* addrChild = addr->gtOp.gtOp1;
3337+
if (addrChild->OperGet() == GT_SIMD)
3338+
{
3339+
needCpyBlk = true;
3340+
}
3341+
}
3342+
}
33153343
passStructInRegisters = false;
3316-
copyBlkClass = NO_CLASS_HANDLE;
3344+
if (needCpyBlk)
3345+
{
3346+
copyBlkClass = objClass;
3347+
}
3348+
else
3349+
{
3350+
copyBlkClass = NO_CLASS_HANDLE;
3351+
}
33173352
}
33183353
else
33193354
{
33203355
// The objClass is used to materialize the struct on stack.
3356+
// For SystemV, the code below generates copies for struct arguments classified
3357+
// as register argument.
3358+
// TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3359+
// can be passed on registers or can be copied directly to outgoing area.
33213360
passStructInRegisters = true;
33223361
copyBlkClass = objClass;
33233362
}
3363+
33243364
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
33253365
#elif defined(_TARGET_ARM64_)
33263366
if ((size > 2) && !isHfaArg)
@@ -3350,6 +3390,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
33503390
#endif // _TARGET_ARM_
33513391
}
33523392
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3393+
// TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3394+
// generated for struct 1, 2, 4, or 8.
33533395
else // We have a struct argument with size 1, 2, 4 or 8 bytes
33543396
{
33553397
// change our GT_OBJ into a GT_IND of the correct type.
@@ -3841,13 +3883,18 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
38413883
{
38423884
noway_assert(!lateArgsComputed);
38433885
fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
3886+
38443887
// This can cause a GTF_EXCEPT flag to be set.
38453888
// TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
38463889
// This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
38473890
// there are no register arguments. Then lateArgsComputed is never true, so we keep re-copying
38483891
// any struct arguments.
38493892
// i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
38503893
flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
3894+
3895+
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3896+
hasStackArgCopy = true;
3897+
#endif
38513898
}
38523899

38533900
#ifndef LEGACY_BACKEND
@@ -3993,12 +4040,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
39934040
// or we have no register arguments then we don't need to
39944041
// call SortArgs() and EvalArgsToTemps()
39954042
//
3996-
// Note that we do this for UNIX_AMD64 when we have a struct argument
3997-
//
4043+
// For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
4044+
// all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
4045+
// is added to make sure to call EvalArgsToTemp.
39984046
if (!lateArgsComputed && (call->fgArgInfo->HasRegArgs()
3999-
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4000-
|| hasStructArgument
4001-
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4047+
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4048+
|| hasStackArgCopy
4049+
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
40024050
))
40034051
{
40044052
// This is the first time that we morph this call AND it has register arguments.

0 commit comments

Comments
 (0)