@@ -1366,22 +1366,6 @@ void fgArgInfo::ArgsComplete()
1366
1366
assert(curArgTabEntry != NULL);
1367
1367
GenTreePtr argx = curArgTabEntry->node;
1368
1368
1369
- #if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1370
- // If this is a struct, mark it for needing a tempVar.
1371
- // In the copyblk and store this should have minimal perf impact since
1372
- // the local vars where we copy/store to already exist and the logic for temp
1373
- // var will not create a new one if it creates a tempVar from another tempVar.
1374
- // (Debugging through the code, there was no new copy of data created, neither a new tempVar.)
1375
- // The need for this arise from Lower::LowerArg.
1376
- // In case of copyblk and store operation, the NewPutArg method will
1377
- // not be invoked and the struct will not be loaded to be passed in
1378
- // registers or by value on the stack.
1379
- if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY( || curArgTabEntry->isStruct))
1380
- {
1381
- curArgTabEntry->needTmp = true;
1382
- }
1383
- #endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
1384
-
1385
1369
if (curArgTabEntry->regNum == REG_STK)
1386
1370
{
1387
1371
hasStackArgs = true;
@@ -2598,6 +2582,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
2598
2582
bool callIsVararg = call->IsVarargs();
2599
2583
#endif
2600
2584
2585
+ #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
2586
+ // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
2587
+ // to make sure EvalArgsToTemps is called. fgMakeOutgoingStructArgCopy just marks the argument
2588
+ // as needing a temp variable, and EvalArgsToTemps actually creates the temp variable node.
2589
+ bool hasStackArgCopy = false;
2590
+ #endif
2591
+
2601
2592
#ifndef LEGACY_BACKEND
2602
2593
// Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
2603
2594
// following the normal calling convention or in the normal argument registers. We either mark existing
@@ -3312,15 +3303,64 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
3312
3303
#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
3313
3304
if (!structDesc.passedInRegisters)
3314
3305
{
3306
+ GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
3307
+ bool needCpyBlk = false;
3308
+ if (lclVar != nullptr)
3309
+ {
3310
+ // If the struct is promoted to registers, it has to be materialized
3311
+ // on stack. We may want to support promoted structures in
3312
+ // the codegen for putarg_stk instead of creating a copy here.
3313
+ LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
3314
+ needCpyBlk = varDsc->lvPromoted;
3315
+ }
3316
+ else
3317
+ {
3318
+ // If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
3319
+ // sets structDesc.passedInRegisters to false.
3320
+ //
3321
+ // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
3322
+ // by the rationalizer. For now we let the SIMD struct arg be copied to
3323
+ // a local. As part of the cpblk rewrite, the rationalizer will handle GT_ADDR(GT_SIMD).
3324
+ //
3325
+ // +--* obj simd16
3326
+ // | \--* addr byref
3327
+ // | | /--* lclVar simd16 V05 loc4
3328
+ // | \--* simd simd16 int -
3329
+ // | \--* lclVar simd16 V08 tmp1
3330
+ //
3331
+ // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
3332
+ // so that we don't need to generate a copy here.
3333
+ GenTree* addr = argObj->gtOp.gtOp1;
3334
+ if (addr->OperGet() == GT_ADDR)
3335
+ {
3336
+ GenTree* addrChild = addr->gtOp.gtOp1;
3337
+ if (addrChild->OperGet() == GT_SIMD)
3338
+ {
3339
+ needCpyBlk = true;
3340
+ }
3341
+ }
3342
+ }
3315
3343
passStructInRegisters = false;
3316
- copyBlkClass = NO_CLASS_HANDLE;
3344
+ if (needCpyBlk)
3345
+ {
3346
+ copyBlkClass = objClass;
3347
+ }
3348
+ else
3349
+ {
3350
+ copyBlkClass = NO_CLASS_HANDLE;
3351
+ }
3317
3352
}
3318
3353
else
3319
3354
{
3320
3355
// The objClass is used to materialize the struct on stack.
3356
+ // For SystemV, the code below generates copies for struct arguments classified
3357
+ // as register arguments.
3358
+ // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
3359
+ // can be passed in registers or can be copied directly to the outgoing area.
3321
3360
passStructInRegisters = true;
3322
3361
copyBlkClass = objClass;
3323
3362
}
3363
+
3324
3364
#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
3325
3365
#elif defined(_TARGET_ARM64_)
3326
3366
if ((size > 2) && !isHfaArg)
@@ -3350,6 +3390,8 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
3350
3390
#endif // _TARGET_ARM_
3351
3391
}
3352
3392
#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
3393
+ // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
3394
+ // generated for structs of size 1, 2, 4, or 8 bytes.
3353
3395
else // We have a struct argument with size 1, 2, 4 or 8 bytes
3354
3396
{
3355
3397
// change our GT_OBJ into a GT_IND of the correct type.
@@ -3841,13 +3883,18 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
3841
3883
{
3842
3884
noway_assert(!lateArgsComputed);
3843
3885
fgMakeOutgoingStructArgCopy(call, args, argIndex, copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
3886
+
3844
3887
// This can cause a GTF_EXCEPT flag to be set.
3845
3888
// TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
3846
3889
// This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
3847
3890
// there are no register arguments. Then lateArgsComputed is never true, so we keep re-copying
3848
3891
// any struct arguments.
3849
3892
// i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
3850
3893
flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
3894
+
3895
+ #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
3896
+ hasStackArgCopy = true;
3897
+ #endif
3851
3898
}
3852
3899
3853
3900
#ifndef LEGACY_BACKEND
@@ -3993,12 +4040,13 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
3993
4040
// or we have no register arguments then we don't need to
3994
4041
// call SortArgs() and EvalArgsToTemps()
3995
4042
//
3996
- // Note that we do this for UNIX_AMD64 when we have a struct argument
3997
- //
4043
+ // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
4044
+ // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
4045
+ // is added to make sure EvalArgsToTemps is called.
3998
4046
if (!lateArgsComputed && (call->fgArgInfo->HasRegArgs()
3999
- #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4000
- || hasStructArgument
4001
- #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4047
+ #ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
4048
+ || hasStackArgCopy
4049
+ #endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
4002
4050
))
4003
4051
{
4004
4052
// This is the first time that we morph this call AND it has register arguments.
0 commit comments