@@ -1114,6 +1114,47 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
1114
1114
}
1115
1115
}
1116
1116
1117
+ // Look through instructions to find all possible allocas that might become the sret argument
1118
+ static SmallSetVector<AllocaInst *, 8 > FindSretAllocas (Value* SRetArg) {
1119
+ SmallSetVector<AllocaInst *, 8 > allocas;
1120
+ if (AllocaInst *OneSRet = dyn_cast<AllocaInst>(SRetArg)) {
1121
+ allocas.insert (OneSRet); // Found it directly
1122
+ } else {
1123
+ SmallSetVector<Value *, 8 > worklist;
1124
+ worklist.insert (SRetArg);
1125
+ while (!worklist.empty ()) {
1126
+ Value *V = worklist.pop_back_val ();
1127
+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets ())) {
1128
+ allocas.insert (Alloca); // Found a candidate
1129
+ } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
1130
+ for (Value *Incoming : Phi->incoming_values ()) {
1131
+ worklist.insert (Incoming);
1132
+ }
1133
+ } else if (SelectInst *SI = dyn_cast<SelectInst>(SRetArg)) {
1134
+ auto TrueBranch = SI->getTrueValue ();
1135
+ auto FalseBranch = SI->getFalseValue ();
1136
+ if (TrueBranch && FalseBranch) {
1137
+ worklist.insert (TrueBranch);
1138
+ worklist.insert (FalseBranch);
1139
+ } else {
1140
+ llvm_dump (SI);
1141
+ assert (false && " Malformed Select" );
1142
+ }
1143
+ } else {
1144
+ llvm_dump (V);
1145
+ assert (false && " Unexpected SRet argument" );
1146
+ }
1147
+ }
1148
+ }
1149
+ assert (allocas.size () > 0 );
1150
+ assert (std::all_of (allocas.begin (), allocas.end (), [&] (AllocaInst* SRetAlloca) JL_NOTSAFEPOINT {
1151
+ return (SRetAlloca->getArraySize () == allocas[0 ]->getArraySize () &&
1152
+ SRetAlloca->getAllocatedType () == allocas[0 ]->getAllocatedType ());
1153
+ }
1154
+ ));
1155
+ return allocas;
1156
+ }
1157
+
1117
1158
State LateLowerGCFrame::LocalScan (Function &F) {
1118
1159
State S (F);
1119
1160
SmallVector<int , 8 > PHINumbers;
@@ -1165,46 +1206,35 @@ State LateLowerGCFrame::LocalScan(Function &F) {
1165
1206
Type *ElT = getAttributeAtIndex (CI->getAttributes (), 1 , Attribute::StructRet).getValueAsType ();
1166
1207
auto tracked = CountTrackedPointers (ElT, true );
1167
1208
if (tracked.count ) {
1168
- AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin ()[0 ])->stripInBoundsOffsets ());
1169
- assert (SRet);
1170
- {
1209
+ SmallSetVector<AllocaInst *, 8 > allocas = FindSretAllocas ((CI->arg_begin ()[0 ])->stripInBoundsOffsets ());
1210
+ // We know that with the right optimizations we can forward a sret directly from an argument
1211
+ // This hasn't been seen without adding IPO effects to julia functions but it's possible we need to handle that too
1212
+ // If they are tracked.all we can just pass through but if they have a roots bundle it's possible we need to emit some copies ¯\_(ツ)_/¯
1213
+ for (AllocaInst *SRet : allocas) {
1171
1214
if (!(SRet->isStaticAlloca () && isa<PointerType>(ElT) && ElT->getPointerAddressSpace () == AddressSpace::Tracked)) {
1172
1215
assert (!tracked.derived );
1173
1216
if (tracked.all ) {
1174
1217
S.ArrayAllocas [SRet] = tracked.count * cast<ConstantInt>(SRet->getArraySize ())->getZExtValue ();
1175
1218
}
1176
1219
else {
1177
1220
Value *arg1 = (CI->arg_begin ()[1 ])->stripInBoundsOffsets ();
1221
+ SmallSetVector<AllocaInst *, 8 > gc_allocas = FindSretAllocas (arg1);
1178
1222
AllocaInst *SRet_gc = nullptr ;
1179
- if (PHINode *Phi = dyn_cast<PHINode>(arg1)) {
1180
- for (Value *V : Phi->incoming_values ()) {
1181
- if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripInBoundsOffsets ())) {
1182
- if (SRet_gc == nullptr ) {
1183
- SRet_gc = Alloca;
1184
- } else if (SRet_gc == Alloca) {
1185
- continue ;
1186
- } else {
1187
- llvm_dump (Alloca);
1188
- llvm_dump (SRet_gc);
1189
- assert (false && " Allocas in Phi node should match" );
1190
- }
1191
- } else {
1192
- llvm_dump (V->stripInBoundsOffsets ());
1193
- assert (false && " Expected alloca" );
1194
- }
1195
- }
1196
- } else {
1197
- SRet_gc = dyn_cast<AllocaInst>(arg1);
1223
+ if (gc_allocas.size () == 1 ) {
1224
+ SRet_gc = gc_allocas.pop_back_val ();
1198
1225
}
1199
- if (!SRet_gc) {
1226
+ else {
1200
1227
llvm_dump (CI);
1201
- llvm_dump (arg1);
1202
- assert (false && " Expected alloca" );
1228
+ for (AllocaInst *Alloca : gc_allocas) {
1229
+ llvm_dump (Alloca);
1230
+ }
1231
+ assert (false && " Expected single alloca" );
1203
1232
}
1204
1233
Type *ElT = SRet_gc->getAllocatedType ();
1205
1234
if (!(SRet_gc->isStaticAlloca () && isa<PointerType>(ElT) && ElT->getPointerAddressSpace () == AddressSpace::Tracked)) {
1206
1235
S.ArrayAllocas [SRet_gc] = tracked.count * cast<ConstantInt>(SRet_gc->getArraySize ())->getZExtValue ();
1207
1236
}
1237
+ break ; // Found our gc roots
1208
1238
}
1209
1239
}
1210
1240
}
@@ -1401,6 +1431,8 @@ State LateLowerGCFrame::LocalScan(Function &F) {
1401
1431
return S;
1402
1432
}
1403
1433
1434
+
1435
+
1404
1436
static Value *ExtractScalar (Value *V, Type *VTy, bool isptr, ArrayRef<unsigned > Idxs, IRBuilder<> &irbuilder) {
1405
1437
Type *T_int32 = Type::getInt32Ty (V->getContext ());
1406
1438
if (isptr) {
0 commit comments