Skip to content

Commit 111ceb1

Browse files
committed
[Distributed] IRGen: Adjust distributed method accessor to store result into a provided buffer
Instead of trying to return the result from the distributed thunk directly, modify the accessor to store the result into a caller-provided buffer. Doing so helps us avoid boxing the result into `Any`.
1 parent ed97120 commit 111ceb1

File tree

3 files changed

+74
-38
lines changed

3 files changed

+74
-38
lines changed

lib/IRGen/GenDistributed.cpp

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ static CanSILFunctionType getAccessorType(IRGenModule &IGM,
9898
SILFunction *DistMethod) {
9999
auto &Context = IGM.Context;
100100

101-
auto getParamForArguments = [&]() {
101+
auto getRawPointerParmeter = [&]() {
102102
auto ptrType = Context.getUnsafeRawPointerType();
103103
return SILParameterInfo(ptrType->getCanonicalType(),
104104
ParameterConvention::Direct_Guaranteed);
@@ -121,9 +121,11 @@ static CanSILFunctionType getAccessorType(IRGenModule &IGM,
121121
// its result(s) out.
122122
return SILFunctionType::get(
123123
/*genericSignature=*/nullptr, extInfo, SILCoroutineKind::None,
124-
ParameterConvention::Direct_Guaranteed, {getParamForArguments()},
124+
ParameterConvention::Direct_Guaranteed,
125+
{/*argumentBuffer=*/getRawPointerParmeter(),
126+
/*resultBuffer=*/getRawPointerParmeter()},
125127
/*Yields=*/{},
126-
/*Results=*/methodTy->getResults(),
128+
/*Results=*/{},
127129
/*ErrorResult=*/methodTy->getErrorResult(),
128130
/*patternSubs=*/SubstitutionMap(),
129131
/*invocationSubs=*/SubstitutionMap(), Context);
@@ -257,30 +259,25 @@ void DistributedAccessor::computeArguments(llvm::Value *argumentBuffer,
257259

258260
void DistributedAccessor::emit() {
259261
auto methodTy = Method->getLoweredFunctionType();
260-
SILFunctionConventions conv(methodTy, IGF.getSILModule());
262+
SILFunctionConventions targetConv(methodTy, IGF.getSILModule());
263+
SILFunctionConventions accessorConv(AccessorType, IGF.getSILModule());
261264
TypeExpansionContext expansionContext = IGM.getMaximalTypeExpansionContext();
262265

263266
auto params = IGF.collectParameters();
264267

265-
auto directResultTy = conv.getSILResultType(expansionContext);
268+
auto directResultTy = targetConv.getSILResultType(expansionContext);
266269
const auto &directResultTI = IGM.getTypeInfo(directResultTy);
267-
auto &resultSchema = directResultTI.nativeReturnValueSchema(IGM);
268-
llvm::Value *indirectResultSlot = nullptr;
269-
270-
if (resultSchema.requiresIndirect())
271-
indirectResultSlot = params.claimNext();
272270

273271
Explosion arguments;
274-
// Claim indirect results first, they are going to be passed
275-
// through to the distributed method.
276-
params.transferInto(arguments, conv.getNumIndirectSILResults());
277272

278273
unsigned numAsyncContextParams =
279274
(unsigned)AsyncFunctionArgumentIndex::Context + 1;
280275
(void)params.claim(numAsyncContextParams);
281276

282277
// UnsafeRawPointer that holds all of the argument values.
283278
auto *argBuffer = params.claimNext();
279+
// UnsafeRawPointer that is used to store the result.
280+
auto *resultBuffer = params.claimNext();
284281
// Reference to a `self` of the actor to be called.
285282
auto *actorSelf = params.claimNext();
286283

@@ -298,6 +295,17 @@ void DistributedAccessor::emit() {
298295
emitAsyncFunctionPointer(IGM, IGF.CurFn, entity, AsyncLayout.getSize());
299296
}
300297

298+
auto *typedResultBuffer = IGF.Builder.CreateBitCast(
299+
resultBuffer, IGM.getStoragePointerType(directResultTy));
300+
301+
if (targetConv.getNumIndirectSILResults()) {
302+
// Since tuples are not allowed as valid result types (because they cannot
303+
// conform to protocols), there could be only a single indirect result type
304+
// associated with distributed method.
305+
assert(targetConv.getNumIndirectSILResults() == 1);
306+
arguments.add(typedResultBuffer);
307+
}
308+
301309
// Step one is to load all of the data from argument buffer,
302310
// so it could be forwarded to the distributed method.
303311
computeArguments(argBuffer, arguments);
@@ -316,30 +324,34 @@ void DistributedAccessor::emit() {
316324
emission->setArgs(arguments, /*isOutlined=*/false,
317325
/*witnessMetadata=*/nullptr);
318326

319-
if (resultSchema.requiresIndirect()) {
320-
Address resultAddr(indirectResultSlot,
327+
// Store the result of the thunk into the location provided by the caller.
328+
// This would only generate code for direct results, if thunk has an
329+
// indirect result (e.g. large struct), its result buffer would be passed
330+
// as an argument.
331+
{
332+
Address resultAddr(typedResultBuffer,
321333
directResultTI.getBestKnownAlignment());
322334
emission->emitToMemory(resultAddr, cast<LoadableTypeInfo>(directResultTI),
323335
/*isOutlined=*/false);
324-
} else {
325-
emission->emitToExplosion(result, /*isOutlined=*/false);
326336
}
327337

328338
// Both accessor and distributed method are always `async throws`
329339
// so we need to load error value (if any) from the slot.
330340
{
331341
assert(methodTy->hasErrorResult());
332342

333-
SILType errorType = conv.getSILErrorType(expansionContext);
343+
SILType errorType = accessorConv.getSILErrorType(expansionContext);
334344
Address calleeErrorSlot =
335345
emission->getCalleeErrorSlot(errorType, /*isCalleeAsync=*/true);
336346
error.add(IGF.Builder.CreateLoad(calleeErrorSlot));
337347
}
338348

339349
emission->end();
340350

341-
emitAsyncReturn(IGF, AsyncLayout, directResultTy, AccessorType, result,
342-
error);
351+
Explosion voidResult;
352+
emitAsyncReturn(IGF, AsyncLayout,
353+
accessorConv.getSILResultType(expansionContext),
354+
AccessorType, voidResult, error);
343355
}
344356
}
345357

stdlib/public/Concurrency/Actor.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2030,10 +2030,14 @@ SWIFT_CC(swiftasync)
20302030
SWIFT_RUNTIME_STDLIB_SPI
20312031
TargetExecutorSignature::FunctionType swift_distributed_execute_target;
20322032

2033-
/// Accessor takes a context, an argument buffer as a raw pointer,
2034-
/// and a reference to an actor.
2035-
using DistributedAccessorSignature = AsyncSignature<void(void *, HeapObject *),
2036-
/*throws=*/true>;
2033+
/// Accessor takes:
2034+
/// - an async context
2035+
/// - an argument buffer as a raw pointer
2036+
/// - a result buffer as a raw pointer
2037+
/// - a reference to the actor to execute the method on.
2038+
using DistributedAccessorSignature =
2039+
AsyncSignature<void(void *, void *, HeapObject *),
2040+
/*throws=*/true>;
20372041

20382042
SWIFT_CC(swiftasync)
20392043
static DistributedAccessorSignature::ContinuationType
@@ -2073,6 +2077,5 @@ void ::swift_distributed_execute_target(
20732077
calleeContext->ResumeParent = reinterpret_cast<TaskContinuationFunction *>(
20742078
swift_distributed_execute_target_resume);
20752079

2076-
// TODO: Add resultBuffer as an argument to store an indirect result into.
2077-
accessorEntry(calleeContext, argumentBuffer, actor);
2080+
accessorEntry(calleeContext, argumentBuffer, resultBuffer, actor);
20782081
}

test/IRGen/distributed_actor_accessors.swift

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ public distributed actor MyOtherActor {
139139

140140
// CHECK: define hidden swifttailcc void @"$s27distributed_actor_accessors7MyActorC7simple1yySiFTE"
141141

142-
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors7MyActorC7simple1yySiFTETF"(%swift.context* swiftasync %0, i8* %1, %swift.refcounted* swiftself %2)
142+
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors7MyActorC7simple1yySiFTETF"(%swift.context* swiftasync %0, i8* %1, i8* %2, %swift.refcounted* swiftself %3)
143143

144144
/// Read the current offset and cast an element to `Int`
145145

@@ -192,14 +192,22 @@ public distributed actor MyOtherActor {
192192
// CHECK: [[STR_STRUCT:%.*]] = insertvalue { i64, %swift.bridge* } {{.*}}, %swift.bridge* {{.*}}, 1
193193
// CHECK: [[STR_SIZE:%.*]] = extractvalue { i64, %swift.bridge* } [[STR_STRUCT]], 0
194194
// CHECK-NEXT: [[STR_VAL:%.*]] = extractvalue { i64, %swift.bridge* } [[STR_STRUCT]], 1
195-
// CHECK: {{.*}} = call i1 (i8*, i1, ...) @llvm.coro.end.async({{.*}}, %swift.context* {{.*}}, i64 [[STR_SIZE]], %swift.bridge* [[STR_VAL]], %swift.error* {{.*}})
195+
196+
/// Initialize the result buffer with values produced by the thunk
197+
198+
// CHECK: store i64 [[STR_SIZE]], i64* %._guts._object._countAndFlagsBits._value, align 8
199+
// CHECK: store %swift.bridge* [[STR_VAL]], %swift.bridge** %._guts._object._object, align 8
200+
201+
// CHECK: {{.*}} = call i1 (i8*, i1, ...) @llvm.coro.end.async({{.*}}, %swift.context* {{.*}}, %swift.error* {{.*}})
196202

197203
/// ---> Thunk and distributed method accessor for `simple3`
198204

199205
// CHECK: define hidden swifttailcc void @"$s27distributed_actor_accessors7MyActorC7simple3ySiSSFTE"
200206

201207
/// !!! in `simple3` interesting bits are: argument value extraction (because string is exploded into N arguments) and call to distributed thunk
202-
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors7MyActorC7simple3ySiSSFTETF"
208+
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors7MyActorC7simple3ySiSSFTETF"(%swift.context* swiftasync {{.*}}, i8* [[ARG_BUFF:%.*]], i8* [[RESULT_BUFF:%.*]], %swift.refcounted* swiftself {{.*}})
209+
210+
// CHECK: [[TYPED_RESULT_BUFF:%.*]] = bitcast i8* [[RESULT_BUFF]] to %TSi*
203211

204212
// CHECK: %argval = load %TSS, %TSS* {{.*}}, align 8
205213
// CHECK: [[NATIVE_STR_PTR:%.*]] = bitcast %TSS* %argval.coercion.coerced to { i64, %swift.bridge* }*
@@ -220,13 +228,15 @@ public distributed actor MyOtherActor {
220228
// CHECK-NEXT: [[TASK_REF:%.*]] = extractvalue { i8*, i64, %swift.error* } [[THUNK_RESULT]], 0
221229
// CHECK-NEXT: {{.*}} = call i8* @__swift_async_resume_project_context(i8* [[TASK_REF]])
222230
// CHECK: [[INT_RES:%.*]] = extractvalue { i8*, i64, %swift.error* } [[THUNK_RESULT]], 1
223-
// CHECK: {{.*}} = call i1 (i8*, i1, ...) @llvm.coro.end.async({{.*}}, %swift.context* {{.*}}, i64 [[INT_RES]], %swift.error* {{.*}})
231+
// CHECK: %._value = getelementptr inbounds %TSi, %TSi* [[TYPED_RESULT_BUFF]], i32 0, i32 0
232+
// CHECK: store i64 [[INT_RES]], i64* %._value, align 8
233+
// CHECK: {{.*}} = call i1 (i8*, i1, ...) @llvm.coro.end.async({{.*}}, %swift.context* {{.*}}, %swift.error* {{.*}})
224234

225235
/// --> Thunk and distributed method accessor for `single_case_enum`
226236

227237
// CHECK: define hidden swifttailcc void @"$s27distributed_actor_accessors7MyActorC16single_case_enumyAA7SimpleEOAFFTE"
228238

229-
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors7MyActorC16single_case_enumyAA7SimpleEOAFFTETF"(%swift.context* swiftasync %0, i8* [[BUFFER:%.*]], %swift.refcounted* swiftself %2)
239+
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors7MyActorC16single_case_enumyAA7SimpleEOAFFTETF"(%swift.context* swiftasync %0, i8* [[BUFFER:%.*]], i8* [[RESULT_BUFF:%.*]], %swift.refcounted* swiftself {{.*}})
230240

231241
/// First, let's check that there were no loads from the argument buffer and no stores to "current offset".
232242

@@ -251,6 +261,7 @@ public distributed actor MyOtherActor {
251261

252262
/// First, load both arguments from the buffer.
253263

264+
// CHECK: [[TYPED_RESULT_BUFF:%.*]] = bitcast i8* %2 to %T27distributed_actor_accessors9IndirectEO*
254265
// CHECK: store i8* %1, i8** %offset, align 8
255266
// CHECK-NEXT: %elt_offset = load i8*, i8** %offset, align 8
256267
// CHECK-NEXT: [[ENUM_PTR:%.*]] = bitcast i8* %elt_offset to %T27distributed_actor_accessors9IndirectEO*
@@ -261,7 +272,7 @@ public distributed actor MyOtherActor {
261272
// CHECK-NEXT: [[NATIVE_ENUM_VAL:%.*]] = load i64, i64* [[COERCED_ENUM_PTR]], align 8
262273
// CHECK: [[ENUM_PTR_INT:%.*]] = ptrtoint %T27distributed_actor_accessors9IndirectEO* [[ENUM_PTR]] to i64
263274
// CHECK-NEXT: [[NEXT_ELT_LOC:%.*]] = add i64 [[ENUM_PTR_INT]], 8
264-
// CHECK-NEXT: [[NEXT_ELT_PTR:%.*]] = inttoptr i64 %13 to i8*
275+
// CHECK-NEXT: [[NEXT_ELT_PTR:%.*]] = inttoptr i64 [[NEXT_ELT_LOC]] to i8*
265276
// CHECK-NEXT: store i8* [[NEXT_ELT_PTR]], i8** %offset, align 8
266277
// CHECK-NEXT: %elt_offset1 = load i8*, i8** %offset, align 8
267278
// CHECK-NEXT: [[INT_PTR:%.*]] = bitcast i8* %elt_offset1 to %TSi*
@@ -277,16 +288,22 @@ public distributed actor MyOtherActor {
277288
// CHECK-NEXT: [[TASK_REF:%.*]] = extractvalue { i8*, i64, %swift.error* } [[THUNK_RESULT]], 0
278289
// CHECK-NEXT: {{.*}} = call i8* @__swift_async_resume_project_context(i8* [[TASK_REF]])
279290
// CHECK: [[ENUM_RESULT:%.*]] = extractvalue { i8*, i64, %swift.error* } [[THUNK_RESULT]], 1
280-
// CHECK: {{.*}} = call i1 (i8*, i1, ...) @llvm.coro.end.async({{.*}}, %swift.context* {{.*}}, i64 [[ENUM_RESULT]], %swift.error* {{.*}})
291+
// CHECK: [[NATIVE_RESULT_PTR:%.*]] = bitcast %T27distributed_actor_accessors9IndirectEO* [[TYPED_RESULT_BUFF]] to i64*
292+
// CHECK-NEXT: store i64 [[ENUM_RESULT]], i64* [[NATIVE_RESULT_PTR]], align 8
293+
294+
// CHECK: {{.*}} = call i1 (i8*, i1, ...) @llvm.coro.end.async({{.*}}, %swift.context* {{.*}}, %swift.error* {{.*}})
281295

282296
/// ---> Thunk and distributed method for `complex`
283297

284298
// CHECK: define hidden swifttailcc void @"$s27distributed_actor_accessors7MyActorC7complexyAA11LargeStructVSaySiG_AA3ObjCSSSgAFtFTE"
285299

286-
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors7MyActorC7complexyAA11LargeStructVSaySiG_AA3ObjCSSSgAFtFTETF"(%T27distributed_actor_accessors11LargeStructV* noalias nocapture [[INDIRECT_RES:%.*]], %swift.context* swiftasync %1, i8* %2, %swift.refcounted* swiftself %3)
300+
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors7MyActorC7complexyAA11LargeStructVSaySiG_AA3ObjCSSSgAFtFTETF"(%swift.context* swiftasync {{.*}}, i8* [[ARG_BUFF:%.*]], i8* [[RESULT_BUFF:%.*]], %swift.refcounted* swiftself {{.*}})
287301

288302
/// First, let's check that all of the different argument types here are loaded correctly.
289303

304+
/// Cast the result buffer to the expected result type (in this case it's an indirect opaque pointer)
305+
// CHECK: [[TYPED_RESULT_BUFF:%.*]] = bitcast i8* [[RESULT_BUFF]] to %swift.opaque*
306+
290307
/// -> [Int]
291308

292309
// CHECK: %elt_offset = load i8*, i8** %offset, align 8
@@ -307,7 +324,7 @@ public distributed actor MyOtherActor {
307324
// CHECK-NEXT: [[NATIVE_OBJ_VAL:%.*]] = load %T27distributed_actor_accessors3ObjC*, %T27distributed_actor_accessors3ObjC** [[OBJ_PTR]], align 8
308325
// CHECK-NEXT: [[OBJ_PTR_INT:%.*]] = ptrtoint %T27distributed_actor_accessors3ObjC** [[OBJ_PTR]] to i64
309326
// CHECK-NEXT: [[NEXT_ELT:%.*]] = add i64 [[OBJ_PTR_INT]], 8
310-
// CHECK-NEXT: [[NEXT_ELT_PTR:%.*]] = inttoptr i64 %18 to i8*
327+
// CHECK-NEXT: [[NEXT_ELT_PTR:%.*]] = inttoptr i64 [[NEXT_ELT]] to i8*
311328
// CHECK-NEXT: store i8* [[NEXT_ELT_PTR]], i8** %offset, align 8
312329

313330
/// -> String?
@@ -330,13 +347,16 @@ public distributed actor MyOtherActor {
330347
// CHECK-NEXT: %elt_offset5 = load i8*, i8** %offset, align 8
331348
// CHECK-NEXT: [[STRUCT_PTR:%.*]] = bitcast i8* %elt_offset5 to %T27distributed_actor_accessors11LargeStructV*
332349
// CHECK-NEXT: [[STRUCT_VAL:%.*]] = load %T27distributed_actor_accessors11LargeStructV, %T27distributed_actor_accessors11LargeStructV* [[STRUCT_PTR]], align 8
350+
351+
// CHECK: [[INDIRECT_RESULT_BUFF:%.*]] = bitcast %swift.opaque* [[TYPED_RESULT_BUFF]] to %T27distributed_actor_accessors11LargeStructV*
352+
333353
// CHECK: store %T27distributed_actor_accessors11LargeStructV [[STRUCT_VAL]], %T27distributed_actor_accessors11LargeStructV* %argval6.coercion.coerced, align 8
334354
// CHECK-NEXT: [[PTR_TO_STRUCT:%.*]] = bitcast %T27distributed_actor_accessors11LargeStructV* %argval6.coercion.coerced to %T27distributed_actor_accessors11LargeStructV**
335355
// CHECK-NEXT: [[NATIVE_STRUCT_VAL:%.*]] = load %T27distributed_actor_accessors11LargeStructV*, %T27distributed_actor_accessors11LargeStructV** [[PTR_TO_STRUCT]], align 8
336356

337357
/// Now let's make sure that distributed thunk call uses the arguments correctly
338358

339-
// CHECK: [[THUNK_RESULT:%.*]] = call { i8*, %swift.error* } (i32, i8*, i8*, ...) @llvm.coro.suspend.async.sl_p0i8p0s_swift.errorss({{.*}}, %T27distributed_actor_accessors11LargeStructV* [[INDIRECT_RES]], %swift.context* {{.*}}, %swift.bridge* [[NATIVE_ARR_VAL]], %T27distributed_actor_accessors3ObjC* [[NATIVE_OBJ_VAL]], i64 [[NATIVE_OPT_VAL_0]], i64 [[NATIVE_OPT_VAL_1]], %T27distributed_actor_accessors11LargeStructV* [[NATIVE_STRUCT_VAL]], %T27distributed_actor_accessors7MyActorC* {{.*}})
359+
// CHECK: [[THUNK_RESULT:%.*]] = call { i8*, %swift.error* } (i32, i8*, i8*, ...) @llvm.coro.suspend.async.sl_p0i8p0s_swift.errorss({{.*}}, %T27distributed_actor_accessors11LargeStructV* [[INDIRECT_RESULT_BUFF]], %swift.context* {{.*}}, %swift.bridge* [[NATIVE_ARR_VAL]], %T27distributed_actor_accessors3ObjC* [[NATIVE_OBJ_VAL]], i64 [[NATIVE_OPT_VAL_0]], i64 [[NATIVE_OPT_VAL_1]], %T27distributed_actor_accessors11LargeStructV* [[NATIVE_STRUCT_VAL]], %T27distributed_actor_accessors7MyActorC* {{.*}})
340360

341361
/// RESULT is returned indirectly so there is nothing to pass to `end`
342362

@@ -346,12 +366,13 @@ public distributed actor MyOtherActor {
346366

347367
/// Let's check that there is no offset allocation here since parameter list is empty
348368

349-
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors12MyOtherActorC5emptyyyFTETF"
369+
// CHECK: define internal swifttailcc void @"$s27distributed_actor_accessors12MyOtherActorC5emptyyyFTETF"(%swift.context* swiftasync {{.*}}, i8* [[ARG_BUFF:%.*]], i8* [[RESULT_BUFF:%.*]], %swift.refcounted* swiftself {{.*}})
350370
// CHECK-NEXT: entry:
351371
// CHECK-NEXT: {{.*}} = alloca %swift.context*, align 8
352372
// CHECK-NEXT: %swifterror = alloca swifterror %swift.error*, align 8
353373
// CHECK-NEXT: {{.*}} = call token @llvm.coro.id.async(i32 20, i32 16, i32 0, i8* bitcast (%swift.async_func_pointer* @"$s27distributed_actor_accessors12MyOtherActorC5emptyyyFTETFTu" to i8*))
354-
// CHECK-NEXT: {{.*}} = call i8* @llvm.coro.begin(token %4, i8* null)
374+
// CHECK-NEXT: {{.*}} = call i8* @llvm.coro.begin(token {{%.*}}, i8* null)
355375
// CHECK-NEXT: store %swift.context* {{.*}}, %swift.context** {{.*}}, align 8
356376
// CHECK-NEXT: store %swift.error* null, %swift.error** %swifterror, align 8
377+
// CHECK-NEXT: {{.*}} = bitcast i8* [[RESULT_BUFF]] to %swift.opaque*
357378
// CHECK-NEXT: {{.*}} = load i32, i32* getelementptr inbounds (%swift.async_func_pointer, %swift.async_func_pointer* bitcast (void (%swift.context*, %T27distributed_actor_accessors12MyOtherActorC*)* @"$s27distributed_actor_accessors12MyOtherActorC5emptyyyFTE" to %swift.async_func_pointer*), i32 0, i32 0), align 8

0 commit comments

Comments
 (0)