Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions Docs/durable-execution-design.md
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ var paymentResult = await context.InvokeAsync<PaymentRequest, PaymentResult>(
name: "process_payment",
config: new InvokeConfig
{
Timeout = TimeSpan.FromMinutes(5)
TenantId = "tenant-42"
});
```

Expand Down Expand Up @@ -1294,11 +1294,6 @@ public class WaitForCallbackConfig : CallbackConfig
/// </summary>
public class InvokeConfig
{
/// <summary>
/// Maximum time to wait for the invoked function. Default (TimeSpan.Zero) means no timeout.
/// </summary>
public TimeSpan Timeout { get; set; } = TimeSpan.Zero;
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other SDKs don't have this; it was a hallucination.


/// <summary>
/// Optional tenant identifier propagated to the chained invocation.
/// Matches the tenantId field on Python/JS/Java InvokeConfig.
Expand Down Expand Up @@ -1627,15 +1622,29 @@ public class CallbackTimeoutException : CallbackException { }
public class CallbackSubmitterException : CallbackException { }

/// <summary>
/// Base exception for chained-invoke failures. Catch <c>InvokeException</c>
/// to handle every non-success terminal state uniformly, or pattern-match the
/// concrete subclasses (<c>InvokeFailedException</c>, <c>InvokeTimedOutException</c>,
/// <c>InvokeStoppedException</c>) to react differently to specific outcomes.
/// Mirrors the Java SDK's invoke exception tree.
/// </summary>
public class InvokeException : DurableExecutionException
{
    /// <summary>Identifier of the function that was invoked.</summary>
    public string? FunctionName { get; init; }

    /// <summary>Error type recorded for the chained invocation, if any.</summary>
    public string? ErrorType { get; init; }

    /// <summary>Error data recorded for the chained invocation, if any.</summary>
    public string? ErrorData { get; init; }

    /// <summary>Stack trace lines recorded for the chained invocation, if any.</summary>
    public IReadOnlyList<string>? OriginalStackTrace { get; init; }
}

/// <summary>
/// The chained function ran and threw (FAILED terminal state). Derives from
/// <c>InvokeException</c>, so handlers for the base type catch it as well.
/// </summary>
public class InvokeFailedException : InvokeException { }

/// <summary>
/// The chained invocation reached the service-side TIMED_OUT terminal state.
/// Derives from <c>InvokeException</c>, so handlers for the base type catch it as well.
/// </summary>
public class InvokeTimedOutException : InvokeException { }

/// <summary>
/// The chained execution was stopped by the service before reaching a normal
/// terminal state (STOPPED). Derives from <c>InvokeException</c>, so handlers
/// for the base type catch it as well.
/// </summary>
public class InvokeStoppedException : InvokeException { }

/// <summary>
/// Thrown when a child context operation fails.
/// </summary>
Expand Down
38 changes: 38 additions & 0 deletions Libraries/src/Amazon.Lambda.DurableExecution/DurableContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,44 @@ private static bool IsCallbackErrorTypeString(string? errorType) =>
|| errorType == typeof(CallbackTimeoutException).FullName
|| errorType == typeof(CallbackSubmitterException).FullName
|| errorType == typeof(CallbackException).FullName;

/// <summary>
/// Invokes another durable function as a checkpointed chained-invoke operation.
/// </summary>
public Task<TResult> InvokeAsync<TPayload, TResult>(
    string functionName,
    TPayload payload,
    string? name = null,
    InvokeConfig? config = null,
    CancellationToken cancellationToken = default)
{
    // Intentionally not an async method: the task produced by RunInvoke is
    // handed back as-is, so anything RunInvoke throws before creating that
    // task surfaces directly at the call site.
    return RunInvoke<TPayload, TResult>(functionName, payload, name, config, cancellationToken);
}

/// <summary>
/// Validates the arguments, resolves the serializer, allocates the next
/// operation id and starts the chained-invoke operation.
/// </summary>
private Task<TResult> RunInvoke<TPayload, TResult>(
    string functionName,
    TPayload payload,
    string? name,
    InvokeConfig? config,
    CancellationToken cancellationToken)
{
    // Fail fast, synchronously, on obvious misuse (the usual .NET convention).
    // For parity with the Python/JS/Java SDKs only null/empty is rejected
    // here; the durable execution service owns the qualified-ARN rule and
    // reports a precise error when an unqualified identifier is supplied.
    ArgumentNullException.ThrowIfNull(functionName);
    if (string.IsNullOrWhiteSpace(functionName))
    {
        throw new ArgumentException("Function name must not be empty or whitespace.", nameof(functionName));
    }

    var payloadSerializer = LambdaSerializerHelper.GetRequired(LambdaContext);

    cancellationToken.ThrowIfCancellationRequested();

    // The id is allocated only after every check above has passed, so a
    // rejected call never consumes an operation id.
    var nextOperationId = _idGenerator.NextId();
    var invokeOperation = new InvokeOperation<TPayload, TResult>(
        operationId: nextOperationId,
        name: name,
        parentId: _idGenerator.ParentId,
        functionName: functionName,
        payload: payload,
        config: config,
        serializer: payloadSerializer,
        state: _state,
        termination: _terminationManager,
        durableExecutionArn: _durableExecutionArn,
        batcher: _batcher);

    return invokeOperation.ExecuteAsync(cancellationToken);
}
}

internal sealed class WaitForCallbackContext : IWaitForCallbackContext
Expand Down
50 changes: 46 additions & 4 deletions Libraries/src/Amazon.Lambda.DurableExecution/ErrorObject.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,55 @@ public sealed class ErrorObject
/// <summary>
/// Creates an ErrorObject from an exception.
/// </summary>
/// <remarks>
/// SDK operation wrappers (<see cref="StepException"/>,
/// <see cref="ChildContextException"/>, <see cref="InvokeException"/>,
/// <see cref="CallbackException"/>) unwrap to the original error captured
/// from the failed operation — preserving the user-visible
/// <c>ErrorType</c>/<c>ErrorData</c>/<c>StackTrace</c> instead of recording
/// the wrapper's type. This way a chained invoker sees the originating
/// exception (e.g. <c>System.InvalidOperationException</c>) rather than
/// <c>Amazon.Lambda.DurableExecution.StepException</c>. Mirrors the Java
/// SDK's <c>DurableExecutor.buildErrorObject</c> behavior.
/// <para>
/// When a wrapper carries no captured <c>ErrorType</c>, the wrapper's own
/// runtime type name is recorded so the resulting error is never untyped.
/// </para>
/// </remarks>
/// <param name="exception">The exception to convert.</param>
/// <returns>An <see cref="ErrorObject"/> describing <paramref name="exception"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="exception"/> is null.</exception>
public static ErrorObject FromException(Exception exception)
{
    ArgumentNullException.ThrowIfNull(exception);

    return exception switch
    {
        StepException step => new ErrorObject
        {
            // Fall back to the wrapper type when no original type was captured.
            ErrorType = step.ErrorType ?? step.GetType().FullName,
            ErrorMessage = step.Message,
            StackTrace = step.OriginalStackTrace,
            ErrorData = step.ErrorData
        },
        ChildContextException child => new ErrorObject
        {
            ErrorType = child.ErrorType ?? child.GetType().FullName,
            ErrorMessage = child.Message,
            StackTrace = child.OriginalStackTrace,
            ErrorData = child.ErrorData
        },
        // Also matches the InvokeFailed/TimedOut/Stopped subclasses.
        InvokeException invoke => new ErrorObject
        {
            ErrorType = invoke.ErrorType ?? invoke.GetType().FullName,
            ErrorMessage = invoke.Message,
            StackTrace = invoke.OriginalStackTrace,
            ErrorData = invoke.ErrorData
        },
        CallbackException callback => new ErrorObject
        {
            ErrorType = callback.ErrorType ?? callback.GetType().FullName,
            ErrorMessage = callback.Message,
            StackTrace = callback.OriginalStackTrace,
            ErrorData = callback.ErrorData
        },
        // Any other exception is recorded as-is, one stack-trace entry per line.
        _ => new ErrorObject
        {
            ErrorType = exception.GetType().FullName,
            ErrorMessage = exception.Message,
            StackTrace = exception.StackTrace?.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
        }
    };
}
}
19 changes: 19 additions & 0 deletions Libraries/src/Amazon.Lambda.DurableExecution/IDurableContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,25 @@ Task<T> WaitForCallbackAsync<T>(
string? name = null,
WaitForCallbackConfig? config = null,
CancellationToken cancellationToken = default);

/// <summary>
/// Invoke another durable Lambda function and await its result. The
/// invocation is checkpointed so it survives parent failures and is not
/// double-fired on replay. The payload and result are serialized to/from
/// a checkpoint using the <see cref="ILambdaSerializer"/> registered on
/// <see cref="ILambdaContext.Serializer"/>.
/// </summary>
/// <remarks>
/// <paramref name="functionName"/> must be a qualified identifier (version,
/// alias, or <c>$LATEST</c>); unqualified ARNs are rejected by the durable
/// execution service.
/// </remarks>
/// <typeparam name="TPayload">Type of the payload sent to the invoked function.</typeparam>
/// <typeparam name="TResult">Type the invoked function's result is deserialized into.</typeparam>
/// <param name="functionName">Identifier of the function to invoke.</param>
/// <param name="payload">Payload serialized and passed to the invoked function.</param>
/// <param name="name">Optional name for this operation.</param>
/// <param name="config">Optional invoke settings, such as a tenant identifier.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
/// <returns>A task that yields the invoked function's deserialized result.</returns>
Task<TResult> InvokeAsync<TPayload, TResult>(
string functionName,
TPayload payload,
string? name = null,
InvokeConfig? config = null,
CancellationToken cancellationToken = default);
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,5 +146,6 @@ private static bool IsTerminalStatus(string? status) =>
status == OperationStatuses.Succeeded
|| status == OperationStatuses.Failed
|| status == OperationStatuses.Cancelled
|| status == OperationStatuses.Stopped;
|| status == OperationStatuses.Stopped
|| status == OperationStatuses.TimedOut;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
using System.IO;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a license header.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in a separate PR instead: #2398.

using System.Text;
using Amazon.Lambda.Core;
using SdkChainedInvokeOptions = Amazon.Lambda.Model.ChainedInvokeOptions;
using SdkOperationUpdate = Amazon.Lambda.Model.OperationUpdate;

namespace Amazon.Lambda.DurableExecution.Internal;

/// <summary>
/// Chained-invoke durable operation: asks the durable execution service to
/// run another durable Lambda function asynchronously and parks the parent
/// workflow until that chained execution is terminal. The service runs the
/// chained function and later re-invokes the parent with the operation's
/// updated status.
/// </summary>
/// <remarks>
/// How each replay state is handled — example:
/// <c>await ctx.InvokeAsync&lt;Req, Resp&gt;("arn:...:fn:prod", req, "process_payment")</c>
/// <list type="bullet">
///   <item><b>Fresh</b>: the payload is serialized, a <c>CHAINED_INVOKE START</c>
///     (carrying <see cref="SdkChainedInvokeOptions"/>) is sync-flushed, and the
///     workflow suspends with <see cref="TerminationReason.InvokePending"/>.</item>
///   <item><b>SUCCEEDED</b>: the cached <c>ChainedInvokeDetails.Result</c> is
///     deserialized and returned; the chained function is NOT invoked again.</item>
///   <item><b>FAILED</b>: <see cref="InvokeFailedException"/> is thrown, filled
///     in from the recorded error.</item>
///   <item><b>TIMED_OUT</b>: <see cref="InvokeTimedOutException"/> is thrown.</item>
///   <item><b>STOPPED</b>: <see cref="InvokeStoppedException"/> is thrown.</item>
///   <item><b>STARTED</b> / <b>PENDING</b>: the chained execution is still in
///     flight, so the workflow suspends again without writing another
///     checkpoint — the original <c>START</c> stays authoritative.</item>
/// </list>
/// Follows the same "sync-flush START → suspend" idiom as
/// <see cref="WaitOperation"/>. Because the chained function runs
/// out-of-process, neither the fresh nor the replay path has local work beyond
/// the suspend wiring. All serialization goes through the
/// <see cref="ILambdaSerializer"/> registered on
/// <see cref="ILambdaContext.Serializer"/>, so AOT-safe and reflection-based
/// callers share one code path (the registered serializer decides the AOT story).
/// </remarks>
internal sealed class InvokeOperation<TPayload, TResult> : DurableOperation<TResult>
{
    private readonly string _functionName;
    private readonly TPayload _payload;
    private readonly InvokeConfig? _config;
    private readonly ILambdaSerializer _serializer;

    public InvokeOperation(
        string operationId,
        string? name,
        string? parentId,
        string functionName,
        TPayload payload,
        InvokeConfig? config,
        ILambdaSerializer serializer,
        ExecutionState state,
        TerminationManager termination,
        string durableExecutionArn,
        CheckpointBatcher? batcher = null)
        : base(operationId, name, parentId, state, termination, durableExecutionArn, batcher)
    {
        _serializer = serializer;
        _config = config;
        _payload = payload;
        _functionName = functionName;
    }

    protected override string OperationType => OperationTypes.ChainedInvoke;

    /// <summary>Label attached to the suspension raised while the chained invoke is pending.</summary>
    private string SuspendLabel => $"invoke:{Name ?? _functionName}";

    /// <summary>
    /// Fresh execution: checkpoints the START for the chained invoke, then
    /// suspends the parent until the service reports a terminal status.
    /// </summary>
    protected override async Task<TResult> StartAsync(CancellationToken cancellationToken)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var payloadText = SerializeValue(_payload);

        var invokeOptions = new SdkChainedInvokeOptions
        {
            FunctionName = _functionName,
            TenantId = _config?.TenantId
        };

        var startUpdate = new SdkOperationUpdate
        {
            Id = OperationId,
            ParentId = ParentId,
            Type = OperationTypes.ChainedInvoke,
            Action = OperationAction.START,
            SubType = OperationSubTypes.ChainedInvoke,
            Name = Name,
            Payload = payloadText,
            ChainedInvokeOptions = invokeOptions
        };

        // It is the service that runs the chained function, so the START has
        // to reach it before this process suspends. A START that was merely
        // batched locally would vanish if the parent were recycled at the
        // suspend point, and the chained function would then never run.
        await EnqueueAsync(startUpdate, cancellationToken);

        return await Termination.SuspendAndAwait<TResult>(TerminationReason.InvokePending, SuspendLabel);
    }

    /// <summary>
    /// Replay: maps the recorded operation status to a cached result, a typed
    /// invoke exception, or another suspension.
    /// </summary>
    protected override Task<TResult> ReplayAsync(Operation existing, CancellationToken cancellationToken)
    {
        switch (existing.Status)
        {
            case OperationStatuses.Started:
            case OperationStatuses.Pending:
                // Still running on the service side. The original START is
                // already recorded there, so nothing is re-checkpointed; the
                // next re-invocation of the parent carries the new status.
                return Termination.SuspendAndAwait<TResult>(TerminationReason.InvokePending, SuspendLabel);

            case OperationStatuses.Succeeded:
                var cachedResult = DeserializeResult(existing.ChainedInvokeDetails?.Result);
                return Task.FromResult(cachedResult);

            case OperationStatuses.Failed:
                throw BuildFailed(existing);

            case OperationStatuses.TimedOut:
                throw BuildTimedOut(existing);

            case OperationStatuses.Stopped:
                throw BuildStopped(existing);

            default:
                throw new NonDeterministicExecutionException(
                    $"Chained invoke operation '{Name ?? OperationId}' has unexpected status '{existing.Status}' on replay.");
        }
    }

    /// <summary>Serializes the payload with the configured serializer and returns it as UTF-8 text.</summary>
    private string SerializeValue(TPayload value)
    {
        using var buffer = new MemoryStream();
        _serializer.Serialize(value, buffer);
        var utf8Bytes = buffer.ToArray();
        return Encoding.UTF8.GetString(utf8Bytes);
    }

    /// <summary>Deserializes a recorded result; a missing result yields the default value.</summary>
    private TResult DeserializeResult(string? serialized)
    {
        if (serialized is null)
        {
            return default!;
        }

        using var input = new MemoryStream(Encoding.UTF8.GetBytes(serialized));
        return _serializer.Deserialize<TResult>(input);
    }

    /// <summary>Builds the exception for a FAILED chained invoke from its recorded error.</summary>
    private InvokeFailedException BuildFailed(Operation failedOp)
    {
        var recorded = failedOp.ChainedInvokeDetails?.Error;
        var message = recorded?.ErrorMessage ?? "Chained invoke failed.";
        return new InvokeFailedException(message)
        {
            FunctionName = _functionName,
            ErrorType = recorded?.ErrorType,
            ErrorData = recorded?.ErrorData,
            OriginalStackTrace = recorded?.StackTrace
        };
    }

    /// <summary>Builds the exception for a TIMED_OUT chained invoke from its recorded error.</summary>
    private InvokeTimedOutException BuildTimedOut(Operation failedOp)
    {
        var recorded = failedOp.ChainedInvokeDetails?.Error;
        var message = recorded?.ErrorMessage ?? "Chained invoke timed out.";
        return new InvokeTimedOutException(message)
        {
            FunctionName = _functionName,
            ErrorType = recorded?.ErrorType,
            ErrorData = recorded?.ErrorData,
            OriginalStackTrace = recorded?.StackTrace
        };
    }

    /// <summary>Builds the exception for a STOPPED chained invoke from its recorded error.</summary>
    private InvokeStoppedException BuildStopped(Operation failedOp)
    {
        var recorded = failedOp.ChainedInvokeDetails?.Error;
        var message = recorded?.ErrorMessage ?? "Chained invoke was stopped.";
        return new InvokeStoppedException(message)
        {
            FunctionName = _functionName,
            ErrorType = recorded?.ErrorType,
            ErrorData = recorded?.ErrorData,
            OriginalStackTrace = recorded?.StackTrace
        };
    }
}
Loading
Loading