-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Azure Blob storage: pooled reads, streaming serializer, buffered writes #9879
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,6 +11,23 @@ | |
|
|
||
| namespace Orleans.Configuration | ||
| { | ||
| public enum AzureBlobStorageWriteMode | ||
| { | ||
| /// <summary> | ||
| /// Serialize to <see cref="BinaryData"/> and upload. | ||
| /// This uses the <see cref="IGrainStorageSerializer"/> binary path, materializing the full payload in memory. | ||
| /// It is typically the fastest path but can create large allocations (including on the Large Object Heap, LOH) for big payloads. | ||
| /// </summary> | ||
| BinaryData, | ||
|
|
||
| /// <summary> | ||
| /// Serialize using the stream serializer into a pooled in-memory stream and upload from that buffer. | ||
| /// This still buffers the full payload but avoids Large Object Heap (LOH) churn by reusing pooled segments. | ||
| /// Requires <see cref="IGrainStorageStreamingSerializer"/>; otherwise the write falls back to <see cref="BinaryData"/>. | ||
| /// </summary> | ||
| BufferedStream, | ||
| } | ||
|
|
||
| public class AzureBlobStorageOptions : IStorageProviderSerializerOptions | ||
| { | ||
| private BlobServiceClient _blobServiceClient; | ||
|
|
@@ -57,6 +74,20 @@ public BlobServiceClient BlobServiceClient | |
| /// </summary> | ||
| public bool DeleteStateOnClear { get; set; } = true; | ||
|
|
||
| /// <summary> | ||
| /// Gets or sets a value indicating whether to use pooled buffers when reading blob contents. | ||
| /// The deserializer must not retain the <see cref="BinaryData"/> or underlying buffer after deserialization. | ||
| /// When a stream serializer is configured, pooled buffers are used only if the content length fits in an <see cref="int"/>. | ||
| /// When pooled buffers are used, deserialization goes through the <see cref="IGrainStorageSerializer"/> binary path. | ||
| /// </summary> | ||
| public bool UsePooledBufferForReads { get; set; } = true; | ||
|
|
||
| /// <summary> | ||
| /// Gets or sets the write mode to use when a stream serializer is available.
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. write path -> write mode |
||
| /// If the stream serializer is not configured, writes always use <see cref="BinaryData"/>. | ||
| /// </summary> | ||
| public AzureBlobStorageWriteMode WriteMode { get; set; } = AzureBlobStorageWriteMode.BinaryData; | ||
|
|
||
| /// <summary> | ||
| /// A function for building container factory instances | ||
| /// </summary> | ||
|
|
@@ -149,7 +180,7 @@ public void ValidateConfiguration() | |
| AzureBlobUtils.ValidateContainerName(options.ContainerName); | ||
| AzureBlobUtils.ValidateBlobName(this.name); | ||
| } | ||
| catch(ArgumentException e) | ||
| catch (ArgumentException e) | ||
| { | ||
| throw new OrleansConfigurationException( | ||
| $"Configuration for AzureBlobStorageOptions {name} is invalid. {nameof(this.options.ContainerName)} is not valid", e); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,7 @@ | ||
| using System; | ||
| using System.IO; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
| using Microsoft.Extensions.DependencyInjection; | ||
| using Microsoft.Extensions.Options; | ||
| using Orleans.Runtime; | ||
|
|
@@ -27,6 +30,32 @@ public interface IGrainStorageSerializer | |
| T Deserialize<T>(BinaryData input); | ||
| } | ||
|
|
||
| #nullable enable | ||
| /// <summary> | ||
| /// Optional stream-based serializer for grain state. | ||
| /// </summary> | ||
| public interface IGrainStorageStreamingSerializer | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ReubenBond said on Discord he prefers something like this for the abstraction. /// <summary>
/// Optional stream-based serializer for grain state.
/// </summary>
public interface IGrainStorageStreamingSerializer
{
/// <summary>
/// Serializes the object input to a stream.
/// </summary>
/// <param name="input">The object to serialize.</param>
/// <param name="destination">The destination buffer writer.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <typeparam name="T">The input type.</typeparam>
ValueTask SerializeAsync<T>(T input, IBufferWriter<byte> destination, CancellationToken cancellationToken = default);
/// <summary>
/// Deserializes the provided data from a stream.
/// </summary>
/// <param name="input">The input byte sequence.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <typeparam name="T">The output type.</typeparam>
/// <returns>The deserialized object, or null.</returns>
ValueTask<T?> DeserializeAsync<T>(ReadOnlySequence<byte> input, CancellationToken cancellationToken = default);
}For the data providers where
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For serialization, we can use ready build adapters in Orleans: public static class GrainStorageStreamingSerializerExtensions
{
/// <summary>
/// Serializes the object input to a stream.
/// </summary>
/// <param name="input">The object to serialize.</param>
/// <param name="destination">The destination stream.</param>
/// <param name="cancellationToken">The cancellation token.</param>
/// <typeparam name="T">The input type.</typeparam>
public static ValueTask SerializeAsync<T>(this IGrainStorageStreamingSerializer serializer, T input, Stream destination, CancellationToken cancellationToken = default)
{
ArgumentNullException.ThrowIfNull(serializer);
ArgumentNullException.ThrowIfNull(destination);
if (destination is MemoryStream memoryStream)
{
return serializer.SerializeAsync(input, new MemoryStreamBufferWriter(memoryStream), cancellationToken);
}
else
{
return serializer.SerializeAsync(input, new ArrayStreamBufferWriter(destination), cancellationToken);
}
}
}For There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The naming feels off, now you've switched from stream, maybe change to My gut is telling me, drop the ValueTask, by this point all IO should be done, and Task/Await is just overhead 99% of the time, unless I am missing why you would need...
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I hope IO is not done at this point, no, since that means data has been loaded into memory, which was the main problem I was trying to avoid. With large datasets, e.g., blobs, that leads to more GC churn. So there needs to be support for streaming data from blob storage to the serializer and then to objects. Calling the interface There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was thinking about the IO and buffering in the serializer implementations ( Note: overhead here is tiny — pure micro-optimization territory! The overall solution I was getting at earlier on Discord before I dropped off 😅 has been implemented at the calling level (in the provider) rather than inside the serializer. // In AzureBlobGrainStorage.UploadSerializedStateBufferedAsync<T>
var bufferStream = PooledBufferStream.Rent();
try
{
// Serialize: sync write to the pooled stream (no real await needed inside most impls)
await streamSerializer.SerializeAsync(value, bufferStream).ConfigureAwait(false);
bufferStream.Position = 0;
// Actual IO: upload from the pooled stream
return await blob.UploadAsync(bufferStream, options).ConfigureAwait(false);
}
finally
{
PooledBufferStream.Return(bufferStream);
}In JsonGrainStorageSerializer.SerializeAsync (similar for Orleans serializer): public ValueTask SerializeAsync<T>(T value, Stream destination, CancellationToken ct = default)
{
ct.ThrowIfCancellationRequested();
_orleansJsonSerializer.Serialize(value, typeof(T), destination); // sync write
return ValueTask.CompletedTask; // no suspension
}The await on serialize is still basically zero-cost (completed ValueTask), but unnecessary.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ahh I see. If there is no need for asynchrony in the serializer implementations, then dropping ValueTask is fine. Looks like there are no asynchronous methods on ReadOnlySequence or on IBufferWriter, so probably a good indicator ValueTask is not needed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep 😁 |
||
| { | ||
| /// <summary> | ||
| /// Serializes the object input to a stream. | ||
| /// </summary> | ||
| /// <param name="input">The object to serialize.</param> | ||
| /// <param name="destination">The destination stream.</param> | ||
| /// <param name="cancellationToken">The cancellation token.</param> | ||
| /// <typeparam name="T">The input type.</typeparam> | ||
| ValueTask SerializeAsync<T>(T input, Stream destination, CancellationToken cancellationToken = default); | ||
|
|
||
| /// <summary> | ||
| /// Deserializes the provided data from a stream. | ||
| /// </summary> | ||
| /// <param name="input">The input stream.</param> | ||
| /// <param name="cancellationToken">The cancellation token.</param> | ||
| /// <typeparam name="T">The output type.</typeparam> | ||
| /// <returns>The deserialized object.</returns> | ||
| ValueTask<T?> DeserializeAsync<T>(Stream input, CancellationToken cancellationToken = default); | ||
| } | ||
| #nullable restore | ||
|
|
||
| /// <summary> | ||
| /// Extensions for <see cref="IGrainStorageSerializer"/>. | ||
| /// </summary> | ||
|
|
@@ -76,7 +105,7 @@ public void PostConfigure(string name, TOptions options) | |
| { | ||
| if (options.GrainStorageSerializer == default) | ||
| { | ||
| // First, try to get a IGrainStorageSerializer that was registered with | ||
| // First, try to get a IGrainStorageSerializer that was registered with | ||
| // the same name as the storage provider | ||
| // If none is found, fallback to system wide default | ||
| options.GrainStorageSerializer = _serviceProvider.GetKeyedService<IGrainStorageSerializer>(name) ?? _serviceProvider.GetRequiredService<IGrainStorageSerializer>(); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.