From c276b8aad42a053712cbb6d34ec1551c367652f2 Mon Sep 17 00:00:00 2001 From: nickliu-msft Date: Tue, 10 Mar 2026 15:54:02 -0400 Subject: [PATCH 1/9] initial draft --- .../src/BlobContainerClient.cs | 99 +++++++++++++---- ...erListBlobFlatSegmentApacheArrowHeaders.cs | 24 +++++ .../src/Generated/ContainerRestClient.cs | 101 ++++++++++++++++++ .../Azure.Storage.Blobs/src/autorest.md | 2 +- 4 files changed, 204 insertions(+), 22 deletions(-) create mode 100644 sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerListBlobFlatSegmentApacheArrowHeaders.cs diff --git a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs index 4f44b45cb5e7..2f7359b6f32c 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs @@ -15,6 +15,7 @@ using Azure.Storage.Common; using Azure.Storage.Cryptography; using Azure.Storage.Sas; +using System.Xml.Linq; using Metadata = System.Collections.Generic.IDictionary; #pragma warning disable SA1402 // File may only contain a single type @@ -2719,11 +2720,13 @@ public virtual AsyncPageable GetBlobsAsync( /// containing each failure instance. /// internal async Task> GetBlobsInternal( + bool useArrow, string marker, BlobTraits traits, BlobStates states, string prefix, string startFrom, + string endBefore, int? 
pageSizeHint, bool async, CancellationToken cancellationToken) @@ -2743,35 +2746,89 @@ internal async Task> GetBlobsInternal( try { scope.Start(); - ResponseWithHeaders response; - if (async) + ListBlobsFlatSegmentResponse listblobFlatResponse; + Response rawResponse; + + if (useArrow) { - response = await ContainerRestClient.ListBlobFlatSegmentAsync( - prefix: prefix, - marker: marker, - maxresults: pageSizeHint, - include: BlobExtensions.AsIncludeItems(traits, states), - startFrom: startFrom, - cancellationToken: cancellationToken) - .ConfigureAwait(false); + ResponseWithHeaders arrowResponse; + + if (async) + { + arrowResponse = await ContainerRestClient.ListBlobFlatSegmentApacheArrowAsync( + prefix: prefix, + marker: marker, + maxresults: pageSizeHint, + include: BlobExtensions.AsIncludeItems(traits, states), + startFrom: startFrom, + endBefore: endBefore, + cancellationToken: cancellationToken) + .ConfigureAwait(false); + } + else + { + arrowResponse = ContainerRestClient.ListBlobFlatSegmentApacheArrow( + prefix: prefix, + marker: marker, + maxresults: pageSizeHint, + include: BlobExtensions.AsIncludeItems(traits, states), + startFrom: startFrom, + endBefore: endBefore, + cancellationToken: cancellationToken); + } + + rawResponse = arrowResponse.GetRawResponse(); + + if (arrowResponse.Headers.ContentType == "application/vnd.apache.arrow.stream") + { + // TODO: Parse Apache Arrow IPC stream into ListBlobsFlatSegmentResponse + throw new NotImplementedException("Apache Arrow response parsing is not yet implemented."); + } + else + { + // XML fallback: server returned XML despite requesting Arrow + listblobFlatResponse = default; + var document = XDocument.Load(arrowResponse.Value, LoadOptions.PreserveWhitespace); + if (document.Element("EnumerationResults") is XElement enumerationResultsElement) + { + listblobFlatResponse = ListBlobsFlatSegmentResponse.DeserializeListBlobsFlatSegmentResponse(enumerationResultsElement); + } + } } else { - response = 
ContainerRestClient.ListBlobFlatSegment( - prefix: prefix, - marker: marker, - maxresults: pageSizeHint, - include: BlobExtensions.AsIncludeItems(traits, states), - startFrom: startFrom, - cancellationToken: cancellationToken); - } + ResponseWithHeaders response; - ListBlobsFlatSegmentResponse listblobFlatResponse = response.Value; + if (async) + { + response = await ContainerRestClient.ListBlobFlatSegmentAsync( + prefix: prefix, + marker: marker, + maxresults: pageSizeHint, + include: BlobExtensions.AsIncludeItems(traits, states), + startFrom: startFrom, + cancellationToken: cancellationToken) + .ConfigureAwait(false); + } + else + { + response = ContainerRestClient.ListBlobFlatSegment( + prefix: prefix, + marker: marker, + maxresults: pageSizeHint, + include: BlobExtensions.AsIncludeItems(traits, states), + startFrom: startFrom, + cancellationToken: cancellationToken); + } + + listblobFlatResponse = response.Value; + rawResponse = response.GetRawResponse(); + } if ((traits & BlobTraits.Metadata) != BlobTraits.Metadata) { - List blobItemInternals = response.Value.Segment.BlobItems.Select(r => new BlobItemInternal( + List blobItemInternals = listblobFlatResponse.Segment.BlobItems.Select(r => new BlobItemInternal( r.Name, r.Deleted, r.Snapshot, @@ -2787,7 +2844,7 @@ internal async Task> GetBlobsInternal( return Response.FromValue( listblobFlatResponse, - response.GetRawResponse()); + rawResponse); } catch (Exception ex) { diff --git a/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerListBlobFlatSegmentApacheArrowHeaders.cs b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerListBlobFlatSegmentApacheArrowHeaders.cs new file mode 100644 index 000000000000..c7eb897f6cb1 --- /dev/null +++ b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerListBlobFlatSegmentApacheArrowHeaders.cs @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +// + +#nullable disable + +using Azure.Core; + +namespace Azure.Storage.Blobs +{ + internal partial class ContainerListBlobFlatSegmentApacheArrowHeaders + { + private readonly Response _response; + public ContainerListBlobFlatSegmentApacheArrowHeaders(Response response) + { + _response = response; + } + /// The media type of the response body. Either 'application/vnd.apache.arrow.stream' if Arrow is enabled for the account, or 'application/xml' as a fallback. Clients must check this header to determine how to deserialize the response. + public string ContentType => _response.Headers.TryGetValue("Content-Type", out string value) ? value : null; + /// Indicates the version of the Blob service used to execute the request. This header is returned for requests made against version 2009-09-19 and above. + public string Version => _response.Headers.TryGetValue("x-ms-version", out string value) ? value : null; + } +} diff --git a/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs index f067266a5596..0745db09546a 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs @@ -1332,6 +1332,107 @@ public ResponseWithHeaders ChangeLease(string lease } } + internal HttpMessage CreateListBlobFlatSegmentApacheArrowRequest(string prefix, string delimiter, string marker, int? maxresults, IEnumerable include, int? 
timeout, string startFrom, string endBefore) + { + var message = _pipeline.CreateMessage(); + var request = message.Request; + request.Method = RequestMethod.Get; + var uri = new RawRequestUriBuilder(); + uri.AppendRaw(_url, false); + uri.AppendQuery("restype", "container", true); + uri.AppendQuery("comp", "list", true); + if (prefix != null) + { + uri.AppendQuery("prefix", prefix, true); + } + if (delimiter != null) + { + uri.AppendQuery("delimiter", delimiter, true); + } + if (marker != null) + { + uri.AppendQuery("marker", marker, true); + } + if (maxresults != null) + { + uri.AppendQuery("maxresults", maxresults.Value, true); + } + if (include != null && !(include is Common.ChangeTrackingList changeTrackingList && changeTrackingList.IsUndefined)) + { + uri.AppendQueryDelimited("include", include, ",", true); + } + if (timeout != null) + { + uri.AppendQuery("timeout", timeout.Value, true); + } + if (startFrom != null) + { + uri.AppendQuery("startFrom", startFrom, true); + } + if (endBefore != null) + { + uri.AppendQuery("endBefore", endBefore, true); + } + request.Uri = uri; + request.Headers.Add("Accept", "application/vnd.apache.arrow.stream"); + request.Headers.Add("x-ms-version", _version); + return message; + } + + /// The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache Arrow use case so response is returned as raw to be deserialized by the client. + /// Filters the results to return only containers whose name begins with the specified prefix. + /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. + /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. 
The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. + /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. + /// Include this parameter to specify one or more datasets to include in the response. + /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. + /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). + /// Specifies the relative path to end before list paths. (Exclusive). + /// The cancellation token to use. + public async Task> ListBlobFlatSegmentApacheArrowAsync(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? 
timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) + { + using var message = CreateListBlobFlatSegmentApacheArrowRequest(prefix, delimiter, marker, maxresults, include, timeout, startFrom, endBefore); + await _pipeline.SendAsync(message, cancellationToken).ConfigureAwait(false); + var headers = new ContainerListBlobFlatSegmentApacheArrowHeaders(message.Response); + switch (message.Response.Status) + { + case 200: + { + var value = message.ExtractResponseContent(); + return ResponseWithHeaders.FromValue(value, headers, message.Response); + } + default: + throw new RequestFailedException(message.Response); + } + } + + /// The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache Arrow use case so response is returned as raw to be deserialized by the client. + /// Filters the results to return only containers whose name begins with the specified prefix. + /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. + /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. + /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. 
Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. + /// Include this parameter to specify one or more datasets to include in the response. + /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. + /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). + /// Specifies the relative path to end before list paths. (Exclusive). + /// The cancellation token to use. + public ResponseWithHeaders ListBlobFlatSegmentApacheArrow(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) + { + using var message = CreateListBlobFlatSegmentApacheArrowRequest(prefix, delimiter, marker, maxresults, include, timeout, startFrom, endBefore); + _pipeline.Send(message, cancellationToken); + var headers = new ContainerListBlobFlatSegmentApacheArrowHeaders(message.Response); + switch (message.Response.Status) + { + case 200: + { + var value = message.ExtractResponseContent(); + return ResponseWithHeaders.FromValue(value, headers, message.Response); + } + default: + throw new RequestFailedException(message.Response); + } + } + internal HttpMessage CreateListBlobFlatSegmentRequest(string prefix, string marker, int? maxresults, IEnumerable include, string startFrom, int? 
timeout) { var message = _pipeline.CreateMessage(); diff --git a/sdk/storage/Azure.Storage.Blobs/src/autorest.md b/sdk/storage/Azure.Storage.Blobs/src/autorest.md index 965da43a84a1..db1ff4ad7027 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/autorest.md +++ b/sdk/storage/Azure.Storage.Blobs/src/autorest.md @@ -4,7 +4,7 @@ Run `dotnet build /t:GenerateCode` to generate code. ``` yaml input-file: - - https://raw.githubusercontent.com/Azure/azure-rest-api-specs/be46becafeb29aa993898709e35759d3643b2809/specification/storage/data-plane/Microsoft.BlobStorage/stable/2026-04-06/blob.json + - C:\azure-rest-api-specs\specification\storage\data-plane\Microsoft.BlobStorage\stable\2026-10-06\blob.json generation1-convenience-client: true # https://github.com/Azure/autorest/issues/4075 skip-semantics-validation: true From 501e7132ff5312aaaa13365256bce1f13d886f99 Mon Sep 17 00:00:00 2001 From: nickliu-msft Date: Mon, 23 Mar 2026 15:18:23 -0400 Subject: [PATCH 2/9] Wiring up request params --- .../src/BlobContainerClient.cs | 36 ++++++++++++------- .../src/Models/GetBlobsAsyncCollection.cs | 13 ++++++- .../src/Models/GetBlobsOptions.cs | 13 +++++++ 3 files changed, 49 insertions(+), 13 deletions(-) diff --git a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs index 2f7359b6f32c..b6e2bd326379 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs @@ -2525,10 +2525,12 @@ public virtual Pageable GetBlobs( CancellationToken cancellationToken = default) => new GetBlobsAsyncCollection( this, - options?.Traits ?? BlobTraits.None, - options?.States ?? BlobStates.None, - options?.Prefix, - startFrom: options?.StartFrom) + useApacheArrow: options?.UseApacheArrow ?? false, + traits: options?.Traits ?? BlobTraits.None, + states: options?.States ?? 
BlobStates.None, + prefix: options?.Prefix, + startFrom: options?.StartFrom, + endBefore: options?.EndBefore) .ToSyncCollection(cancellationToken); /// @@ -2564,10 +2566,12 @@ public virtual AsyncPageable GetBlobsAsync( CancellationToken cancellationToken = default) => new GetBlobsAsyncCollection( this, - options?.Traits ?? BlobTraits.None, - options?.States ?? BlobStates.None, - options?.Prefix, - options?.StartFrom) + useApacheArrow: options?.UseApacheArrow ?? false, + traits: options?.Traits ?? BlobTraits.None, + states: options?.States ?? BlobStates.None, + prefix: options?.Prefix, + startFrom: options?.StartFrom, + endBefore: options?.EndBefore) .ToAsyncCollection(cancellationToken); /// @@ -2613,7 +2617,7 @@ public virtual Pageable GetBlobs( BlobStates states, string prefix, CancellationToken cancellationToken) => - new GetBlobsAsyncCollection(this, traits, states, prefix, startFrom: default).ToSyncCollection(cancellationToken); + new GetBlobsAsyncCollection(this, false, traits, states, prefix, startFrom: default, endBefore: default).ToSyncCollection(cancellationToken); /// /// The @@ -2658,7 +2662,7 @@ public virtual AsyncPageable GetBlobsAsync( BlobStates states, string prefix, CancellationToken cancellationToken) => - new GetBlobsAsyncCollection(this, traits, states, prefix, startFrom: default).ToAsyncCollection(cancellationToken); + new GetBlobsAsyncCollection(this, false, traits, states, prefix, startFrom: default, endBefore: default).ToAsyncCollection(cancellationToken); /// /// The operation returns a @@ -2674,6 +2678,9 @@ public virtual AsyncPageable GetBlobsAsync( /// /// List Blobs. /// + /// + /// Specifies whether to use Apache Arrow for the operation. + /// /// /// An optional string value that identifies the segment of the list /// of blobs to be returned with the next listing operation. 
The @@ -2698,6 +2705,11 @@ public virtual AsyncPageable GetBlobsAsync( /// is used to list paths starting from a defined location within prefix’s specified range. /// For non-recursive list, only one entity level is supported. /// + /// + /// Optional. Specifies a fully qualified path within the container, + /// ending the listing when all results before have been returned. + /// This is only supported if is set to true. + /// /// /// Gets or sets a value indicating the size of the page that should be /// requested. @@ -2720,7 +2732,7 @@ public virtual AsyncPageable GetBlobsAsync( /// containing each failure instance. /// internal async Task> GetBlobsInternal( - bool useArrow, + bool useApacheArrow, string marker, BlobTraits traits, BlobStates states, @@ -2750,7 +2762,7 @@ internal async Task> GetBlobsInternal( ListBlobsFlatSegmentResponse listblobFlatResponse; Response rawResponse; - if (useArrow) + if (useApacheArrow) { ResponseWithHeaders arrowResponse; diff --git a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs index c2fd83c87452..7b79d63352ad 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs @@ -19,19 +19,26 @@ internal class GetBlobsAsyncCollection : StorageCollectionEnumerator private readonly BlobStates _states; private readonly string _prefix; private readonly string _startFrom; + private readonly string _endBefore; + //private readonly string _delimiter; + private readonly bool _useApacheArrow; public GetBlobsAsyncCollection( BlobContainerClient client, + bool useApacheArrow, BlobTraits traits, BlobStates states, string prefix, - string startFrom) + string startFrom, + string endBefore) { _client = client; _traits = traits; _states = states; _prefix = prefix; _startFrom = startFrom; + _endBefore = endBefore; + _useApacheArrow = useApacheArrow; } public 
override async ValueTask> GetNextPageAsync( @@ -45,11 +52,13 @@ public override async ValueTask> GetNextPageAsync( if (async) { response = await _client.GetBlobsInternal( + useApacheArrow: _useApacheArrow, marker: continuationToken, traits: _traits, states: _states, prefix: _prefix, startFrom: _startFrom, + endBefore: _endBefore, pageSizeHint: pageSizeHint, async: async, cancellationToken: cancellationToken) @@ -58,11 +67,13 @@ public override async ValueTask> GetNextPageAsync( else { response = _client.GetBlobsInternal( + useApacheArrow: _useApacheArrow, marker: continuationToken, traits: _traits, states: _states, prefix: _prefix, startFrom: _startFrom, + endBefore: _endBefore, pageSizeHint: pageSizeHint, async: async, cancellationToken: cancellationToken) diff --git a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsOptions.cs b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsOptions.cs index e0a9c17562de..e9364bdb29d2 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsOptions.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsOptions.cs @@ -31,5 +31,18 @@ public class GetBlobsOptions /// For non-recursive list, only one entity level is supported. /// public string StartFrom { get; set; } + + /// + /// Optional. Specifies a fully qualified path within the container, + /// ending the listing when all results before have been returned. + /// This is only supported if is set to true. + /// + public string EndBefore { get; set; } + + /// + /// Optional. Specifies whether we are using Apache Arrow to list blobs. + /// Defaults to false. 
+ /// + public bool UseApacheArrow { get; set; } } } From 4df127aea1584a62bd40772e2f99b36367865696 Mon Sep 17 00:00:00 2001 From: nickliu-msft Date: Mon, 23 Mar 2026 18:40:57 -0400 Subject: [PATCH 3/9] Added Apache Arrow parsing --- .../Directory.Packages.props | 1 + .../src/Azure.Storage.Blobs.csproj | 1 + .../src/BlobContainerClient.cs | 237 +++++++++++++++++- .../src/Generated/ContainerRestClient.cs | 16 +- .../tests/ContainerClientTests.cs | 27 ++ .../src/Shared/Constants.cs | 1 + 6 files changed, 268 insertions(+), 15 deletions(-) diff --git a/eng/centralpackagemanagement/Directory.Packages.props b/eng/centralpackagemanagement/Directory.Packages.props index 0e1499c4e749..3b8bffade67f 100644 --- a/eng/centralpackagemanagement/Directory.Packages.props +++ b/eng/centralpackagemanagement/Directory.Packages.props @@ -179,6 +179,7 @@ + diff --git a/sdk/storage/Azure.Storage.Blobs/src/Azure.Storage.Blobs.csproj b/sdk/storage/Azure.Storage.Blobs/src/Azure.Storage.Blobs.csproj index 994bd2fa188b..880a483e9f0d 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Azure.Storage.Blobs.csproj +++ b/sdk/storage/Azure.Storage.Blobs/src/Azure.Storage.Blobs.csproj @@ -38,6 +38,7 @@ + diff --git a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs index b6e2bd326379..4dcd7d05b7fd 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs @@ -6,8 +6,12 @@ using System.ComponentModel; using System.IO; using System.Linq; +using System.Runtime.InteropServices.ComTypes; using System.Threading; using System.Threading.Tasks; +using System.Xml.Linq; +using Apache.Arrow; +using Apache.Arrow.Ipc; using Azure.Core; using Azure.Core.Pipeline; using Azure.Storage.Blobs.Models; @@ -15,7 +19,6 @@ using Azure.Storage.Common; using Azure.Storage.Cryptography; using Azure.Storage.Sas; -using System.Xml.Linq; using Metadata = 
System.Collections.Generic.IDictionary; #pragma warning disable SA1402 // File may only contain a single type @@ -2792,10 +2795,15 @@ internal async Task> GetBlobsInternal( rawResponse = arrowResponse.GetRawResponse(); - if (arrowResponse.Headers.ContentType == "application/vnd.apache.arrow.stream") + if (arrowResponse.Headers.ContentType == Constants.Blob.ApacheArrowContentType) { - // TODO: Parse Apache Arrow IPC stream into ListBlobsFlatSegmentResponse - throw new NotImplementedException("Apache Arrow response parsing is not yet implemented."); + listblobFlatResponse = await ParseArrowListBlobsFlatResponse( + arrowResponse.Value, + prefix, + marker, + pageSizeHint, + async, + cancellationToken).ConfigureAwait(false); } else { @@ -2871,6 +2879,227 @@ internal async Task> GetBlobsInternal( } } } + + private async Task ParseArrowListBlobsFlatResponse( + Stream arrowStream, + string prefix, + string marker, + int? maxResults, + bool async, + CancellationToken cancellationToken) + { + using var reader = new ArrowStreamReader(arrowStream); + + string nextMarker = null; + if (reader.Schema.Metadata != null) + { + reader.Schema.Metadata.TryGetValue("NextMarker", out nextMarker); + } + + var blobItems = new List(); + + while (true) + { + RecordBatch batch = async + ? 
await reader.ReadNextRecordBatchAsync(cancellationToken).ConfigureAwait(false) + : reader.ReadNextRecordBatch(); + + if (batch == null) + { + break; + } + + // Blob-level columns + StringArray nameCol = GetArrowColumn(batch, "Name") as StringArray; + BooleanArray deletedCol = GetArrowColumn(batch, "Deleted") as BooleanArray; + StringArray snapshotCol = GetArrowColumn(batch, "Snapshot") as StringArray; + StringArray versionIdCol = GetArrowColumn(batch, "VersionId") as StringArray; + BooleanArray isCurrentVersionCol = GetArrowColumn(batch, "IsCurrentVersion") as BooleanArray; + BooleanArray hasVersionsOnlyCol = GetArrowColumn(batch, "HasVersionsOnly") as BooleanArray; + + // Properties columns + TimestampArray creationTimeCol = GetArrowColumn(batch, "Creation-Time") as TimestampArray; + TimestampArray lastModifiedCol = GetArrowColumn(batch, "Last-Modified") as TimestampArray; + StringArray etagCol = GetArrowColumn(batch, "Etag") as StringArray; + UInt64Array contentLengthCol = GetArrowColumn(batch, "Content-Length") as UInt64Array; + StringArray contentTypeCol = GetArrowColumn(batch, "Content-Type") as StringArray; + StringArray contentEncodingCol = GetArrowColumn(batch, "Content-Encoding") as StringArray; + StringArray contentLanguageCol = GetArrowColumn(batch, "Content-Language") as StringArray; + StringArray contentMD5Col = GetArrowColumn(batch, "Content-MD5") as StringArray; + StringArray contentDispositionCol = GetArrowColumn(batch, "Content-Disposition") as StringArray; + StringArray cacheControlCol = GetArrowColumn(batch, "Cache-Control") as StringArray; + UInt64Array blobSequenceNumberCol = GetArrowColumn(batch, "x-ms-blob-sequence-number") as UInt64Array; + StringArray blobTypeCol = GetArrowColumn(batch, "BlobType") as StringArray; + StringArray leaseStatusCol = GetArrowColumn(batch, "LeaseStatus") as StringArray; + StringArray leaseStateCol = GetArrowColumn(batch, "LeaseState") as StringArray; + StringArray leaseDurationCol = GetArrowColumn(batch, 
"LeaseDuration") as StringArray; + StringArray copyIdCol = GetArrowColumn(batch, "CopyId") as StringArray; + StringArray copyStatusCol = GetArrowColumn(batch, "CopyStatus") as StringArray; + StringArray copySourceCol = GetArrowColumn(batch, "CopySource") as StringArray; + StringArray copyProgressCol = GetArrowColumn(batch, "CopyProgress") as StringArray; + TimestampArray copyCompletionTimeCol = GetArrowColumn(batch, "CopyCompletionTime") as TimestampArray; + StringArray copyStatusDescriptionCol = GetArrowColumn(batch, "CopyStatusDescription") as StringArray; + StringArray destinationSnapshotCol = GetArrowColumn(batch, "CopyDestinationSnapshot") as StringArray; + BooleanArray serverEncryptedCol = GetArrowColumn(batch, "ServerEncrypted") as BooleanArray; + BooleanArray incrementalCopyCol = GetArrowColumn(batch, "IncrementalCopy") as BooleanArray; + TimestampArray deletedTimeCol = GetArrowColumn(batch, "DeletedTime") as TimestampArray; + UInt64Array remainingRetentionDaysCol = GetArrowColumn(batch, "RemainingRetentionDays") as UInt64Array; + StringArray accessTierCol = GetArrowColumn(batch, "AccessTier") as StringArray; + BooleanArray accessTierInferredCol = GetArrowColumn(batch, "AccessTierInferred") as BooleanArray; + StringArray archiveStatusCol = GetArrowColumn(batch, "ArchiveStatus") as StringArray; + StringArray customerProvidedKeySha256Col = GetArrowColumn(batch, "CustomerProvidedKeySha256") as StringArray; + StringArray encryptionScopeCol = GetArrowColumn(batch, "EncryptionScope") as StringArray; + TimestampArray accessTierChangeTimeCol = GetArrowColumn(batch, "AccessTierChangeTime") as TimestampArray; + UInt64Array tagCountCol = GetArrowColumn(batch, "TagCount") as UInt64Array; + BooleanArray sealedCol = GetArrowColumn(batch, "Sealed") as BooleanArray; + StringArray rehydratePriorityCol = GetArrowColumn(batch, "RehydratePriority") as StringArray; + TimestampArray lastAccessTimeCol = GetArrowColumn(batch, "LastAccessTime") as TimestampArray; + TimestampArray 
immutabilityPolicyUntilDateCol = GetArrowColumn(batch, "ImmutabilityPolicyUntilDate") as TimestampArray; + StringArray immutabilityPolicyModeCol = GetArrowColumn(batch, "ImmutabilityPolicyMode") as StringArray; + BooleanArray legalHoldCol = GetArrowColumn(batch, "LegalHold") as BooleanArray; + + // Map columns + MapArray tagsCol = GetArrowColumn(batch, "Tags") as MapArray; + MapArray metadataCol = GetArrowColumn(batch, "Metadata") as MapArray; + MapArray orMetadataCol = GetArrowColumn(batch, "OrMetadata") as MapArray; + + for (int i = 0; i < batch.Length; i++) + { + string contentMD5Str = contentMD5Col?.GetString(i); + byte[] contentMD5 = contentMD5Str != null ? Convert.FromBase64String(contentMD5Str) : null; + + var properties = new BlobPropertiesInternal( + creationTime: creationTimeCol?.GetTimestamp(i), + lastModified: lastModifiedCol?.GetTimestamp(i) ?? default, + etag: etagCol?.GetString(i), + contentLength: ReadNullableInt64(contentLengthCol, i), + contentType: contentTypeCol?.GetString(i), + contentEncoding: contentEncodingCol?.GetString(i), + contentLanguage: contentLanguageCol?.GetString(i), + contentMD5: contentMD5, + contentDisposition: contentDispositionCol?.GetString(i), + cacheControl: cacheControlCol?.GetString(i), + blobSequenceNumber: ReadNullableInt64(blobSequenceNumberCol, i), + blobType: ReadEnum(blobTypeCol, i, s => s.ToBlobType()), + leaseStatus: ReadEnum(leaseStatusCol, i, s => s.ToLeaseStatus()), + leaseState: ReadEnum(leaseStateCol, i, s => s.ToLeaseState()), + leaseDuration: ReadEnum(leaseDurationCol, i, s => s.ToLeaseDurationType()), + copyId: copyIdCol?.GetString(i), + copyStatus: ReadEnum(copyStatusCol, i, s => s.ToCopyStatus()), + copySource: copySourceCol?.GetString(i), + copyProgress: copyProgressCol?.GetString(i), + copyCompletionTime: copyCompletionTimeCol?.GetTimestamp(i), + copyStatusDescription: copyStatusDescriptionCol?.GetString(i), + serverEncrypted: ReadNullableBool(serverEncryptedCol, i), + incrementalCopy: 
ReadNullableBool(incrementalCopyCol, i), + destinationSnapshot: destinationSnapshotCol?.GetString(i), + deletedTime: deletedTimeCol?.GetTimestamp(i), + remainingRetentionDays: ReadNullableInt32(remainingRetentionDaysCol, i), + accessTier: ReadEnum(accessTierCol, i, s => new AccessTier(s)), + accessTierInferred: ReadNullableBool(accessTierInferredCol, i), + archiveStatus: ReadEnum(archiveStatusCol, i, s => s.ToArchiveStatus()), + customerProvidedKeySha256: customerProvidedKeySha256Col?.GetString(i), + encryptionScope: encryptionScopeCol?.GetString(i), + accessTierChangeTime: accessTierChangeTimeCol?.GetTimestamp(i), + tagCount: ReadNullableInt32(tagCountCol, i), + expiresOn: null, + isSealed: ReadNullableBool(sealedCol, i), + rehydratePriority: ReadEnum(rehydratePriorityCol, i, s => s.ToRehydratePriority().Value), + lastAccessedOn: lastAccessTimeCol?.GetTimestamp(i), + immutabilityPolicyExpiresOn: immutabilityPolicyUntilDateCol?.GetTimestamp(i), + immutabilityPolicyMode: ReadEnum(immutabilityPolicyModeCol, i, s => s.ToBlobImmutabilityPolicyMode()), + legalHold: ReadNullableBool(legalHoldCol, i)); + + IReadOnlyDictionary metadata = ReadArrowMap(metadataCol, i); + IReadOnlyDictionary orMetadata = ReadArrowMap(orMetadataCol, i); + + BlobTags blobTags = null; + IReadOnlyDictionary tagsDict = ReadArrowMap(tagsCol, i); + if (tagsDict != null) + { + var tagList = new List(); + foreach (KeyValuePair kvp in tagsDict) + { + tagList.Add(new BlobTag(kvp.Key, kvp.Value)); + } + blobTags = new BlobTags(tagList); + } + + blobItems.Add(new BlobItemInternal( + name: new BlobName(encoded: false, content: nameCol?.GetString(i)), + deleted: ReadNullableBool(deletedCol, i) == true, + snapshot: snapshotCol?.GetString(i) ?? 
string.Empty, + versionId: versionIdCol?.GetString(i), + isCurrentVersion: ReadNullableBool(isCurrentVersionCol, i), + properties: properties, + metadata: metadata, + blobTags: blobTags, + hasVersionsOnly: ReadNullableBool(hasVersionsOnlyCol, i), + orMetadata: orMetadata)); + } + } + + return new ListBlobsFlatSegmentResponse( + serviceEndpoint: Uri.GetLeftPart(UriPartial.Authority), + containerName: Name, + prefix: prefix, + marker: marker, + maxResults: maxResults, + segment: new BlobFlatListSegment(blobItems), + nextMarker: nextMarker); + } + + private static IArrowArray GetArrowColumn(RecordBatch batch, string name) + { + int index = batch.Schema.GetFieldIndex(name); + return index >= 0 ? batch.Column(index) : null; + } + + private static IReadOnlyDictionary ReadArrowMap(MapArray mapArray, int rowIndex) + { + if (mapArray == null || mapArray.IsNull(rowIndex)) + { + return null; + } + + StringArray keys = mapArray.Keys as StringArray; + StringArray values = mapArray.Values as StringArray; + int start = mapArray.ValueOffsets[rowIndex]; + int length = mapArray.ValueOffsets[rowIndex + 1] - start; + + var dict = new Dictionary(length); + for (int j = start; j < start + length; j++) + { + string key = keys.GetString(j); + string value = values.GetString(j); + if (key != null) + { + dict[key] = value; + } + } + return dict; + } + + private static bool? ReadNullableBool(BooleanArray array, int index) + { + return array != null && !array.IsNull(index) ? (bool?)array.GetValue(index) : null; + } + + private static long? ReadNullableInt64(UInt64Array array, int index) + { + return array != null && !array.IsNull(index) ? (long?)array.GetValue(index) : null; + } + + private static int? ReadNullableInt32(UInt64Array array, int index) + { + return array != null && !array.IsNull(index) ? (int?)array.GetValue(index) : null; + } + + private static T? 
ReadEnum(StringArray array, int index, Func parse) where T : struct + { + string value = array?.GetString(index); + return value != null ? parse(value) : null; + } #endregion GetBlobs #region GetBlobsByHierarchy diff --git a/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs index 0745db09546a..8bd839258cc0 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs @@ -1332,7 +1332,7 @@ public ResponseWithHeaders ChangeLease(string lease } } - internal HttpMessage CreateListBlobFlatSegmentApacheArrowRequest(string prefix, string delimiter, string marker, int? maxresults, IEnumerable include, int? timeout, string startFrom, string endBefore) + internal HttpMessage CreateListBlobFlatSegmentApacheArrowRequest(string prefix, string marker, int? maxresults, IEnumerable include, int? timeout, string startFrom, string endBefore) { var message = _pipeline.CreateMessage(); var request = message.Request; @@ -1345,10 +1345,6 @@ internal HttpMessage CreateListBlobFlatSegmentApacheArrowRequest(string prefix, { uri.AppendQuery("prefix", prefix, true); } - if (delimiter != null) - { - uri.AppendQuery("delimiter", delimiter, true); - } if (marker != null) { uri.AppendQuery("marker", marker, true); @@ -1381,7 +1377,6 @@ internal HttpMessage CreateListBlobFlatSegmentApacheArrowRequest(string prefix, /// The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache Arrow use case so response is returned as raw to be deserialized by the client. /// Filters the results to return only containers whose name begins with the specified prefix. 
- /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. /// Include this parameter to specify one or more datasets to include in the response. @@ -1389,9 +1384,9 @@ internal HttpMessage CreateListBlobFlatSegmentApacheArrowRequest(string prefix, /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). /// Specifies the relative path to end before list paths. (Exclusive). /// The cancellation token to use. - public async Task> ListBlobFlatSegmentApacheArrowAsync(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? 
timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) + public async Task> ListBlobFlatSegmentApacheArrowAsync(string prefix = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) { - using var message = CreateListBlobFlatSegmentApacheArrowRequest(prefix, delimiter, marker, maxresults, include, timeout, startFrom, endBefore); + using var message = CreateListBlobFlatSegmentApacheArrowRequest(prefix, marker, maxresults, include, timeout, startFrom, endBefore); await _pipeline.SendAsync(message, cancellationToken).ConfigureAwait(false); var headers = new ContainerListBlobFlatSegmentApacheArrowHeaders(message.Response); switch (message.Response.Status) @@ -1408,7 +1403,6 @@ public async Task The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache Arrow use case so response is returned as raw to be deserialized by the client. /// Filters the results to return only containers whose name begins with the specified prefix. - /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. 
/// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. /// Include this parameter to specify one or more datasets to include in the response. @@ -1416,9 +1410,9 @@ public async Task Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). /// Specifies the relative path to end before list paths. (Exclusive). /// The cancellation token to use. - public ResponseWithHeaders ListBlobFlatSegmentApacheArrow(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) + public ResponseWithHeaders ListBlobFlatSegmentApacheArrow(string prefix = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? 
timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) { - using var message = CreateListBlobFlatSegmentApacheArrowRequest(prefix, delimiter, marker, maxresults, include, timeout, startFrom, endBefore); + using var message = CreateListBlobFlatSegmentApacheArrowRequest(prefix, marker, maxresults, include, timeout, startFrom, endBefore); _pipeline.Send(message, cancellationToken); var headers = new ContainerListBlobFlatSegmentApacheArrowHeaders(message.Response); switch (message.Response.Status) diff --git a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs index b3f057cd5ba5..e4d0067340cb 100644 --- a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs +++ b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs @@ -2826,6 +2826,33 @@ public async Task ListBlobsFlatSegmentAsync_StartFrom() Assert.AreEqual(3, blobs.Count); } + [RecordedTest] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + // Act + var blobs = new List(); + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true + }; + await foreach (Page page in test.Container.GetBlobsAsync(options).AsPages()) + { + blobs.AddRange(page.Values); + } + + // Assert + Assert.AreEqual(BlobNames.Length, blobs.Count); + + var foundBlobNames = blobs.Select(blob => blob.Name).ToArray(); + + Assert.IsTrue(BlobNames.All(blobName => foundBlobNames.Contains(blobName))); + } + [RecordedTest] [PlaybackOnly("Service bug - https://github.com/Azure/azure-sdk-for-net/issues/16516")] public async Task ListBlobsHierarchySegmentAsync() diff --git a/sdk/storage/Azure.Storage.Common/src/Shared/Constants.cs b/sdk/storage/Azure.Storage.Common/src/Shared/Constants.cs index 07ddd8ddb1fd..ceb7fdd91ba9 100644 --- 
a/sdk/storage/Azure.Storage.Common/src/Shared/Constants.cs +++ b/sdk/storage/Azure.Storage.Common/src/Shared/Constants.cs @@ -235,6 +235,7 @@ internal static class Blob public const int QuickQueryDownloadSize = 4 * Constants.MB; public const string MetadataHeaderPrefix = "x-ms-meta-"; public const string ObjectReplicationRulesHeaderPrefix = "x-ms-or-"; + public const string ApacheArrowContentType = "application/vnd.apache.arrow.stream"; internal static class Append { From aa603f4f6fa6b06e6a14338a84a87038dd10de74 Mon Sep 17 00:00:00 2001 From: nickliu-msft Date: Tue, 24 Mar 2026 12:21:59 -0400 Subject: [PATCH 4/9] More changes --- .../src/BlobContainerClient.cs | 3 + ...tBlobHierarchySegmentApacheArrowHeaders.cs | 24 ++ .../src/Generated/ContainerRestClient.cs | 257 +++++++++++++++--- .../src/Models/GetBlobsAsyncCollection.cs | 1 - .../tests/ContainerClientTests.cs | 28 ++ 5 files changed, 275 insertions(+), 38 deletions(-) create mode 100644 sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerListBlobHierarchySegmentApacheArrowHeaders.cs diff --git a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs index 4dcd7d05b7fd..7c91a2cb8d8c 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs @@ -2797,6 +2797,7 @@ internal async Task> GetBlobsInternal( if (arrowResponse.Headers.ContentType == Constants.Blob.ApacheArrowContentType) { + // Parse using Apache Arrow listblobFlatResponse = await ParseArrowListBlobsFlatResponse( arrowResponse.Value, prefix, @@ -2947,6 +2948,7 @@ private async Task ParseArrowListBlobsFlatResponse StringArray accessTierCol = GetArrowColumn(batch, "AccessTier") as StringArray; BooleanArray accessTierInferredCol = GetArrowColumn(batch, "AccessTierInferred") as BooleanArray; StringArray archiveStatusCol = GetArrowColumn(batch, "ArchiveStatus") as StringArray; + StringArray smartAccessTierCol = 
GetArrowColumn(batch, "SmartAccessTier") as StringArray; StringArray customerProvidedKeySha256Col = GetArrowColumn(batch, "CustomerProvidedKeySha256") as StringArray; StringArray encryptionScopeCol = GetArrowColumn(batch, "EncryptionScope") as StringArray; TimestampArray accessTierChangeTimeCol = GetArrowColumn(batch, "AccessTierChangeTime") as TimestampArray; @@ -2998,6 +3000,7 @@ private async Task ParseArrowListBlobsFlatResponse accessTier: ReadEnum(accessTierCol, i, s => new AccessTier(s)), accessTierInferred: ReadNullableBool(accessTierInferredCol, i), archiveStatus: ReadEnum(archiveStatusCol, i, s => s.ToArchiveStatus()), + smartAccessTier: ReadEnum(smartAccessTierCol, i, s => new AccessTier(s)), customerProvidedKeySha256: customerProvidedKeySha256Col?.GetString(i), encryptionScope: encryptionScopeCol?.GetString(i), accessTierChangeTime: accessTierChangeTimeCol?.GetTimestamp(i), diff --git a/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerListBlobHierarchySegmentApacheArrowHeaders.cs b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerListBlobHierarchySegmentApacheArrowHeaders.cs new file mode 100644 index 000000000000..688d350d44a4 --- /dev/null +++ b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerListBlobHierarchySegmentApacheArrowHeaders.cs @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// + +#nullable disable + +using Azure.Core; + +namespace Azure.Storage.Blobs +{ + internal partial class ContainerListBlobHierarchySegmentApacheArrowHeaders + { + private readonly Response _response; + public ContainerListBlobHierarchySegmentApacheArrowHeaders(Response response) + { + _response = response; + } + /// The media type of the body of the response. For List Blobs this is 'application/xml'. + public string ContentType => _response.Headers.TryGetValue("Content-Type", out string value) ? 
value : null; + /// Indicates the version of the Blob service used to execute the request. This header is returned for requests made against version 2009-09-19 and above. + public string Version => _response.Headers.TryGetValue("x-ms-version", out string value) ? value : null; + } +} diff --git a/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs index 8bd839258cc0..a701c7831e4b 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Generated/ContainerRestClient.cs @@ -1332,6 +1332,105 @@ public ResponseWithHeaders ChangeLease(string lease } } + internal HttpMessage CreateListBlobFlatSegmentRequest(string prefix, string marker, int? maxresults, IEnumerable include, string startFrom, int? timeout) + { + var message = _pipeline.CreateMessage(); + var request = message.Request; + request.Method = RequestMethod.Get; + var uri = new RawRequestUriBuilder(); + uri.AppendRaw(_url, false); + uri.AppendQuery("restype", "container", true); + uri.AppendQuery("comp", "list", true); + if (prefix != null) + { + uri.AppendQuery("prefix", prefix, true); + } + if (marker != null) + { + uri.AppendQuery("marker", marker, true); + } + if (maxresults != null) + { + uri.AppendQuery("maxresults", maxresults.Value, true); + } + if (include != null && !(include is Common.ChangeTrackingList changeTrackingList && changeTrackingList.IsUndefined)) + { + uri.AppendQueryDelimited("include", include, ",", true); + } + if (startFrom != null) + { + uri.AppendQuery("startFrom", startFrom, true); + } + if (timeout != null) + { + uri.AppendQuery("timeout", timeout.Value, true); + } + request.Uri = uri; + request.Headers.Add("x-ms-version", _version); + request.Headers.Add("Accept", "application/xml"); + return message; + } + + /// [Update] The List Blobs operation returns a list of the blobs under the specified container. 
+ /// Filters the results to return only containers whose name begins with the specified prefix. + /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. + /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. + /// Include this parameter to specify one or more datasets to include in the response. + /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). + /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. + /// The cancellation token to use. + public async Task> ListBlobFlatSegmentAsync(string prefix = null, string marker = null, int? maxresults = null, IEnumerable include = null, string startFrom = null, int? 
timeout = null, CancellationToken cancellationToken = default) + { + using var message = CreateListBlobFlatSegmentRequest(prefix, marker, maxresults, include, startFrom, timeout); + await _pipeline.SendAsync(message, cancellationToken).ConfigureAwait(false); + var headers = new ContainerListBlobFlatSegmentHeaders(message.Response); + switch (message.Response.Status) + { + case 200: + { + ListBlobsFlatSegmentResponse value = default; + var document = XDocument.Load(message.Response.ContentStream, LoadOptions.PreserveWhitespace); + if (document.Element("EnumerationResults") is XElement enumerationResultsElement) + { + value = ListBlobsFlatSegmentResponse.DeserializeListBlobsFlatSegmentResponse(enumerationResultsElement); + } + return ResponseWithHeaders.FromValue(value, headers, message.Response); + } + default: + throw new RequestFailedException(message.Response); + } + } + + /// [Update] The List Blobs operation returns a list of the blobs under the specified container. + /// Filters the results to return only containers whose name begins with the specified prefix. + /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. + /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. 
For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. + /// Include this parameter to specify one or more datasets to include in the response. + /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). + /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. + /// The cancellation token to use. + public ResponseWithHeaders ListBlobFlatSegment(string prefix = null, string marker = null, int? maxresults = null, IEnumerable include = null, string startFrom = null, int? timeout = null, CancellationToken cancellationToken = default) + { + using var message = CreateListBlobFlatSegmentRequest(prefix, marker, maxresults, include, startFrom, timeout); + _pipeline.Send(message, cancellationToken); + var headers = new ContainerListBlobFlatSegmentHeaders(message.Response); + switch (message.Response.Status) + { + case 200: + { + ListBlobsFlatSegmentResponse value = default; + var document = XDocument.Load(message.Response.ContentStream, LoadOptions.PreserveWhitespace); + if (document.Element("EnumerationResults") is XElement enumerationResultsElement) + { + value = ListBlobsFlatSegmentResponse.DeserializeListBlobsFlatSegmentResponse(enumerationResultsElement); + } + return ResponseWithHeaders.FromValue(value, headers, message.Response); + } + default: + throw new RequestFailedException(message.Response); + } + } + internal HttpMessage CreateListBlobFlatSegmentApacheArrowRequest(string prefix, string marker, int? maxresults, IEnumerable include, int? 
timeout, string startFrom, string endBefore) { var message = _pipeline.CreateMessage(); @@ -1427,7 +1526,7 @@ public ResponseWithHeaders include, string startFrom, int? timeout) + internal HttpMessage CreateListBlobHierarchySegmentRequest(string prefix, string delimiter, string marker, int? maxresults, IEnumerable include, string startFrom, int? timeout) { var message = _pipeline.CreateMessage(); var request = message.Request; @@ -1440,6 +1539,10 @@ internal HttpMessage CreateListBlobFlatSegmentRequest(string prefix, string mark { uri.AppendQuery("prefix", prefix, true); } + if (delimiter != null) + { + uri.AppendQuery("delimiter", delimiter, true); + } if (marker != null) { uri.AppendQuery("marker", marker, true); @@ -1468,26 +1571,27 @@ internal HttpMessage CreateListBlobFlatSegmentRequest(string prefix, string mark /// [Update] The List Blobs operation returns a list of the blobs under the specified container. /// Filters the results to return only containers whose name begins with the specified prefix. + /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. 
Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. /// Include this parameter to specify one or more datasets to include in the response. /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. /// The cancellation token to use. - public async Task> ListBlobFlatSegmentAsync(string prefix = null, string marker = null, int? maxresults = null, IEnumerable include = null, string startFrom = null, int? timeout = null, CancellationToken cancellationToken = default) + public async Task> ListBlobHierarchySegmentAsync(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, string startFrom = null, int? 
timeout = null, CancellationToken cancellationToken = default) { - using var message = CreateListBlobFlatSegmentRequest(prefix, marker, maxresults, include, startFrom, timeout); + using var message = CreateListBlobHierarchySegmentRequest(prefix, delimiter, marker, maxresults, include, startFrom, timeout); await _pipeline.SendAsync(message, cancellationToken).ConfigureAwait(false); - var headers = new ContainerListBlobFlatSegmentHeaders(message.Response); + var headers = new ContainerListBlobHierarchySegmentHeaders(message.Response); switch (message.Response.Status) { case 200: { - ListBlobsFlatSegmentResponse value = default; + ListBlobsHierarchySegmentResponse value = default; var document = XDocument.Load(message.Response.ContentStream, LoadOptions.PreserveWhitespace); if (document.Element("EnumerationResults") is XElement enumerationResultsElement) { - value = ListBlobsFlatSegmentResponse.DeserializeListBlobsFlatSegmentResponse(enumerationResultsElement); + value = ListBlobsHierarchySegmentResponse.DeserializeListBlobsHierarchySegmentResponse(enumerationResultsElement); } return ResponseWithHeaders.FromValue(value, headers, message.Response); } @@ -1498,26 +1602,27 @@ public async Task [Update] The List Blobs operation returns a list of the blobs under the specified container. /// Filters the results to return only containers whose name begins with the specified prefix. + /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. 
The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. /// Include this parameter to specify one or more datasets to include in the response. /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. /// The cancellation token to use. - public ResponseWithHeaders ListBlobFlatSegment(string prefix = null, string marker = null, int? maxresults = null, IEnumerable include = null, string startFrom = null, int? timeout = null, CancellationToken cancellationToken = default) + public ResponseWithHeaders ListBlobHierarchySegment(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, string startFrom = null, int? 
timeout = null, CancellationToken cancellationToken = default) { - using var message = CreateListBlobFlatSegmentRequest(prefix, marker, maxresults, include, startFrom, timeout); + using var message = CreateListBlobHierarchySegmentRequest(prefix, delimiter, marker, maxresults, include, startFrom, timeout); _pipeline.Send(message, cancellationToken); - var headers = new ContainerListBlobFlatSegmentHeaders(message.Response); + var headers = new ContainerListBlobHierarchySegmentHeaders(message.Response); switch (message.Response.Status) { case 200: { - ListBlobsFlatSegmentResponse value = default; + ListBlobsHierarchySegmentResponse value = default; var document = XDocument.Load(message.Response.ContentStream, LoadOptions.PreserveWhitespace); if (document.Element("EnumerationResults") is XElement enumerationResultsElement) { - value = ListBlobsFlatSegmentResponse.DeserializeListBlobsFlatSegmentResponse(enumerationResultsElement); + value = ListBlobsHierarchySegmentResponse.DeserializeListBlobsHierarchySegmentResponse(enumerationResultsElement); } return ResponseWithHeaders.FromValue(value, headers, message.Response); } @@ -1526,7 +1631,7 @@ public ResponseWithHeaders include, string startFrom, int? timeout) + internal HttpMessage CreateListBlobHierarchySegmentApacheArrowRequest(string prefix, string delimiter, string marker, int? maxresults, IEnumerable include, int? 
timeout, string startFrom, string endBefore) { var message = _pipeline.CreateMessage(); var request = message.Request; @@ -1555,44 +1660,44 @@ internal HttpMessage CreateListBlobHierarchySegmentRequest(string prefix, string { uri.AppendQueryDelimited("include", include, ",", true); } + if (timeout != null) + { + uri.AppendQuery("timeout", timeout.Value, true); + } if (startFrom != null) { uri.AppendQuery("startFrom", startFrom, true); } - if (timeout != null) + if (endBefore != null) { - uri.AppendQuery("timeout", timeout.Value, true); + uri.AppendQuery("endBefore", endBefore, true); } request.Uri = uri; + request.Headers.Add("Accept", "application/vnd.apache.arrow.stream"); request.Headers.Add("x-ms-version", _version); - request.Headers.Add("Accept", "application/xml"); return message; } - /// [Update] The List Blobs operation returns a list of the blobs under the specified container. + /// [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache Arrow use case so response is returned as raw to be deserialized by the client. /// Filters the results to return only containers whose name begins with the specified prefix. /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. 
/// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. /// Include this parameter to specify one or more datasets to include in the response. - /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. + /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). + /// Specifies the relative path to end before list paths. (Exclusive). /// The cancellation token to use. - public async Task> ListBlobHierarchySegmentAsync(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, string startFrom = null, int? timeout = null, CancellationToken cancellationToken = default) + public async Task> ListBlobHierarchySegmentApacheArrowAsync(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? 
timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) { - using var message = CreateListBlobHierarchySegmentRequest(prefix, delimiter, marker, maxresults, include, startFrom, timeout); + using var message = CreateListBlobHierarchySegmentApacheArrowRequest(prefix, delimiter, marker, maxresults, include, timeout, startFrom, endBefore); await _pipeline.SendAsync(message, cancellationToken).ConfigureAwait(false); - var headers = new ContainerListBlobHierarchySegmentHeaders(message.Response); + var headers = new ContainerListBlobHierarchySegmentApacheArrowHeaders(message.Response); switch (message.Response.Status) { case 200: { - ListBlobsHierarchySegmentResponse value = default; - var document = XDocument.Load(message.Response.ContentStream, LoadOptions.PreserveWhitespace); - if (document.Element("EnumerationResults") is XElement enumerationResultsElement) - { - value = ListBlobsHierarchySegmentResponse.DeserializeListBlobsHierarchySegmentResponse(enumerationResultsElement); - } + var value = message.ExtractResponseContent(); return ResponseWithHeaders.FromValue(value, headers, message.Response); } default: @@ -1600,30 +1705,26 @@ public async Task [Update] The List Blobs operation returns a list of the blobs under the specified container. + /// [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache Arrow use case so response is returned as raw to be deserialized by the client. /// Filters the results to return only containers whose name begins with the specified prefix. /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. 
/// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. /// Include this parameter to specify one or more datasets to include in the response. - /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. + /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). + /// Specifies the relative path to end before list paths. (Exclusive). /// The cancellation token to use. - public ResponseWithHeaders ListBlobHierarchySegment(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, string startFrom = null, int? 
timeout = null, CancellationToken cancellationToken = default) + public ResponseWithHeaders ListBlobHierarchySegmentApacheArrow(string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) { - using var message = CreateListBlobHierarchySegmentRequest(prefix, delimiter, marker, maxresults, include, startFrom, timeout); + using var message = CreateListBlobHierarchySegmentApacheArrowRequest(prefix, delimiter, marker, maxresults, include, timeout, startFrom, endBefore); _pipeline.Send(message, cancellationToken); - var headers = new ContainerListBlobHierarchySegmentHeaders(message.Response); + var headers = new ContainerListBlobHierarchySegmentApacheArrowHeaders(message.Response); switch (message.Response.Status) { case 200: { - ListBlobsHierarchySegmentResponse value = default; - var document = XDocument.Load(message.Response.ContentStream, LoadOptions.PreserveWhitespace); - if (document.Element("EnumerationResults") is XElement enumerationResultsElement) - { - value = ListBlobsHierarchySegmentResponse.DeserializeListBlobsHierarchySegmentResponse(enumerationResultsElement); - } + var value = message.ExtractResponseContent(); return ResponseWithHeaders.FromValue(value, headers, message.Response); } default: @@ -1862,6 +1963,88 @@ public ResponseWithHeaders include, int? 
timeout, string startFrom, string endBefore) + { + var message = _pipeline.CreateMessage(); + var request = message.Request; + request.Method = RequestMethod.Get; + var uri = new RawRequestUriBuilder(); + uri.AppendRaw(_url, false); + uri.AppendRawNextLink(nextLink, false); + request.Uri = uri; + request.Headers.Add("Accept", "application/vnd.apache.arrow.stream"); + request.Headers.Add("x-ms-version", _version); + return message; + } + + /// [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache Arrow use case so response is returned as raw to be deserialized by the client. + /// The URL to the next page of results. + /// Filters the results to return only containers whose name begins with the specified prefix. + /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. + /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. + /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. 
For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. + /// Include this parameter to specify one or more datasets to include in the response. + /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. + /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). + /// Specifies the relative path to end before list paths. (Exclusive). + /// The cancellation token to use. + /// is null. + public async Task> ListBlobHierarchySegmentApacheArrowNextPageAsync(string nextLink, string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) + { + if (nextLink == null) + { + throw new ArgumentNullException(nameof(nextLink)); + } + + using var message = CreateListBlobHierarchySegmentApacheArrowNextPageRequest(nextLink, prefix, delimiter, marker, maxresults, include, timeout, startFrom, endBefore); + await _pipeline.SendAsync(message, cancellationToken).ConfigureAwait(false); + var headers = new ContainerListBlobHierarchySegmentApacheArrowHeaders(message.Response); + switch (message.Response.Status) + { + case 200: + { + var value = message.ExtractResponseContent(); + return ResponseWithHeaders.FromValue(value, headers, message.Response); + } + default: + throw new RequestFailedException(message.Response); + } + } + + /// [Update] The List Blobs operation returns a list of the blobs under the specified container. This operation is for Apache Arrow use case so response is returned as raw to be deserialized by the client. 
+ /// The URL to the next page of results. + /// Filters the results to return only containers whose name begins with the specified prefix. + /// When the request includes this parameter, the operation returns a BlobPrefix element in the response body that acts as a placeholder for all blobs whose names begin with the same substring up to the appearance of the delimiter character. The delimiter may be a single character or a string. + /// A string value that identifies the portion of the list of containers to be returned with the next listing operation. The operation returns the NextMarker value within the response body if the listing operation did not return all containers remaining to be listed with the current page. The NextMarker value can be used as the value for the marker parameter in a subsequent call to request the next page of list items. The marker value is opaque to the client. + /// Specifies the maximum number of containers to return. If the request does not specify maxresults, or specifies a value greater than 5000, the server will return up to 5000 items. Note that if the listing operation crosses a partition boundary, then the service will return a continuation token for retrieving the remainder of the results. For this reason, it is possible that the service will return fewer results than specified by maxresults, or than the default of 5000. + /// Include this parameter to specify one or more datasets to include in the response. + /// The timeout parameter is expressed in seconds. For more information, see <a href="https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations">Setting Timeouts for Blob Service Operations.</a>. + /// Specifies the relative path to list paths from. For non-recursive list, only one entity level is supported; For recursive list, multiple entity levels are supported. (Inclusive). + /// Specifies the relative path to end before list paths. (Exclusive). 
+ /// The cancellation token to use. + /// is null. + public ResponseWithHeaders ListBlobHierarchySegmentApacheArrowNextPage(string nextLink, string prefix = null, string delimiter = null, string marker = null, int? maxresults = null, IEnumerable include = null, int? timeout = null, string startFrom = null, string endBefore = null, CancellationToken cancellationToken = default) + { + if (nextLink == null) + { + throw new ArgumentNullException(nameof(nextLink)); + } + + using var message = CreateListBlobHierarchySegmentApacheArrowNextPageRequest(nextLink, prefix, delimiter, marker, maxresults, include, timeout, startFrom, endBefore); + _pipeline.Send(message, cancellationToken); + var headers = new ContainerListBlobHierarchySegmentApacheArrowHeaders(message.Response); + switch (message.Response.Status) + { + case 200: + { + var value = message.ExtractResponseContent(); + return ResponseWithHeaders.FromValue(value, headers, message.Response); + } + default: + throw new RequestFailedException(message.Response); + } + } + private static ResponseClassifier _responseClassifier201; private static ResponseClassifier ResponseClassifier201 => _responseClassifier201 ??= new StatusCodeClassifier(stackalloc ushort[] { 201 }); } diff --git a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs index 7b79d63352ad..bd0d42929d53 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs @@ -20,7 +20,6 @@ internal class GetBlobsAsyncCollection : StorageCollectionEnumerator private readonly string _prefix; private readonly string _startFrom; private readonly string _endBefore; - //private readonly string _delimiter; private readonly bool _useApacheArrow; public GetBlobsAsyncCollection( diff --git a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs 
b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs index e4d0067340cb..3564111ce54f 100644 --- a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs +++ b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs @@ -2853,6 +2853,34 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow() Assert.IsTrue(BlobNames.All(blobName => foundBlobNames.Contains(blobName))); } + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2019_12_12)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Tags() + { + // Arrange + await using DisposingContainer test = await GetTestContainerAsync(); + AppendBlobClient appendBlob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + IDictionary tags = BuildTags(); + AppendBlobCreateOptions options = new AppendBlobCreateOptions + { + Tags = tags + }; + await appendBlob.CreateAsync(options); + + GetBlobsOptions getBlobsOptions = new GetBlobsOptions + { + UseApacheArrow = true, + Traits = BlobTraits.Tags + }; + + // Act + IList blobItems = await test.Container.GetBlobsAsync(getBlobsOptions).ToListAsync(); + + // Assert + AssertDictionaryEquality(tags, blobItems[0].Tags); + Assert.AreEqual(tags.Count, blobItems[0].Properties.TagCount); + } + [RecordedTest] [PlaybackOnly("Service bug - https://github.com/Azure/azure-sdk-for-net/issues/16516")] public async Task ListBlobsHierarchySegmentAsync() From 4cbcea60d4b05bb532d5aefd05d7934df65fb86f Mon Sep 17 00:00:00 2001 From: nickliu-msft Date: Wed, 25 Mar 2026 14:50:26 -0400 Subject: [PATCH 5/9] Added ListBlobs Hierarchy with Arrow --- .../src/BlobContainerClient.cs | 205 +++++-- .../GetBlobsByHierarchyAsyncCollection.cs | 12 +- .../src/Models/GetBlobsByHierarchyOptions.cs | 13 + .../src/Models/GetBlobsOptions.cs | 4 +- .../tests/ContainerClientTests.cs | 500 +++++++++++++++++- 5 files changed, 691 insertions(+), 43 deletions(-) diff --git a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs 
b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs index 7c91a2cb8d8c..63794182cf28 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs @@ -2682,7 +2682,7 @@ public virtual AsyncPageable GetBlobsAsync( /// List Blobs. /// /// - /// Specifies whether to use Apache Arrow for the operation. + /// Specifies whether to use Apache Arrow to list blobs. /// /// /// An optional string value that identifies the segment of the list @@ -2888,6 +2888,28 @@ private async Task ParseArrowListBlobsFlatResponse int? maxResults, bool async, CancellationToken cancellationToken) + { + var (nextMarker, blobItems, _) = await ParseArrowBlobsResponse( + arrowStream, + async, + cancellationToken).ConfigureAwait(false); + + return new ListBlobsFlatSegmentResponse( + serviceEndpoint: Uri.GetLeftPart(UriPartial.Authority), + containerName: Name, + prefix: prefix, + marker: marker, + maxResults: maxResults, + segment: new BlobFlatListSegment(blobItems), + nextMarker: nextMarker); + } + #endregion GetBlobs + + #region ApacheArrowHelpers + private async Task<(string NextMarker, List BlobItems, List BlobPrefixes)> ParseArrowBlobsResponse( + Stream arrowStream, + bool async, + CancellationToken cancellationToken) { using var reader = new ArrowStreamReader(arrowStream); @@ -2898,6 +2920,7 @@ private async Task ParseArrowListBlobsFlatResponse } var blobItems = new List(); + var blobPrefixes = new List(); while (true) { @@ -2912,6 +2935,7 @@ private async Task ParseArrowListBlobsFlatResponse // Blob-level columns StringArray nameCol = GetArrowColumn(batch, "Name") as StringArray; + StringArray resourceTypeCol = GetArrowColumn(batch, "ResourceType") as StringArray; BooleanArray deletedCol = GetArrowColumn(batch, "Deleted") as BooleanArray; StringArray snapshotCol = GetArrowColumn(batch, "Snapshot") as StringArray; StringArray versionIdCol = GetArrowColumn(batch, "VersionId") as StringArray; @@ -2967,6 
+2991,16 @@ private async Task ParseArrowListBlobsFlatResponse for (int i = 0; i < batch.Length; i++) { + string resourceType = resourceTypeCol?.GetString(i); + + // BlobPrefix rows only have Name populated; all other columns are null + if (string.Equals(resourceType, "blobprefix", StringComparison.InvariantCultureIgnoreCase)) + { + blobPrefixes.Add(new BlobPrefix( + new BlobName(encoded: false, content: nameCol?.GetString(i)))); + continue; + } + string contentMD5Str = contentMD5Col?.GetString(i); byte[] contentMD5 = contentMD5Str != null ? Convert.FromBase64String(contentMD5Str) : null; @@ -3020,7 +3054,7 @@ private async Task ParseArrowListBlobsFlatResponse IReadOnlyDictionary tagsDict = ReadArrowMap(tagsCol, i); if (tagsDict != null) { - var tagList = new List(); + List tagList = new List(); foreach (KeyValuePair kvp in tagsDict) { tagList.Add(new BlobTag(kvp.Key, kvp.Value)); @@ -3042,14 +3076,7 @@ private async Task ParseArrowListBlobsFlatResponse } } - return new ListBlobsFlatSegmentResponse( - serviceEndpoint: Uri.GetLeftPart(UriPartial.Authority), - containerName: Name, - prefix: prefix, - marker: marker, - maxResults: maxResults, - segment: new BlobFlatListSegment(blobItems), - nextMarker: nextMarker); + return (nextMarker, blobItems, blobPrefixes); } private static IArrowArray GetArrowColumn(RecordBatch batch, string name) @@ -3103,7 +3130,7 @@ private static IReadOnlyDictionary ReadArrowMap(MapArray mapArra string value = array?.GetString(index); return value != null ? parse(value) : null; } - #endregion GetBlobs + #endregion ApacheArrowHelpers #region GetBlobsByHierarchy /// @@ -3141,11 +3168,13 @@ public virtual Pageable GetBlobsByHierarchy( CancellationToken cancellationToken = default) => new GetBlobsByHierarchyAsyncCollection( this, + useApacheArrow: options?.UseApacheArrow ?? false, options?.Delimiter, options?.Traits ?? BlobTraits.None, options?.States ?? 
BlobStates.None, options?.Prefix, - options?.StartFrom) + options?.StartFrom, + options?.EndBefore) .ToSyncCollection(cancellationToken); /// @@ -3183,11 +3212,13 @@ public virtual AsyncPageable GetBlobsByHierarchyAsync( CancellationToken cancellationToken = default) => new GetBlobsByHierarchyAsyncCollection( this, + useApacheArrow: options?.UseApacheArrow ?? false, options?.Delimiter, options?.Traits ?? BlobTraits.None, options?.States ?? BlobStates.None, options?.Prefix, - options?.StartFrom) + options?.StartFrom, + options?.EndBefore) .ToAsyncCollection(cancellationToken); /// @@ -3253,7 +3284,7 @@ public virtual Pageable GetBlobsByHierarchy( string delimiter, string prefix, CancellationToken cancellationToken = default) => - new GetBlobsByHierarchyAsyncCollection(this, delimiter, traits, states, prefix, startFrom: default).ToSyncCollection(cancellationToken); + new GetBlobsByHierarchyAsyncCollection(this, false, delimiter, traits, states, prefix, startFrom: default, endBefore: default).ToSyncCollection(cancellationToken); /// /// The @@ -3318,7 +3349,7 @@ public virtual AsyncPageable GetBlobsByHierarchyAsync( string delimiter, string prefix, CancellationToken cancellationToken) => - new GetBlobsByHierarchyAsyncCollection(this, delimiter, traits, states, prefix, startFrom: default).ToAsyncCollection(cancellationToken); + new GetBlobsByHierarchyAsyncCollection(this, false, delimiter, traits, states, prefix, startFrom: default, endBefore: default).ToAsyncCollection(cancellationToken); /// /// The operation returns @@ -3336,6 +3367,9 @@ public virtual AsyncPageable GetBlobsByHierarchyAsync( /// /// List Blobs. /// + /// + /// Specifies whether to use Apache Arrow to list blobs. + /// /// /// An optional string value that identifies the segment of the list /// of blobs to be returned with the next listing operation. The @@ -3378,6 +3412,11 @@ public virtual AsyncPageable GetBlobsByHierarchyAsync( /// For non-recursive list, only one entity level is supported. 
/// For recursive list, multiple entity levels are supported. (Inclusive). /// + /// + /// Optional. Specifies a fully qualified path within the container, + /// ending the listing when all results before have been returned. + /// This is only supported if is set to true. + /// /// /// Gets or sets a value indicating the size of the page that should be /// requested. @@ -3400,12 +3439,14 @@ public virtual AsyncPageable GetBlobsByHierarchyAsync( /// containing each failure instance. /// internal async Task> GetBlobsByHierarchyInternal( + bool useApacheArrow, string marker, string delimiter, BlobTraits traits, BlobStates states, string prefix, string startFrom, + string endBefore, int? pageSizeHint, bool async, CancellationToken cancellationToken) @@ -3426,37 +3467,100 @@ internal async Task> GetBlobsByHiera try { scope.Start(); - ResponseWithHeaders response; - if (async) + ListBlobsHierarchySegmentResponse listblobHierachyResponse; + Response rawResponse; + + if (useApacheArrow) { - response = await ContainerRestClient.ListBlobHierarchySegmentAsync( - delimiter: delimiter, - prefix: prefix, - marker: marker, - maxresults: pageSizeHint, - include: BlobExtensions.AsIncludeItems(traits, states), - startFrom: startFrom, - cancellationToken: cancellationToken) - .ConfigureAwait(false); + ResponseWithHeaders arrowResponse; + + if (async) + { + arrowResponse = await ContainerRestClient.ListBlobHierarchySegmentApacheArrowAsync( + prefix: prefix, + delimiter: delimiter, + marker: marker, + maxresults: pageSizeHint, + include: BlobExtensions.AsIncludeItems(traits, states), + startFrom: startFrom, + endBefore: endBefore, + cancellationToken: cancellationToken) + .ConfigureAwait(false); + } + else + { + arrowResponse = ContainerRestClient.ListBlobHierarchySegmentApacheArrow( + prefix: prefix, + delimiter: delimiter, + marker: marker, + maxresults: pageSizeHint, + include: BlobExtensions.AsIncludeItems(traits, states), + startFrom: startFrom, + endBefore: endBefore, + 
cancellationToken: cancellationToken); + } + + rawResponse = arrowResponse.GetRawResponse(); + + if (arrowResponse.Headers.ContentType == Constants.Blob.ApacheArrowContentType) + { + // Parse using Apache Arrow + listblobHierachyResponse = await ParseArrowListBlobsHierarchyResponse( + arrowResponse.Value, + prefix, + marker, + pageSizeHint, + delimiter, + async, + cancellationToken).ConfigureAwait(false); + } + else + { + // XML fallback: server returned XML despite requesting Arrow + listblobHierachyResponse = default; + var document = XDocument.Load(arrowResponse.Value, LoadOptions.PreserveWhitespace); + if (document.Element("EnumerationResults") is XElement enumerationResultsElement) + { + listblobHierachyResponse = ListBlobsHierarchySegmentResponse.DeserializeListBlobsHierarchySegmentResponse(enumerationResultsElement); + } + } } else { - response = ContainerRestClient.ListBlobHierarchySegment( - delimiter: delimiter, - prefix: prefix, - marker: marker, - maxresults: pageSizeHint, - include: BlobExtensions.AsIncludeItems(traits, states), - startFrom: startFrom, - cancellationToken: cancellationToken); - } + ResponseWithHeaders response; - ListBlobsHierarchySegmentResponse listblobHierachyResponse = response.Value; + if (async) + { + response = await ContainerRestClient.ListBlobHierarchySegmentAsync( + delimiter: delimiter, + prefix: prefix, + marker: marker, + maxresults: pageSizeHint, + include: BlobExtensions.AsIncludeItems(traits, states), + startFrom: startFrom, + cancellationToken: cancellationToken) + .ConfigureAwait(false); + } + else + { + response = ContainerRestClient.ListBlobHierarchySegment( + delimiter: delimiter, + prefix: prefix, + marker: marker, + maxresults: pageSizeHint, + include: BlobExtensions.AsIncludeItems(traits, states), + startFrom: startFrom, + cancellationToken: cancellationToken); + } + + listblobHierachyResponse = response.Value; + rawResponse = response.GetRawResponse(); + } if ((traits & BlobTraits.Metadata) != 
BlobTraits.Metadata) { - List blobItemInternals = response.Value.Segment.BlobItems.Select(r => new BlobItemInternal( + List blobItemInternals = listblobHierachyResponse.Segment.BlobItems.Select(r => new BlobItemInternal( r.Name, r.Deleted, r.Snapshot, @@ -3472,7 +3576,7 @@ internal async Task> GetBlobsByHiera return Response.FromValue( listblobHierachyResponse, - response.GetRawResponse()); + rawResponse); } catch (Exception ex) { @@ -3487,6 +3591,31 @@ internal async Task> GetBlobsByHiera } } } + + private async Task ParseArrowListBlobsHierarchyResponse( + Stream arrowStream, + string prefix, + string marker, + int? maxResults, + string delimiter, + bool async, + CancellationToken cancellationToken) + { + var (nextMarker, blobItems, blobPrefixes) = await ParseArrowBlobsResponse( + arrowStream, + async, + cancellationToken).ConfigureAwait(false); + + return new ListBlobsHierarchySegmentResponse( + serviceEndpoint: Uri.GetLeftPart(UriPartial.Authority), + containerName: Name, + prefix: prefix, + marker: marker, + maxResults: maxResults, + delimiter: delimiter, + segment: new BlobHierarchyListSegment(blobPrefixes, blobItems), + nextMarker: nextMarker); + } #endregion GetBlobsByHierarchy #region UploadBlob diff --git a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyAsyncCollection.cs b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyAsyncCollection.cs index 12af6fa746cf..b446b6cead4e 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyAsyncCollection.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyAsyncCollection.cs @@ -18,14 +18,18 @@ internal class GetBlobsByHierarchyAsyncCollection : StorageCollectionEnumerator< private readonly string _delimiter; private readonly string _prefix; private readonly string _startFrom; + private readonly string _endBefore; + private readonly bool _useApacheArrow; public GetBlobsByHierarchyAsyncCollection( BlobContainerClient client, + bool useApacheArrow, string 
delimiter, BlobTraits traits, BlobStates states, string prefix, - string startFrom) + string startFrom, + string endBefore) { _client = client; _delimiter = delimiter; @@ -33,6 +37,8 @@ public GetBlobsByHierarchyAsyncCollection( _states = states; _prefix = prefix; _startFrom = startFrom; + _endBefore = endBefore; + _useApacheArrow = useApacheArrow; } public override async ValueTask> GetNextPageAsync( @@ -46,12 +52,14 @@ public override async ValueTask> GetNextPageAsync( if (async) { response = await _client.GetBlobsByHierarchyInternal( + useApacheArrow: _useApacheArrow, marker: continuationToken, delimiter: _delimiter, traits: _traits, states: _states, prefix: _prefix, startFrom: _startFrom, + endBefore: _endBefore, pageSizeHint: pageSizeHint, async: async, cancellationToken: cancellationToken) @@ -60,12 +68,14 @@ public override async ValueTask> GetNextPageAsync( else { response = _client.GetBlobsByHierarchyInternal( + useApacheArrow: _useApacheArrow, marker: continuationToken, delimiter: _delimiter, traits: _traits, states: _states, prefix: _prefix, startFrom: _startFrom, + endBefore: _endBefore, pageSizeHint: pageSizeHint, async: async, cancellationToken: cancellationToken) diff --git a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyOptions.cs b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyOptions.cs index 07de8ee17a6a..841b0a2e64de 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyOptions.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyOptions.cs @@ -50,5 +50,18 @@ public class GetBlobsByHierarchyOptions /// For recursive list, multiple entity levels are supported. (Inclusive). /// public string StartFrom { get; set; } + + /// + /// Optional. Specifies a fully qualified path within the container, + /// ending the listing when all results before have been returned. + /// This is only supported if is set to true. + /// + public string EndBefore { get; set; } + + /// + /// Optional. 
Specifies whether we are using Apache Arrow, rather than XML, + /// to list blobs. Defaults to false. + /// + public bool UseApacheArrow { get; set; } } } diff --git a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsOptions.cs b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsOptions.cs index e9364bdb29d2..9dafe6a06227 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsOptions.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsOptions.cs @@ -40,8 +40,8 @@ public class GetBlobsOptions public string EndBefore { get; set; } /// - /// Optional. Specifies whether we are using Apache Arrow to list blobs. - /// Defaults to false. + /// Optional. Specifies whether we are using Apache Arrow, rather than XML, + /// to list blobs. Defaults to false. /// public bool UseApacheArrow { get; set; } } diff --git a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs index 3564111ce54f..2d05d9900ca3 100644 --- a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs +++ b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs @@ -2840,7 +2840,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow() { UseApacheArrow = true }; - await foreach (Page page in test.Container.GetBlobsAsync(options).AsPages()) + await foreach (Page page in test.Container.GetBlobsAsync(options: options).AsPages()) { blobs.AddRange(page.Values); } @@ -2874,13 +2874,461 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Tags() }; // Act - IList blobItems = await test.Container.GetBlobsAsync(getBlobsOptions).ToListAsync(); + IList blobItems = await test.Container.GetBlobsAsync(options: getBlobsOptions).ToListAsync(); // Assert AssertDictionaryEquality(tags, blobItems[0].Tags); Assert.AreEqual(tags.Count, blobItems[0].Properties.TagCount); } + [Ignore("Feature not supported in current test environment")] + [RecordedTest] + [ServiceVersion(Min = 
BlobClientOptions.ServiceVersion.V2019_12_12)] + [TestCase(null)] + [TestCase(RehydratePriority.Standard)] + [TestCase(RehydratePriority.High)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_RehydratePriority(RehydratePriority? rehydratePriority) + { + // Arrange + await using DisposingContainer test = await GetTestContainerAsync(); + + BlockBlobClient blockBlob = InstrumentClient(test.Container.GetBlockBlobClient(GetNewBlobName())); + byte[] data = GetRandomBuffer(Constants.KB); + using Stream stream = new MemoryStream(data); + await blockBlob.UploadAsync(stream); + + if (rehydratePriority.HasValue) + { + await blockBlob.SetAccessTierAsync( + AccessTier.Archive); + + await blockBlob.SetAccessTierAsync( + AccessTier.Hot, + rehydratePriority: rehydratePriority.Value); + } + + // Act + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true + }; + IList blobItems = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(rehydratePriority, blobItems[0].Properties.RehydratePriority); + } + + [RecordedTest] + [AsyncOnly] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_MaxResults() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + // Act + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true + }; + Page page = await test.Container.GetBlobsAsync(options: options).AsPages(pageSizeHint: 2).FirstAsync(); + + // Assert + Assert.AreEqual(2, page.Values.Count); + Assert.IsTrue(page.Values.All(b => b.Metadata.Count == 0)); + } + + [RecordedTest] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Metadata() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + IDictionary metadata = BuildMetadata(); + await 
blob.CreateIfNotExistsAsync(metadata: metadata); + + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true, + Traits = BlobTraits.Metadata + }; + + // Act + IList blobs = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + AssertDictionaryEquality(metadata, blobs.First().Metadata); + } + + [Ignore("Feature not supported in current test environment")] + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2019_07_07)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_EncryptionScope() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + blob = InstrumentClient(blob.WithEncryptionScope(TestConfigDefault.EncryptionScope)); + + await blob.CreateIfNotExistsAsync(); + + // Act + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true + }; + IList blobs = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(TestConfigDefault.EncryptionScope, blobs.First().Properties.EncryptionScope); + } + + [RecordedTest] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Deleted() + { + // Arrange + BlobServiceClient blobServiceClient = BlobsClientBuilder.GetServiceClient_SoftDelete(); + await using DisposingContainer test = await GetTestContainerAsync(blobServiceClient); + string blobName = GetNewBlobName(); + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(blobName)); + await blob.CreateIfNotExistsAsync(); + await blob.DeleteIfExistsAsync(); + + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true, + States = BlobStates.Deleted + }; + + // Act + IList blobs = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(blobName, blobs[0].Name); + Assert.IsTrue(blobs[0].Deleted); + } + + [RecordedTest] + 
public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Uncommited() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + var blobName = GetNewBlobName(); + BlockBlobClient blob = InstrumentClient(test.Container.GetBlockBlobClient(blobName)); + var data = GetRandomBuffer(Constants.KB); + var blockId = ToBase64(GetNewBlockName()); + + using (var stream = new MemoryStream(data)) + { + await blob.StageBlockAsync( + base64BlockId: blockId, + content: stream); + } + + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true, + States = BlobStates.Uncommitted + }; + + // Act + IList blobs = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(1, blobs.Count); + Assert.AreEqual(blobName, blobs.First().Name); + } + + [RecordedTest] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Snapshot() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + await blob.CreateIfNotExistsAsync(); + Response snapshotResponse = await blob.CreateSnapshotAsync(); + + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true, + States = BlobStates.Snapshots + }; + + // Act + IList blobs = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(2, blobs.Count); + Assert.AreEqual(snapshotResponse.Value.Snapshot.ToString(), blobs.First().Snapshot); + } + + [RecordedTest] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Prefix() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true, + Prefix = "foo" + }; + + // Act + IList blobs = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + 
Assert.AreEqual(3, blobs.Count); + } + + [RecordedTest] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Error() + { + // Arrange + BlobServiceClient service = GetServiceClient_SharedKey(); + BlobContainerClient container = InstrumentClient(service.GetBlobContainerClient(GetNewContainerName())); + var id = Recording.Random.NewGuid().ToString(); + + // Act + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true + }; + await TestHelper.AssertExpectedExceptionAsync( + container.GetBlobsAsync(options: options).ToListAsync(), + e => Assert.AreEqual("ContainerNotFound", e.ErrorCode)); + } + + [RecordedTest] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_PreservesWhitespace() + { + await VerifyBlobNameWhitespaceRoundtrips(" prefix"); + await VerifyBlobNameWhitespaceRoundtrips("suffix "); + await VerifyBlobNameWhitespaceRoundtrips(" "); + + async Task VerifyBlobNameWhitespaceRoundtrips(string blobName) + { + await using DisposingContainer test = await GetTestContainerAsync(); + BlockBlobClient blob = InstrumentClient(test.Container.GetBlockBlobClient(blobName)); + await blob.UploadAsync(new MemoryStream(Encoding.UTF8.GetBytes("data"))); + + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true + }; + BlobItem blobItem = await test.Container.GetBlobsAsync(options: options).FirstAsync(); + Assert.AreEqual(blobName, blobItem.Name); + } + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2019_12_12)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_VersionId() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + Response createResponse = await blob.CreateAsync(); + IDictionary metadata = BuildMetadata(); + Response setMetadataResponse = await blob.SetMetadataAsync(metadata); + + GetBlobsOptions options = new GetBlobsOptions + { + 
UseApacheArrow = true, + States = BlobStates.Version + }; + + // Act + var blobs = new List(); + await foreach (Page page in test.Container.GetBlobsAsync(options: options).AsPages()) + { + blobs.AddRange(page.Values); + } + + // Assert + Assert.AreEqual(1, blobs.Count); + Assert.IsNull(blobs[0].IsLatestVersion); + Assert.AreEqual(createResponse.Value.VersionId, blobs[0].VersionId); + } + + [Ignore("Feature not supported in current test environment")] + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2019_12_12)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_ObjectReplication() + { + // TODO: The tests will temporarily use designated account, containers and blobs to check the + // existence of OR Metadata + BlobServiceClient sourceServiceClient = GetServiceClient_SharedKey(); + + // This is a recorded ONLY test with a special container we previously setup, as we can't auto setup policies yet + BlobContainerClient sourceContainer = InstrumentClient(sourceServiceClient.GetBlobContainerClient("test1")); + + // Act + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true + }; + IList blobs = await sourceContainer.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + // Since this is a PLAYBACK ONLY test. 
We expect all the blobs in this source container/account + // to have OrMetadata + Assert.IsNotNull(blobs.First().ObjectReplicationSourceProperties); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2020_02_10)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_LastAccessed() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + // Act + var blobs = new List(); + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true + }; + await foreach (Page page in test.Container.GetBlobsAsync(options: options).AsPages()) + { + blobs.AddRange(page.Values); + } + + // Assert + Assert.AreNotEqual(DateTimeOffset.MinValue, blobs.FirstOrDefault().Properties.LastAccessedOn); + } + + [Ignore("Feature not supported in current test environment")] + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2020_10_02)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_DeletedWithVersions() + { + // Arrange + await using DisposingContainer test = await GetTestContainerAsync(); + + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + Response createResponse = await blob.CreateAsync(); + IDictionary metadata = BuildMetadata(); + Response setMetadataResponse = await blob.SetMetadataAsync(metadata); + await blob.DeleteAsync(); + + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true, + States = BlobStates.DeletedWithVersions + }; + + // Act + List blobItems = new List(); + await foreach (BlobItem blobItem in test.Container.GetBlobsAsync(options)) + { + blobItems.Add(blobItem); + } + + // Assert + Assert.AreEqual(1, blobItems.Count); + Assert.AreEqual(blob.Name, blobItems[0].Name); + Assert.IsTrue(blobItems[0].HasVersionsOnly); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2021_02_12)] + public async Task 
ListBlobsFlatSegmentAsync_UseApacheArrow_EncodedBlobName() + { + // Arrange + await using DisposingContainer test = await GetTestContainerAsync(); + string blobName = "dir1/dir2/file\uFFFF.blob"; + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(blobName)); + await blob.CreateAsync(); + + // Act + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true + }; + BlobItem blobItem = await test.Container.GetBlobsAsync(options: options).FirstAsync(); + + // Assert + Assert.AreEqual(blobName, blobItem.Name); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_02_06)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_StartFrom() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true, + StartFrom = "foo" + }; + + // Act + IList blobs = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(3, blobs.Count); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_02_06)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_EndBefore() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + GetBlobsOptions options = new GetBlobsOptions + { + UseApacheArrow = true, + EndBefore = "foo" + }; + + // Act + IList blobs = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(5, blobs.Count); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_02_06)] + public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_StartFromEndBefore() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + GetBlobsOptions 
options = new GetBlobsOptions + { + UseApacheArrow = true, + StartFrom = "foo", + EndBefore = "foo/foo" + }; + + // Act + IList blobs = await test.Container.GetBlobsAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(2, blobs.Count); + } + [RecordedTest] [PlaybackOnly("Service bug - https://github.com/Azure/azure-sdk-for-net/issues/16516")] public async Task ListBlobsHierarchySegmentAsync() @@ -3389,6 +3837,54 @@ public async Task ListBlobsHierarchySegmentAsync_StartFrom() Assert.AreEqual(3, blobHierachyItems.Count); } + [RecordedTest] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + var blobs = new List(); + var prefixes = new List(); + var delimiter = "/"; + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + Delimiter = delimiter + }; + + await foreach (Page page in test.Container.GetBlobsByHierarchyAsync(options: options).AsPages()) + { + blobs.AddRange(page.Values.Where(item => item.IsBlob).Select(item => item.Blob)); + prefixes.AddRange(page.Values.Where(item => item.IsPrefix).Select(item => item.Prefix)); + } + + Assert.AreEqual(3, blobs.Count); + Assert.AreEqual(2, prefixes.Count); + + var foundBlobNames = blobs.Select(blob => blob.Name).ToArray(); + var foundBlobPrefixes = prefixes.ToArray(); + IEnumerable expectedPrefixes = + BlobNames + .Where(blobName => blobName.Contains(delimiter)) + .Select(blobName => blobName.Split(new[] { delimiter[0] })[0] + delimiter) + .Distinct() + ; + + Assert.IsTrue( + BlobNames + .Where(blobName => !blobName.Contains(delimiter)) + .All(blobName => foundBlobNames.Contains(blobName)) + ); + + Assert.IsTrue( + expectedPrefixes + .All(prefix => foundBlobPrefixes.Contains(prefix)) + ); + } + [RecordedTest] public async Task UploadBlobAsync() { From 1f51539717b50770e17b516709585939adf0636e Mon Sep 17 
00:00:00 2001 From: nickliu-msft Date: Mon, 30 Mar 2026 17:55:01 -0400 Subject: [PATCH 6/9] Tests for Listblob Hierarchy via Arrow --- .../src/BlobContainerClient.cs | 12 +- .../GetBlobsByHierarchyAsyncCollection.cs | 4 +- .../tests/ClientBuilderExtensions.cs | 3 + .../tests/ContainerClientTests.cs | 587 +++++++++++++++++- 4 files changed, 584 insertions(+), 22 deletions(-) diff --git a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs index 63794182cf28..d99fb39bf71c 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs @@ -3008,14 +3008,14 @@ private async Task ParseArrowListBlobsFlatResponse creationTime: creationTimeCol?.GetTimestamp(i), lastModified: lastModifiedCol?.GetTimestamp(i) ?? default, etag: etagCol?.GetString(i), - contentLength: ReadNullableInt64(contentLengthCol, i), + contentLength: ReadNullableLong(contentLengthCol, i), contentType: contentTypeCol?.GetString(i), contentEncoding: contentEncodingCol?.GetString(i), contentLanguage: contentLanguageCol?.GetString(i), contentMD5: contentMD5, contentDisposition: contentDispositionCol?.GetString(i), cacheControl: cacheControlCol?.GetString(i), - blobSequenceNumber: ReadNullableInt64(blobSequenceNumberCol, i), + blobSequenceNumber: ReadNullableLong(blobSequenceNumberCol, i), blobType: ReadEnum(blobTypeCol, i, s => s.ToBlobType()), leaseStatus: ReadEnum(leaseStatusCol, i, s => s.ToLeaseStatus()), leaseState: ReadEnum(leaseStateCol, i, s => s.ToLeaseState()), @@ -3030,7 +3030,7 @@ private async Task ParseArrowListBlobsFlatResponse incrementalCopy: ReadNullableBool(incrementalCopyCol, i), destinationSnapshot: destinationSnapshotCol?.GetString(i), deletedTime: deletedTimeCol?.GetTimestamp(i), - remainingRetentionDays: ReadNullableInt32(remainingRetentionDaysCol, i), + remainingRetentionDays: ReadNullableInt(remainingRetentionDaysCol, i), accessTier: 
ReadEnum(accessTierCol, i, s => new AccessTier(s)), accessTierInferred: ReadNullableBool(accessTierInferredCol, i), archiveStatus: ReadEnum(archiveStatusCol, i, s => s.ToArchiveStatus()), @@ -3038,7 +3038,7 @@ private async Task ParseArrowListBlobsFlatResponse customerProvidedKeySha256: customerProvidedKeySha256Col?.GetString(i), encryptionScope: encryptionScopeCol?.GetString(i), accessTierChangeTime: accessTierChangeTimeCol?.GetTimestamp(i), - tagCount: ReadNullableInt32(tagCountCol, i), + tagCount: ReadNullableInt(tagCountCol, i), expiresOn: null, isSealed: ReadNullableBool(sealedCol, i), rehydratePriority: ReadEnum(rehydratePriorityCol, i, s => s.ToRehydratePriority().Value), @@ -3115,12 +3115,12 @@ private static IReadOnlyDictionary ReadArrowMap(MapArray mapArra return array != null && !array.IsNull(index) ? (bool?)array.GetValue(index) : null; } - private static long? ReadNullableInt64(UInt64Array array, int index) + private static long? ReadNullableLong(UInt64Array array, int index) { return array != null && !array.IsNull(index) ? (long?)array.GetValue(index) : null; } - private static int? ReadNullableInt32(UInt64Array array, int index) + private static int? ReadNullableInt(UInt64Array array, int index) { return array != null && !array.IsNull(index) ? 
(int?)array.GetValue(index) : null; } diff --git a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyAsyncCollection.cs b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyAsyncCollection.cs index b446b6cead4e..95b5f7a972bd 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyAsyncCollection.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsByHierarchyAsyncCollection.cs @@ -13,13 +13,13 @@ namespace Azure.Storage.Blobs.Models internal class GetBlobsByHierarchyAsyncCollection : StorageCollectionEnumerator { private readonly BlobContainerClient _client; + private readonly bool _useApacheArrow; private readonly BlobTraits _traits; private readonly BlobStates _states; private readonly string _delimiter; private readonly string _prefix; private readonly string _startFrom; private readonly string _endBefore; - private readonly bool _useApacheArrow; public GetBlobsByHierarchyAsyncCollection( BlobContainerClient client, @@ -32,13 +32,13 @@ public GetBlobsByHierarchyAsyncCollection( string endBefore) { _client = client; + _useApacheArrow = useApacheArrow; _delimiter = delimiter; _traits = traits; _states = states; _prefix = prefix; _startFrom = startFrom; _endBefore = endBefore; - _useApacheArrow = useApacheArrow; } public override async ValueTask> GetNextPageAsync( diff --git a/sdk/storage/Azure.Storage.Blobs/tests/ClientBuilderExtensions.cs b/sdk/storage/Azure.Storage.Blobs/tests/ClientBuilderExtensions.cs index 5a3cb6859e03..b348a0c7d634 100644 --- a/sdk/storage/Azure.Storage.Blobs/tests/ClientBuilderExtensions.cs +++ b/sdk/storage/Azure.Storage.Blobs/tests/ClientBuilderExtensions.cs @@ -70,6 +70,9 @@ public static BlobServiceClient GetServiceClient_Hns(this BlobsClientBuilder cli public static BlobServiceClient GetServiceClient_SoftDelete(this BlobsClientBuilder clientBuilder) => clientBuilder.GetServiceClientFromSharedKeyConfig(clientBuilder.Tenants.TestConfigSoftDelete); + public static BlobServiceClient 
GetServiceClient_SoftDelete_OAuth(this BlobsClientBuilder clientBuilder, TokenCredential tokenCredential) => + clientBuilder.GetServiceClientFromOauthConfig(clientBuilder.Tenants.TestConfigSoftDelete, tokenCredential); + public static async Task GetTestContainerAsync( this BlobsClientBuilder clientBuilder, BlobServiceClient service = default, diff --git a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs index 2d05d9900ca3..671bf364b5dc 100644 --- a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs +++ b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs @@ -2827,6 +2827,7 @@ public async Task ListBlobsFlatSegmentAsync_StartFrom() } [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -2854,7 +2855,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow() } [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2019_12_12)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Tags() { // Arrange @@ -2883,7 +2884,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Tags() [Ignore("Feature not supported in current test environment")] [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2019_12_12)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] [TestCase(null)] [TestCase(RehydratePriority.Standard)] [TestCase(RehydratePriority.High)] @@ -2919,7 +2920,7 @@ await blockBlob.SetAccessTierAsync( } [RecordedTest] - [AsyncOnly] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_MaxResults() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -2940,6 +2941,7 
@@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_MaxResults() } [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Metadata() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -2964,7 +2966,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Metadata() [Ignore("Feature not supported in current test environment")] [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2019_07_07)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_EncryptionScope() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -2987,10 +2989,11 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_EncryptionScope() } [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Deleted() { // Arrange - BlobServiceClient blobServiceClient = BlobsClientBuilder.GetServiceClient_SoftDelete(); + BlobServiceClient blobServiceClient = BlobsClientBuilder.GetServiceClient_SoftDelete_OAuth(TestEnvironment.Credential); await using DisposingContainer test = await GetTestContainerAsync(blobServiceClient); string blobName = GetNewBlobName(); AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(blobName)); @@ -3012,6 +3015,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Deleted() } [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Uncommited() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -3044,6 +3048,7 @@ await blob.StageBlockAsync( } [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Snapshot() { await using 
DisposingContainer test = await GetTestContainerAsync(); @@ -3068,6 +3073,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Snapshot() } [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Prefix() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -3089,6 +3095,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Prefix() } [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_Error() { // Arrange @@ -3107,6 +3114,7 @@ await TestHelper.AssertExpectedExceptionAsync( } [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_PreservesWhitespace() { await VerifyBlobNameWhitespaceRoundtrips(" prefix"); @@ -3129,7 +3137,7 @@ async Task VerifyBlobNameWhitespaceRoundtrips(string blobName) } [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2019_12_12)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_VersionId() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -3159,9 +3167,9 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_VersionId() Assert.AreEqual(createResponse.Value.VersionId, blobs[0].VersionId); } - [Ignore("Feature not supported in current test environment")] + [PlaybackOnly("Object Replication policies is only enabled on certain storage accounts")] [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2019_12_12)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_ObjectReplication() { // TODO: The tests will temporarily use designated account, containers and blobs to check the @@ -3185,7 +3193,7 @@ public async 
Task ListBlobsFlatSegmentAsync_UseApacheArrow_ObjectReplication() } [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2020_02_10)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_LastAccessed() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -3210,7 +3218,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_LastAccessed() [Ignore("Feature not supported in current test environment")] [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2020_10_02)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_DeletedWithVersions() { // Arrange @@ -3242,7 +3250,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_DeletedWithVersions() } [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2021_02_12)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_EncodedBlobName() { // Arrange @@ -3263,7 +3271,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_EncodedBlobName() } [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_02_06)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_StartFrom() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -3285,7 +3293,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_StartFrom() } [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_02_06)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_EndBefore() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -3307,7 +3315,7 @@ public async Task 
ListBlobsFlatSegmentAsync_UseApacheArrow_EndBefore() } [RecordedTest] - [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_02_06)] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_StartFromEndBefore() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -3838,6 +3846,7 @@ public async Task ListBlobsHierarchySegmentAsync_StartFrom() } [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow() { await using DisposingContainer test = await GetTestContainerAsync(); @@ -3885,6 +3894,556 @@ public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow() ); } + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_Tags() + { + // Arrange + await using DisposingContainer test = await GetTestContainerAsync(); + AppendBlobClient appendBlob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + IDictionary tags = BuildTags(); + AppendBlobCreateOptions options = new AppendBlobCreateOptions + { + Tags = tags + }; + await appendBlob.CreateAsync(options); + + GetBlobsByHierarchyOptions getBlobsByHierarchyOptions = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + Traits = BlobTraits.Tags + }; + + // Act + IList blobHierachyItems = await test.Container.GetBlobsByHierarchyAsync(options: getBlobsByHierarchyOptions).ToListAsync(); + + // Assert + AssertDictionaryEquality(tags, blobHierachyItems[0].Blob.Tags); + Assert.AreEqual(tags.Count, blobHierachyItems[0].Blob.Properties.TagCount); + } + + [Ignore("Feature not supported in current test environment")] + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + [TestCase(null)] + [TestCase(RehydratePriority.Standard)] + [TestCase(RehydratePriority.High)] + public async 
Task ListBlobsHierarchySegmentAsync_UseApacheArrow_RehydratePriority(RehydratePriority? rehydratePriority) + { + // Arrange + await using DisposingContainer test = await GetTestContainerAsync(); + + BlockBlobClient blockBlob = InstrumentClient(test.Container.GetBlockBlobClient(GetNewBlobName())); + byte[] data = GetRandomBuffer(Constants.KB); + using Stream stream = new MemoryStream(data); + await blockBlob.UploadAsync(stream); + + if (rehydratePriority.HasValue) + { + await blockBlob.SetAccessTierAsync( + AccessTier.Archive); + + await blockBlob.SetAccessTierAsync( + AccessTier.Hot, + rehydratePriority: rehydratePriority.Value); + } + + // Act + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true + }; + IList blobItems = await test.Container.GetBlobsByHierarchyAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(rehydratePriority, blobItems[0].Blob.Properties.RehydratePriority); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_MaxResults() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + Delimiter = "/" + }; + + // Act + Page page = await test.Container.GetBlobsByHierarchyAsync(options: options) + .AsPages(pageSizeHint: 2) + .FirstAsync(); + + // Assert + Assert.AreEqual(2, page.Values.Count); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_Metadata() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + IDictionary metadata = BuildMetadata(); + await 
blob.CreateIfNotExistsAsync(metadata: metadata); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + Traits = BlobTraits.Metadata + }; + + // Act + BlobHierarchyItem item = await test.Container.GetBlobsByHierarchyAsync(options: options).FirstAsync(); + + // Assert + AssertDictionaryEquality(metadata, item.Blob.Metadata); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_Metadata_NoMetadata() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + await blob.CreateAsync(); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + Traits = BlobTraits.Metadata + }; + + // Act + BlobHierarchyItem item = await test.Container.GetBlobsByHierarchyAsync(options: options).FirstAsync(); + + // Assert + Assert.IsNotNull(item.Blob.Metadata); + Assert.AreEqual(0, item.Blob.Metadata.Count); + } + + [Ignore("Feature not supported in current test environment")] + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_EncryptionScope() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + blob = InstrumentClient(blob.WithEncryptionScope(TestConfigDefault.EncryptionScope)); + await blob.CreateIfNotExistsAsync(); + + // Act + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true + }; + BlobHierarchyItem item = await test.Container.GetBlobsByHierarchyAsync(options: options).FirstAsync(); + + // Assert + Assert.AreEqual(TestConfigDefault.EncryptionScope, 
item.Blob.Properties.EncryptionScope); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_Deleted() + { + // Arrange + BlobServiceClient blobServiceClient = BlobsClientBuilder.GetServiceClient_SoftDelete_OAuth(TestEnvironment.Credential); + await using DisposingContainer test = await GetTestContainerAsync(blobServiceClient); + string blobName = GetNewBlobName(); + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(blobName)); + await blob.CreateAsync(); + await blob.DeleteAsync(); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + States = BlobStates.Deleted + }; + + // Act + IList blobs = await test.Container.GetBlobsByHierarchyAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(blobName, blobs[0].Blob.Name); + Assert.IsTrue(blobs[0].Blob.Deleted); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_Uncommited() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + var blobName = GetNewBlobName(); + BlockBlobClient blob = InstrumentClient(test.Container.GetBlockBlobClient(blobName)); + var data = GetRandomBuffer(Constants.KB); + var blockId = ToBase64(GetNewBlockName()); + + using (var stream = new MemoryStream(data)) + { + await blob.StageBlockAsync( + base64BlockId: blockId, + content: stream); + } + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + States = BlobStates.Uncommitted + }; + + // Act + IList blobs = await test.Container.GetBlobsByHierarchyAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(1, blobs.Count); + Assert.AreEqual(blobName, blobs.First().Blob.Name); + } + + [RecordedTest] + [ServiceVersion(Min = 
BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_Snapshot() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + await blob.CreateIfNotExistsAsync(); + Response snapshotResponse = await blob.CreateSnapshotAsync(); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + States = BlobStates.Snapshots + }; + + // Act + IList blobs = await test.Container.GetBlobsByHierarchyAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(2, blobs.Count); + Assert.AreEqual(snapshotResponse.Value.Snapshot.ToString(), blobs.First().Blob.Snapshot); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_VersionId() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + Response createResponse = await blob.CreateAsync(); + IDictionary metadata = BuildMetadata(); + Response setMetadataResponse = await blob.SetMetadataAsync(metadata); + + // Act + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + States = BlobStates.Version + }; + + var blobs = new List(); + await foreach (Page page in test.Container.GetBlobsByHierarchyAsync(options: options).AsPages()) + { + blobs.AddRange(page.Values); + } + + // Assert + Assert.AreEqual(1, blobs.Count); + Assert.IsNull(blobs[0].Blob.IsLatestVersion); + Assert.AreEqual(createResponse.Value.VersionId, blobs[0].Blob.VersionId); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_Prefix() + { + await using 
DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + Prefix = "foo" + }; + + // Act + IList blobs = await test.Container.GetBlobsByHierarchyAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(3, blobs.Count); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_Error() + { + // Arrange + BlobServiceClient service = GetServiceClient_SharedKey(); + BlobContainerClient container = InstrumentClient(service.GetBlobContainerClient(GetNewContainerName())); + var id = Recording.Random.NewGuid().ToString(); + + // Act + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true + }; + await TestHelper.AssertExpectedExceptionAsync( + container.GetBlobsByHierarchyAsync(options: options).ToListAsync(), + e => Assert.AreEqual("ContainerNotFound", e.ErrorCode)); + } + + [PlaybackOnly("Object Replication policies is only enabled on certain storage accounts")] + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_ObjectReplication() + { + // TODO: The tests will temporarily use designated account, containers and blobs to check the + // existence of OR Metadata + BlobServiceClient sourceServiceClient = GetServiceClient_SharedKey(); + + // This is a recorded ONLY test with a special container we previously setup, as we can't auto setup policies yet + BlobContainerClient sourceContainer = InstrumentClient(sourceServiceClient.GetBlobContainerClient("test1")); + + // Act + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true + }; + BlobHierarchyItem item = await sourceContainer.GetBlobsByHierarchyAsync(options: 
options).FirstAsync(); + + // Assert + // Since this is a PLAYBACK ONLY test. We expect all the blobs in this source container/account + // to have OrMetadata + Assert.IsNotNull(item.Blob.ObjectReplicationSourceProperties); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_LastAccessed() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + BlockBlobClient blob = InstrumentClient(test.Container.GetBlockBlobClient(GetNewBlobName())); + var data = GetRandomBuffer(Constants.KB); + using Stream stream = new MemoryStream(data); + await blob.UploadAsync(content: stream); + + // Act + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true + }; + BlobHierarchyItem item = await test.Container.GetBlobsByHierarchyAsync(options: options).FirstAsync(); + + // Assert + Assert.IsNotNull(item.Blob.Properties.LastAccessedOn); + } + + [Ignore("Feature not supported in current test environment")] + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_DeletedWithVersions() + { + // Arrange + await using DisposingContainer test = await GetTestContainerAsync(); + + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(GetNewBlobName())); + Response createResponse = await blob.CreateAsync(); + IDictionary metadata = BuildMetadata(); + Response setMetadataResponse = await blob.SetMetadataAsync(metadata); + await blob.DeleteAsync(); + + // Act + List blobHierarchyItems = new List(); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + States = BlobStates.DeletedWithVersions + }; + + await foreach (BlobHierarchyItem blobItem in test.Container.GetBlobsByHierarchyAsync(options: options)) + { + blobHierarchyItems.Add(blobItem); + } + + // 
Assert + Assert.AreEqual(1, blobHierarchyItems.Count); + Assert.AreEqual(blob.Name, blobHierarchyItems[0].Blob.Name); + Assert.IsTrue(blobHierarchyItems[0].Blob.HasVersionsOnly); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + [TestCase(false)] + [TestCase(true)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_EncodedBlobName(bool delimiter) + { + // Arrange + await using DisposingContainer test = await GetTestContainerAsync(); + string blobName = "dir1/dir2/file\uFFFF.blob"; + AppendBlobClient blob = InstrumentClient(test.Container.GetAppendBlobClient(blobName)); + await blob.CreateAsync(); + + // Act + BlobHierarchyItem item; + if (delimiter) + { + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true + }; + item = await test.Container.GetBlobsByHierarchyAsync(options: options).FirstAsync(); + + // Assert + Assert.IsTrue(item.IsBlob); + Assert.AreEqual(blobName, item.Blob.Name); + } + else + { + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + Delimiter = ".b" + }; + + item = await test.Container.GetBlobsByHierarchyAsync( + options: options).FirstAsync(); + + // Assert + Assert.IsTrue(item.IsPrefix); + Assert.AreEqual("dir1/dir2/file\uffff.b", item.Prefix); + } + } + + [Ignore("Feature not supported in current test environment")] + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_VersionPrefixDelimiter() + { + // Arrange + await using DisposingContainer test = await GetTestContainerAsync(); + await SetUpContainerForListing(test.Container); + + var blobs = new List(); + var prefixes = new List(); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + States = BlobStates.Version, + Delimiter = "/", + Prefix = "baz" + }; + + await foreach (BlobHierarchyItem 
blobItem in test.Container.GetBlobsByHierarchyAsync( + options: options)) + { + if (blobItem.IsBlob) + { + blobs.Add(blobItem.Blob); + } + else + { + prefixes.Add(blobItem.Prefix); + } + } + + Assert.AreEqual(1, blobs.Count); + Assert.AreEqual(1, prefixes.Count); + + Assert.AreEqual("baz", blobs[0].Name); + Assert.IsNotNull(blobs[0].VersionId); + + Assert.AreEqual("baz/", prefixes[0]); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_StartFrom() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + StartFrom = "foo" + }; + + // Act + IList blobHierachyItems = await test.Container.GetBlobsByHierarchyAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(3, blobHierachyItems.Count); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_EndBefore() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + EndBefore = "foo" + }; + + // Act + IList blobHierachyItems = await test.Container.GetBlobsByHierarchyAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(5, blobHierachyItems.Count); + } + + [RecordedTest] + [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] + public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_StartFromEndBefore() + { + await using DisposingContainer test = await GetTestContainerAsync(); + + // Arrange + await SetUpContainerForListing(test.Container); + + GetBlobsByHierarchyOptions options = new 
GetBlobsByHierarchyOptions + { + UseApacheArrow = true, + StartFrom = "foo", + EndBefore = "foo/foo" + }; + + // Act + IList blobHierachyItems = await test.Container.GetBlobsByHierarchyAsync(options: options).ToListAsync(); + + // Assert + Assert.AreEqual(2, blobHierachyItems.Count); + } + [RecordedTest] public async Task UploadBlobAsync() { From 9ef5eae48d6a32b54b53405db201fbef3fcd8bc2 Mon Sep 17 00:00:00 2001 From: nickliu-msft Date: Tue, 31 Mar 2026 15:14:23 -0400 Subject: [PATCH 7/9] Exported APIs + updated autorest + some test changes --- .../api/Azure.Storage.Blobs.net10.0.cs | 4 +++ .../api/Azure.Storage.Blobs.net8.0.cs | 4 +++ .../api/Azure.Storage.Blobs.netstandard2.0.cs | 4 +++ .../api/Azure.Storage.Blobs.netstandard2.1.cs | 4 +++ .../src/BlobContainerClient.cs | 1 - .../src/Models/GetBlobsAsyncCollection.cs | 4 +-- .../Azure.Storage.Blobs/src/autorest.md | 2 +- .../tests/ContainerClientTests.cs | 34 +++++++++++++------ 8 files changed, 42 insertions(+), 15 deletions(-) diff --git a/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.net10.0.cs b/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.net10.0.cs index b4377602322d..36acdd95455a 100644 --- a/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.net10.0.cs +++ b/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.net10.0.cs @@ -1299,18 +1299,22 @@ public partial class GetBlobsByHierarchyOptions { public GetBlobsByHierarchyOptions() { } public string Delimiter { get { throw null; } set { } } + public string EndBefore { get { throw null; } set { } } public string Prefix { get { throw null; } set { } } public string StartFrom { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobStates States { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobTraits Traits { get { throw null; } set { } } + public bool UseApacheArrow { get { throw null; } set { } } } public partial class GetBlobsOptions { public GetBlobsOptions() { } + public string EndBefore { get { 
throw null; } set { } } public string Prefix { get { throw null; } set { } } public string StartFrom { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobStates States { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobTraits Traits { get { throw null; } set { } } + public bool UseApacheArrow { get { throw null; } set { } } } public partial class GetBlobTagResult { diff --git a/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.net8.0.cs b/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.net8.0.cs index b4377602322d..36acdd95455a 100644 --- a/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.net8.0.cs +++ b/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.net8.0.cs @@ -1299,18 +1299,22 @@ public partial class GetBlobsByHierarchyOptions { public GetBlobsByHierarchyOptions() { } public string Delimiter { get { throw null; } set { } } + public string EndBefore { get { throw null; } set { } } public string Prefix { get { throw null; } set { } } public string StartFrom { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobStates States { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobTraits Traits { get { throw null; } set { } } + public bool UseApacheArrow { get { throw null; } set { } } } public partial class GetBlobsOptions { public GetBlobsOptions() { } + public string EndBefore { get { throw null; } set { } } public string Prefix { get { throw null; } set { } } public string StartFrom { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobStates States { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobTraits Traits { get { throw null; } set { } } + public bool UseApacheArrow { get { throw null; } set { } } } public partial class GetBlobTagResult { diff --git a/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.netstandard2.0.cs b/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.netstandard2.0.cs index 
2e9275801452..53302bc9d7e6 100644 --- a/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.netstandard2.0.cs +++ b/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.netstandard2.0.cs @@ -1299,18 +1299,22 @@ public partial class GetBlobsByHierarchyOptions { public GetBlobsByHierarchyOptions() { } public string Delimiter { get { throw null; } set { } } + public string EndBefore { get { throw null; } set { } } public string Prefix { get { throw null; } set { } } public string StartFrom { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobStates States { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobTraits Traits { get { throw null; } set { } } + public bool UseApacheArrow { get { throw null; } set { } } } public partial class GetBlobsOptions { public GetBlobsOptions() { } + public string EndBefore { get { throw null; } set { } } public string Prefix { get { throw null; } set { } } public string StartFrom { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobStates States { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobTraits Traits { get { throw null; } set { } } + public bool UseApacheArrow { get { throw null; } set { } } } public partial class GetBlobTagResult { diff --git a/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.netstandard2.1.cs b/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.netstandard2.1.cs index 2e9275801452..53302bc9d7e6 100644 --- a/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.netstandard2.1.cs +++ b/sdk/storage/Azure.Storage.Blobs/api/Azure.Storage.Blobs.netstandard2.1.cs @@ -1299,18 +1299,22 @@ public partial class GetBlobsByHierarchyOptions { public GetBlobsByHierarchyOptions() { } public string Delimiter { get { throw null; } set { } } + public string EndBefore { get { throw null; } set { } } public string Prefix { get { throw null; } set { } } public string StartFrom { get { throw null; } set { } } public 
Azure.Storage.Blobs.Models.BlobStates States { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobTraits Traits { get { throw null; } set { } } + public bool UseApacheArrow { get { throw null; } set { } } } public partial class GetBlobsOptions { public GetBlobsOptions() { } + public string EndBefore { get { throw null; } set { } } public string Prefix { get { throw null; } set { } } public string StartFrom { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobStates States { get { throw null; } set { } } public Azure.Storage.Blobs.Models.BlobTraits Traits { get { throw null; } set { } } + public bool UseApacheArrow { get { throw null; } set { } } } public partial class GetBlobTagResult { diff --git a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs index d99fb39bf71c..81828f48576e 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/BlobContainerClient.cs @@ -6,7 +6,6 @@ using System.ComponentModel; using System.IO; using System.Linq; -using System.Runtime.InteropServices.ComTypes; using System.Threading; using System.Threading.Tasks; using System.Xml.Linq; diff --git a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs index bd0d42929d53..9c24beb94f01 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs +++ b/sdk/storage/Azure.Storage.Blobs/src/Models/GetBlobsAsyncCollection.cs @@ -15,12 +15,12 @@ namespace Azure.Storage.Blobs.Models internal class GetBlobsAsyncCollection : StorageCollectionEnumerator { private readonly BlobContainerClient _client; + private readonly bool _useApacheArrow; private readonly BlobTraits _traits; private readonly BlobStates _states; private readonly string _prefix; private readonly string _startFrom; private readonly string _endBefore; - private readonly 
bool _useApacheArrow; public GetBlobsAsyncCollection( BlobContainerClient client, @@ -32,12 +32,12 @@ public GetBlobsAsyncCollection( string endBefore) { _client = client; + _useApacheArrow = useApacheArrow; _traits = traits; _states = states; _prefix = prefix; _startFrom = startFrom; _endBefore = endBefore; - _useApacheArrow = useApacheArrow; } public override async ValueTask> GetNextPageAsync( diff --git a/sdk/storage/Azure.Storage.Blobs/src/autorest.md b/sdk/storage/Azure.Storage.Blobs/src/autorest.md index db1ff4ad7027..96cafcc2e38c 100644 --- a/sdk/storage/Azure.Storage.Blobs/src/autorest.md +++ b/sdk/storage/Azure.Storage.Blobs/src/autorest.md @@ -4,7 +4,7 @@ Run `dotnet build /t:GenerateCode` to generate code. ``` yaml input-file: - - C:\azure-rest-api-specs\specification\storage\data-plane\Microsoft.BlobStorage\stable\2026-10-06\blob.json + - https://raw.githubusercontent.com/nickliu-msft/azure-rest-api-specs/ab1ec63862fdf4506cfb1cdd4c8105281b5de3f0/specification/storage/data-plane/Microsoft.BlobStorage/stable/2026-10-06/blob.json generation1-convenience-client: true # https://github.com/Azure/autorest/issues/4075 skip-semantics-validation: true diff --git a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs index 671bf364b5dc..d1907c429111 100644 --- a/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs +++ b/sdk/storage/Azure.Storage.Blobs/tests/ContainerClientTests.cs @@ -2933,11 +2933,19 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_MaxResults() { UseApacheArrow = true }; - Page page = await test.Container.GetBlobsAsync(options: options).AsPages(pageSizeHint: 2).FirstAsync(); + + int numPages = 0; + // Act + await foreach (Page page in test.Container.GetBlobsAsync(options: options) + .AsPages(pageSizeHint: 2)) + { + // Assert + Assert.AreEqual(2, page.Values.Count); + ++numPages; + } // Assert - Assert.AreEqual(2, page.Values.Count); - 
Assert.IsTrue(page.Values.All(b => b.Metadata.Count == 0)); + Assert.AreEqual(4, numPages); } [RecordedTest] @@ -3167,7 +3175,7 @@ public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_VersionId() Assert.AreEqual(createResponse.Value.VersionId, blobs[0].VersionId); } - [PlaybackOnly("Object Replication policies is only enabled on certain storage accounts")] + [Ignore("Feature not supported in current test environment")] [RecordedTest] [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsFlatSegmentAsync_UseApacheArrow_ObjectReplication() @@ -3970,17 +3978,21 @@ public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_MaxResults() GetBlobsByHierarchyOptions options = new GetBlobsByHierarchyOptions { - UseApacheArrow = true, - Delimiter = "/" + UseApacheArrow = true }; + int numPages = 0; // Act - Page page = await test.Container.GetBlobsByHierarchyAsync(options: options) - .AsPages(pageSizeHint: 2) - .FirstAsync(); + await foreach (Page page in test.Container.GetBlobsByHierarchyAsync(options: options) + .AsPages(pageSizeHint: 2)) + { + // Assert + Assert.AreEqual(2, page.Values.Count); + ++numPages; + } // Assert - Assert.AreEqual(2, page.Values.Count); + Assert.AreEqual(4, numPages); } [RecordedTest] @@ -4210,7 +4222,7 @@ await TestHelper.AssertExpectedExceptionAsync( e => Assert.AreEqual("ContainerNotFound", e.ErrorCode)); } - [PlaybackOnly("Object Replication policies is only enabled on certain storage accounts")] + [Ignore("Feature not supported in current test environment")] [RecordedTest] [ServiceVersion(Min = BlobClientOptions.ServiceVersion.V2026_06_06)] public async Task ListBlobsHierarchySegmentAsync_UseApacheArrow_ObjectReplication() From ee2aae00e60f0c54514db9fb80b07c1f76461ce4 Mon Sep 17 00:00:00 2001 From: nickliu-msft Date: Tue, 31 Mar 2026 15:34:56 -0400 Subject: [PATCH 8/9] Recorded tests --- sdk/storage/Azure.Storage.Blobs/assets.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/sdk/storage/Azure.Storage.Blobs/assets.json b/sdk/storage/Azure.Storage.Blobs/assets.json index 78e5eeba71c2..8901a970d80f 100644 --- a/sdk/storage/Azure.Storage.Blobs/assets.json +++ b/sdk/storage/Azure.Storage.Blobs/assets.json @@ -2,5 +2,5 @@ "AssetsRepo": "Azure/azure-sdk-assets", "AssetsRepoPrefixPath": "net", "TagPrefix": "net/storage/Azure.Storage.Blobs", - "Tag": "net/storage/Azure.Storage.Blobs_632dc57c2e" + "Tag": "net/storage/Azure.Storage.Blobs_e7ef41304f" } From 9e0d3aefad092faa4a93d99e3d381518729af74b Mon Sep 17 00:00:00 2001 From: nickliu-msft Date: Tue, 31 Mar 2026 19:22:46 -0400 Subject: [PATCH 9/9] Moved dependency to new Azure.Storage.Blobs override file --- eng/centralpackagemanagement/Directory.Packages.props | 1 - .../overrides/Azure.Storage.Blobs.Packages.props | 5 +++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 eng/centralpackagemanagement/overrides/Azure.Storage.Blobs.Packages.props diff --git a/eng/centralpackagemanagement/Directory.Packages.props b/eng/centralpackagemanagement/Directory.Packages.props index 3b8bffade67f..0e1499c4e749 100644 --- a/eng/centralpackagemanagement/Directory.Packages.props +++ b/eng/centralpackagemanagement/Directory.Packages.props @@ -179,7 +179,6 @@ - diff --git a/eng/centralpackagemanagement/overrides/Azure.Storage.Blobs.Packages.props b/eng/centralpackagemanagement/overrides/Azure.Storage.Blobs.Packages.props new file mode 100644 index 000000000000..7c33b85bf879 --- /dev/null +++ b/eng/centralpackagemanagement/overrides/Azure.Storage.Blobs.Packages.props @@ -0,0 +1,5 @@ + + + + +