Skip to content

Commit 5513ab2

Browse files
fix(csharp): preserve schema for empty result sets in SEA mode
When executing a query against an empty table via the Statement Execution API, the ResultManifest always contains column schema even when totalRowCount is 0. Previously, CreateReader fell through to EmptyArrowArrayStream, which always returned new Schema.Builder().Build() (zero fields), discarding the manifest schema entirely.

Follow the JDBC driver pattern (DatabricksResultSetMetaData is always built from ResultManifest independently of data presence) by:
- Refactoring GetSchemaFromManifest into a null-returning TryGetSchemaFromManifest helper
- Extracting schema from the manifest in the no-data branch and passing it to EmptyArrowArrayStream
- Making EmptyArrowArrayStream accept an optional Schema parameter

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent d73d253 commit 5513ab2

File tree

2 files changed

+241
-4
lines changed

2 files changed

+241
-4
lines changed

csharp/src/StatementExecution/StatementExecutionStatement.cs

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,13 @@ private IArrowArrayStream CreateReader(ExecuteStatementResponse response, Cancel
318318
}
319319
else
320320
{
321-
// No inline data - return empty reader
322-
return new EmptyArrowArrayStream();
321+
// No data rows, but the manifest contains schema information.
322+
// Preserve the schema so callers get correct column metadata even
323+
// when the queried table is empty — following the same pattern as
324+
// the JDBC driver where ResultManifest schema is always extracted
325+
// independently of data presence.
326+
Schema schema = TryGetSchemaFromManifest(response.Manifest) ?? new Schema.Builder().Build();
327+
return new EmptyArrowArrayStream(schema);
323328
}
324329
}
325330

@@ -355,12 +360,24 @@ private IArrowArrayStream CreateCloudFetchReader(ExecuteStatementResponse respon
355360

356361
/// <summary>
/// Returns the Arrow schema described by the result manifest, failing when
/// the manifest yields no schema.
/// </summary>
/// <param name="manifest">Result manifest to read the column definitions from.</param>
/// <returns>The schema produced by <see cref="TryGetSchemaFromManifest"/>.</returns>
/// <exception cref="AdbcException">
/// <see cref="TryGetSchemaFromManifest"/> returned <c>null</c> for <paramref name="manifest"/>.
/// </exception>
private Schema GetSchemaFromManifest(ResultManifest manifest)
{
    Schema? schema = TryGetSchemaFromManifest(manifest);
    if (schema == null)
    {
        // Callers of this strict variant require a schema; a missing one is an error.
        throw new AdbcException("Result manifest does not contain schema information");
    }

    return schema;
}
370+
371+
/// <summary>
372+
/// Tries to extract the Arrow schema from the result manifest.
373+
/// Returns <c>null</c> when the manifest contains no column definitions,
374+
/// allowing callers to decide on a fallback (e.g. empty schema for no-data results).
375+
/// </summary>
376+
private Schema? TryGetSchemaFromManifest(ResultManifest manifest)
360377
{
361378
if (manifest.Schema == null || manifest.Schema.Columns == null || manifest.Schema.Columns.Count == 0)
362379
{
363-
throw new AdbcException("Result manifest does not contain schema information");
380+
return null;
364381
}
365382

366383
var fields = new List<Field>();
@@ -525,10 +542,17 @@ public override void Dispose()
525542

526543
/// <summary>
527544
/// Empty Arrow array stream for queries with no results.
545+
/// Accepts an optional schema so that column metadata is preserved
546+
/// even when the result contains zero rows (e.g. querying an empty table).
528547
/// </summary>
529548
private class EmptyArrowArrayStream : IArrowArrayStream
530549
{
531-
public Schema Schema => new Schema.Builder().Build();
550+
public EmptyArrowArrayStream(Schema? schema = null)
551+
{
552+
Schema = schema ?? new Schema.Builder().Build();
553+
}
554+
555+
public Schema Schema { get; }
532556

533557
public ValueTask<RecordBatch?> ReadNextRecordBatchAsync(CancellationToken cancellationToken = default)
534558
{
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
/*
2+
* Copyright (c) 2025 ADBC Drivers Contributors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
using System.Collections.Generic;
18+
using System.Net;
19+
using System.Net.Http;
20+
using System.Text.Json;
21+
using System.Threading;
22+
using System.Threading.Tasks;
23+
using AdbcDrivers.Databricks.StatementExecution;
24+
using AdbcDrivers.HiveServer2;
25+
using AdbcDrivers.HiveServer2.Spark;
26+
using Apache.Arrow;
27+
using Apache.Arrow.Types;
28+
using Microsoft.IO;
29+
using Moq;
30+
using Moq.Protected;
31+
using Xunit;
32+
33+
namespace AdbcDrivers.Databricks.Tests.Unit.StatementExecution
34+
{
35+
/// <summary>
/// Checks the schema exposed by the stream returned from
/// <c>ExecuteQueryAsync</c> when the Statement Execution API (SEA) response
/// carries no row data.
///
/// When the response manifest lists columns but reports zero rows
/// (e.g. SELECT * FROM empty_table), the stream's schema is expected to
/// contain those columns. When the response has no manifest at all, the
/// stream is expected to expose an empty schema.
/// </summary>
public class StatementExecutionEmptyResultSchemaTests
{
    private const string StatementId = "stmt-empty-schema-test";

    /// <summary>
    /// Creates a statement that talks to <paramref name="client"/>. The
    /// <see cref="HttpClient"/> handed to the statement and its connection uses a
    /// mocked handler that answers every request with HTTP 200 and a JSON body
    /// containing <c>session_id = "s1"</c>.
    /// </summary>
    private static StatementExecutionStatement CreateStatement(
        IStatementExecutionClient client)
    {
        var properties = new Dictionary<string, string>
        {
            { SparkParameters.HostName, "test.databricks.com" },
            { DatabricksParameters.WarehouseId, "wh-1" },
            { SparkParameters.AccessToken, "token" },
        };

        var cannedHttpResponse = new HttpResponseMessage(HttpStatusCode.OK)
        {
            Content = new StringContent(
                JsonSerializer.Serialize(new { session_id = "s1" }))
        };

        // SendAsync is protected on HttpMessageHandler, hence the string-based setup.
        var handler = new Mock<HttpMessageHandler>();
        handler.Protected()
            .Setup<Task<HttpResponseMessage>>("SendAsync",
                ItExpr.IsAny<HttpRequestMessage>(),
                ItExpr.IsAny<CancellationToken>())
            .ReturnsAsync(cannedHttpResponse);

        var httpClient = new HttpClient(handler.Object);
        var connection = new StatementExecutionConnection(properties, httpClient);

        return new StatementExecutionStatement(
            client,
            sessionId: "session-1",
            warehouseId: "wh-1",
            catalog: null,
            schema: null,
            resultDisposition: "INLINE_OR_EXTERNAL_LINKS",
            resultFormat: "ARROW_STREAM",
            resultCompression: null,
            waitTimeoutSeconds: 0,
            pollingIntervalMs: 50,
            properties: properties,
            recyclableMemoryStreamManager: new RecyclableMemoryStreamManager(),
            lz4BufferPool: System.Buffers.ArrayPool<byte>.Shared,
            httpClient: httpClient,
            connection: connection);
    }

    /// <summary>
    /// Builds an ARROW_STREAM manifest with the given columns, a total row
    /// count of zero and an empty chunk list.
    /// </summary>
    private static ResultManifest BuildManifest(params (string name, string typeName)[] columns)
    {
        var columnInfos = new List<ColumnInfo>();
        for (int i = 0; i < columns.Length; i++)
        {
            columnInfos.Add(new ColumnInfo
            {
                Name = columns[i].name,
                TypeName = columns[i].typeName,
            });
        }

        var manifest = new ResultManifest
        {
            Format = "ARROW_STREAM",
            Schema = new ResultSchema { Columns = columnInfos },
            TotalRowCount = 0,
            Chunks = new List<ResultChunk>(),
        };
        return manifest;
    }

    /// <summary>
    /// Creates a mocked client whose <c>ExecuteStatementAsync</c> returns
    /// <paramref name="response"/> for any request and cancellation token.
    /// </summary>
    private static IStatementExecutionClient CreateClientReturning(ExecuteStatementResponse response)
    {
        var client = new Mock<IStatementExecutionClient>();
        client
            .Setup(c => c.ExecuteStatementAsync(
                It.IsAny<ExecuteStatementRequest>(),
                It.IsAny<CancellationToken>()))
            .ReturnsAsync(response);
        return client.Object;
    }

    [Fact]
    public async Task ExecuteQuery_EmptyTable_SchemaContainsCorrectColumns()
    {
        // Arrange: SUCCEEDED response whose manifest has three columns and no data.
        var response = new ExecuteStatementResponse
        {
            StatementId = StatementId,
            Status = new StatementStatus { State = "SUCCEEDED" },
            Manifest = BuildManifest(("id", "INT"), ("name", "STRING"), ("score", "DOUBLE")),
            Result = new ResultData { Attachment = null },
        };

        using var stmt = CreateStatement(CreateClientReturning(response));
        stmt.SqlQuery = "SELECT id, name, score FROM empty_table";

        // Act
        var queryResult = await stmt.ExecuteQueryAsync(CancellationToken.None);

        // Assert: the schema mirrors the manifest columns instead of being empty.
        var stream = queryResult.Stream;
        Assert.NotNull(stream);

        var fields = stream.Schema.FieldsList;
        Assert.Equal(3, fields.Count);

        string[] expectedNames = { "id", "name", "score" };
        for (int i = 0; i < expectedNames.Length; i++)
        {
            Assert.Equal(expectedNames[i], fields[i].Name);
        }

        // Assert: the stream ends immediately because there are no rows.
        Assert.Null(await stream.ReadNextRecordBatchAsync(CancellationToken.None));
    }

    [Fact]
    public async Task ExecuteQuery_EmptyTable_ArrowTypesAreMappedCorrectly()
    {
        // Arrange: one row per column - name, Databricks SQL type, expected Arrow type.
        var expected = new (string Column, string SqlType, System.Type ArrowType)[]
        {
            ("a", "INT", typeof(Int32Type)),
            ("b", "BIGINT", typeof(Int64Type)),
            ("c", "STRING", typeof(StringType)),
            ("d", "BOOLEAN", typeof(BooleanType)),
            ("e", "DOUBLE", typeof(DoubleType)),
            ("f", "DATE", typeof(Date32Type)),
            ("g", "TIMESTAMP", typeof(TimestampType)),
        };

        var columns = new (string name, string typeName)[expected.Length];
        for (int i = 0; i < expected.Length; i++)
        {
            columns[i] = (expected[i].Column, expected[i].SqlType);
        }

        var response = new ExecuteStatementResponse
        {
            StatementId = StatementId,
            Status = new StatementStatus { State = "SUCCEEDED" },
            Manifest = BuildManifest(columns),
            Result = new ResultData { Attachment = null },
        };

        using var stmt = CreateStatement(CreateClientReturning(response));
        stmt.SqlQuery = "SELECT * FROM typed_empty_table";

        // Act
        var queryResult = await stmt.ExecuteQueryAsync(CancellationToken.None);

        // Assert: each field has exactly the expected Arrow type, in column order.
        var fields = queryResult.Stream!.Schema.FieldsList;
        Assert.Equal(7, fields.Count);
        for (int i = 0; i < expected.Length; i++)
        {
            Assert.IsType(expected[i].ArrowType, fields[i].DataType);
        }
    }

    [Fact]
    public async Task ExecuteQuery_NullManifest_ReturnsEmptySchema()
    {
        // Arrange: SUCCEEDED response with neither manifest nor result (e.g. DDL).
        var response = new ExecuteStatementResponse
        {
            StatementId = StatementId,
            Status = new StatementStatus { State = "SUCCEEDED" },
            Manifest = null,
            Result = null,
        };

        using var stmt = CreateStatement(CreateClientReturning(response));
        stmt.SqlQuery = "CREATE TABLE foo (id INT)";

        // Act
        var queryResult = await stmt.ExecuteQueryAsync(CancellationToken.None);

        // Assert: a stream is still returned, and its schema has no fields.
        Assert.NotNull(queryResult.Stream);
        Assert.Empty(queryResult.Stream!.Schema.FieldsList);
    }
}
213+
}

0 commit comments

Comments
 (0)