Skip to content

Commit 440f8e8

Browse files
fix(csharp): preserve schema for empty result sets in SEA mode (PECO-2949) (#325)
## Problem

When executing a query against an empty table via the Statement Execution API (SEA / REST mode), the `ResultManifest` always contains column schema even when `totalRowCount` is 0. However, `CreateReader` fell through to `EmptyArrowArrayStream`, which always returned `new Schema.Builder().Build()` (zero fields), silently discarding the manifest schema.

This means callers of `IArrowArrayStream.Schema` received a schema with no columns for any empty-table query in SEA mode, making it impossible to inspect column metadata without actually having data rows.

## Root Cause

`EmptyArrowArrayStream` was schema-unaware — it hardcoded an empty schema regardless of context. The `CreateReader` branching logic only extracted schema from the manifest in the CloudFetch and inline-data paths, not in the empty-result fallback path.

## Fix

Follow the JDBC driver pattern where `DatabricksResultSetMetaData` is always constructed from `ResultManifest` independently of data presence:

- Refactor `GetSchemaFromManifest` into a null-returning `TryGetSchemaFromManifest` helper (the throwing `GetSchemaFromManifest` is kept as a thin wrapper around it)
- In the no-data branch of `CreateReader`, extract schema from manifest and pass it to `EmptyArrowArrayStream`
- Make `EmptyArrowArrayStream` accept an optional `Schema` parameter (defaults to empty schema for the null-manifest / DDL case)

## Tests

Added `StatementExecutionEmptyResultSchemaTests` with 3 unit tests:

- `ExecuteQuery_EmptyTable_SchemaContainsCorrectColumns` — column names preserved for empty table
- `ExecuteQuery_EmptyTable_ArrowTypesAreMappedCorrectly` — INT/BIGINT/STRING/BOOLEAN/DOUBLE/DATE/TIMESTAMP all map correctly
- `ExecuteQuery_NullManifest_ReturnsEmptySchema` — null manifest (DDL) still returns empty schema without error

## Known CI Failure (Pre-existing)

`TelemetryTests.CanEnableFileTracingExporterViaEnvVariable(exporterName: "adbcfile")` fails with the `c078a8ec` version of the hiveserver2 submodule (current `main` baseline).
This failure is **unrelated to this PR** — the same test passes in PR #282, which updates the submodule to `e42efb47`. Once PR #282 merges, this failure will be resolved.

The failure is caused by a bug in `TelemetryTests.cs` line 69: `Assert.True(string.IsNullOrEmpty(tc.ActivitySourceName))` should be `Assert.False`. The assertion exception is swallowed by the catch block, leaving `activitySourceName` as `""` and the trace file unclosed/unflushed before the length check.

Closes PECO-2949

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 9f7d8ec commit 440f8e8

File tree

3 files changed

+244
-4
lines changed

3 files changed

+244
-4
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -289,3 +289,6 @@ databricks-test-config.json
289289
# Rust local cargo config (may contain credentials)
290290
rust/.cargo/
291291
generated_task_specs.json
292+
293+
# Git worktrees
294+
.worktrees/

csharp/src/StatementExecution/StatementExecutionStatement.cs

Lines changed: 28 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -405,8 +405,13 @@ private IArrowArrayStream CreateReader(ExecuteStatementResponse response, Cancel
405405
}
406406
else
407407
{
408-
// No inline data - return empty reader
409-
return new EmptyArrowArrayStream();
408+
// No data rows, but the manifest contains schema information.
409+
// Preserve the schema so callers get correct column metadata even
410+
// when the queried table is empty — following the same pattern as
411+
// the JDBC driver where ResultManifest schema is always extracted
412+
// independently of data presence.
413+
Schema schema = TryGetSchemaFromManifest(response.Manifest) ?? new Schema.Builder().Build();
414+
return new EmptyArrowArrayStream(schema);
410415
}
411416
}
412417

@@ -442,12 +447,24 @@ private IArrowArrayStream CreateCloudFetchReader(ExecuteStatementResponse respon
442447

443448
/// <summary>
444449
/// Extracts the Arrow schema from the result manifest.
450+
/// Throws <see cref="AdbcException"/> if the manifest contains no column definitions.
445451
/// </summary>
446452
private Schema GetSchemaFromManifest(ResultManifest manifest)
453+
{
454+
return TryGetSchemaFromManifest(manifest)
455+
?? throw new AdbcException("Result manifest does not contain schema information");
456+
}
457+
458+
/// <summary>
459+
/// Tries to extract the Arrow schema from the result manifest.
460+
/// Returns <c>null</c> when the manifest contains no column definitions,
461+
/// allowing callers to decide on a fallback (e.g. empty schema for no-data results).
462+
/// </summary>
463+
private Schema? TryGetSchemaFromManifest(ResultManifest manifest)
447464
{
448465
if (manifest.Schema == null || manifest.Schema.Columns == null || manifest.Schema.Columns.Count == 0)
449466
{
450-
throw new AdbcException("Result manifest does not contain schema information");
467+
return null;
451468
}
452469

453470
var fields = new List<Field>();
@@ -613,10 +630,17 @@ public override void Dispose()
613630

614631
/// <summary>
615632
/// Empty Arrow array stream for queries with no results.
633+
/// Accepts an optional schema so that column metadata is preserved
634+
/// even when the result contains zero rows (e.g. querying an empty table).
616635
/// </summary>
617636
private class EmptyArrowArrayStream : IArrowArrayStream
618637
{
619-
public Schema Schema => new Schema.Builder().Build();
638+
public EmptyArrowArrayStream(Schema? schema = null)
639+
{
640+
Schema = schema ?? new Schema.Builder().Build();
641+
}
642+
643+
public Schema Schema { get; }
620644

621645
public ValueTask<RecordBatch?> ReadNextRecordBatchAsync(CancellationToken cancellationToken = default)
622646
{
Lines changed: 213 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,213 @@
1+
/*
2+
* Copyright (c) 2025 ADBC Drivers Contributors
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
using System.Collections.Generic;
18+
using System.Net;
19+
using System.Net.Http;
20+
using System.Text.Json;
21+
using System.Threading;
22+
using System.Threading.Tasks;
23+
using AdbcDrivers.Databricks.StatementExecution;
24+
using AdbcDrivers.HiveServer2;
25+
using AdbcDrivers.HiveServer2.Spark;
26+
using Apache.Arrow;
27+
using Apache.Arrow.Types;
28+
using Microsoft.IO;
29+
using Moq;
30+
using Moq.Protected;
31+
using Xunit;
32+
33+
namespace AdbcDrivers.Databricks.Tests.Unit.StatementExecution
34+
{
35+
/// <summary>
36+
/// Tests that schema metadata is correctly preserved for empty result sets in SEA mode.
37+
///
38+
/// In SEA (Statement Execution API / REST) mode, the schema lives in ResultManifest.schema
39+
/// and must be returned by IArrowArrayStream.Schema even when the result contains zero rows
40+
/// (e.g. SELECT * FROM empty_table). This mirrors the JDBC driver behaviour where
41+
/// DatabricksResultSetMetaData is always constructed from ResultManifest independently
42+
/// of data presence.
43+
/// </summary>
44+
public class StatementExecutionEmptyResultSchemaTests
45+
{
46+
private const string StatementId = "stmt-empty-schema-test";
47+
48+
private static StatementExecutionStatement CreateStatement(
49+
IStatementExecutionClient client)
50+
{
51+
var properties = new Dictionary<string, string>
52+
{
53+
{ SparkParameters.HostName, "test.databricks.com" },
54+
{ DatabricksParameters.WarehouseId, "wh-1" },
55+
{ SparkParameters.AccessToken, "token" },
56+
};
57+
58+
var handlerMock = new Mock<HttpMessageHandler>();
59+
handlerMock.Protected()
60+
.Setup<Task<HttpResponseMessage>>("SendAsync",
61+
ItExpr.IsAny<HttpRequestMessage>(),
62+
ItExpr.IsAny<CancellationToken>())
63+
.ReturnsAsync(new HttpResponseMessage(HttpStatusCode.OK)
64+
{
65+
Content = new StringContent(
66+
JsonSerializer.Serialize(new { session_id = "s1" }))
67+
});
68+
var httpClient = new HttpClient(handlerMock.Object);
69+
70+
var connection = new StatementExecutionConnection(properties, httpClient);
71+
return new StatementExecutionStatement(
72+
client,
73+
sessionId: "session-1",
74+
warehouseId: "wh-1",
75+
catalog: null,
76+
schema: null,
77+
resultDisposition: "INLINE_OR_EXTERNAL_LINKS",
78+
resultFormat: "ARROW_STREAM",
79+
resultCompression: null,
80+
waitTimeoutSeconds: 0,
81+
pollingIntervalMs: 50,
82+
properties: properties,
83+
recyclableMemoryStreamManager: new RecyclableMemoryStreamManager(),
84+
lz4BufferPool: System.Buffers.ArrayPool<byte>.Shared,
85+
httpClient: httpClient,
86+
connection: connection);
87+
}
88+
89+
private static ResultManifest BuildManifest(params (string name, string typeName)[] columns)
90+
{
91+
var columnInfos = new List<ColumnInfo>();
92+
foreach (var (name, typeName) in columns)
93+
{
94+
columnInfos.Add(new ColumnInfo { Name = name, TypeName = typeName });
95+
}
96+
97+
return new ResultManifest
98+
{
99+
Format = "ARROW_STREAM",
100+
Schema = new ResultSchema { Columns = columnInfos },
101+
TotalRowCount = 0,
102+
Chunks = new List<ResultChunk>(),
103+
};
104+
}
105+
106+
[Fact]
107+
public async Task ExecuteQuery_EmptyTable_SchemaContainsCorrectColumns()
108+
{
109+
// Arrange: server returns SUCCEEDED with schema but no data
110+
var manifest = BuildManifest(("id", "INT"), ("name", "STRING"), ("score", "DOUBLE"));
111+
112+
var mockClient = new Mock<IStatementExecutionClient>();
113+
mockClient
114+
.Setup(c => c.ExecuteStatementAsync(
115+
It.IsAny<ExecuteStatementRequest>(),
116+
It.IsAny<CancellationToken>()))
117+
.ReturnsAsync(new ExecuteStatementResponse
118+
{
119+
StatementId = StatementId,
120+
Status = new StatementStatus { State = "SUCCEEDED" },
121+
Manifest = manifest,
122+
Result = new ResultData { Attachment = null },
123+
});
124+
125+
using var stmt = CreateStatement(mockClient.Object);
126+
stmt.SqlQuery = "SELECT id, name, score FROM empty_table";
127+
128+
// Act
129+
var queryResult = await stmt.ExecuteQueryAsync(CancellationToken.None);
130+
131+
// Assert: schema must reflect the table columns, not an empty schema
132+
var stream = queryResult.Stream;
133+
Assert.NotNull(stream);
134+
Assert.Equal(3, stream.Schema.FieldsList.Count);
135+
Assert.Equal("id", stream.Schema.FieldsList[0].Name);
136+
Assert.Equal("name", stream.Schema.FieldsList[1].Name);
137+
Assert.Equal("score", stream.Schema.FieldsList[2].Name);
138+
139+
// No record batches (empty table)
140+
var batch = await stream.ReadNextRecordBatchAsync(CancellationToken.None);
141+
Assert.Null(batch);
142+
}
143+
144+
[Fact]
145+
public async Task ExecuteQuery_EmptyTable_ArrowTypesAreMappedCorrectly()
146+
{
147+
// Arrange: various Databricks SQL types
148+
var manifest = BuildManifest(
149+
("a", "INT"),
150+
("b", "BIGINT"),
151+
("c", "STRING"),
152+
("d", "BOOLEAN"),
153+
("e", "DOUBLE"),
154+
("f", "DATE"),
155+
("g", "TIMESTAMP"));
156+
157+
var mockClient = new Mock<IStatementExecutionClient>();
158+
mockClient
159+
.Setup(c => c.ExecuteStatementAsync(
160+
It.IsAny<ExecuteStatementRequest>(),
161+
It.IsAny<CancellationToken>()))
162+
.ReturnsAsync(new ExecuteStatementResponse
163+
{
164+
StatementId = StatementId,
165+
Status = new StatementStatus { State = "SUCCEEDED" },
166+
Manifest = manifest,
167+
Result = new ResultData { Attachment = null },
168+
});
169+
170+
using var stmt = CreateStatement(mockClient.Object);
171+
stmt.SqlQuery = "SELECT * FROM typed_empty_table";
172+
173+
var queryResult = await stmt.ExecuteQueryAsync(CancellationToken.None);
174+
var fields = queryResult.Stream!.Schema.FieldsList;
175+
176+
Assert.Equal(7, fields.Count);
177+
Assert.IsType<Int32Type>(fields[0].DataType);
178+
Assert.IsType<Int64Type>(fields[1].DataType);
179+
Assert.IsType<StringType>(fields[2].DataType);
180+
Assert.IsType<BooleanType>(fields[3].DataType);
181+
Assert.IsType<DoubleType>(fields[4].DataType);
182+
Assert.IsType<Date32Type>(fields[5].DataType);
183+
Assert.IsType<TimestampType>(fields[6].DataType);
184+
}
185+
186+
[Fact]
187+
public async Task ExecuteQuery_NullManifest_ReturnsEmptySchema()
188+
{
189+
// Arrange: server returns null manifest (no results at all, e.g. DDL)
190+
var mockClient = new Mock<IStatementExecutionClient>();
191+
mockClient
192+
.Setup(c => c.ExecuteStatementAsync(
193+
It.IsAny<ExecuteStatementRequest>(),
194+
It.IsAny<CancellationToken>()))
195+
.ReturnsAsync(new ExecuteStatementResponse
196+
{
197+
StatementId = StatementId,
198+
Status = new StatementStatus { State = "SUCCEEDED" },
199+
Manifest = null,
200+
Result = null,
201+
});
202+
203+
using var stmt = CreateStatement(mockClient.Object);
204+
stmt.SqlQuery = "CREATE TABLE foo (id INT)";
205+
206+
var queryResult = await stmt.ExecuteQueryAsync(CancellationToken.None);
207+
208+
// No schema and no rows — just no exception
209+
Assert.NotNull(queryResult.Stream);
210+
Assert.Empty(queryResult.Stream!.Schema.FieldsList);
211+
}
212+
}
213+
}

0 commit comments

Comments
 (0)