Skip to content

Commit c47f605

Browse files
authored
GH-46481: [C++][Python] Allow nullable schema in FlightInfo (#46489)
### Rationale for this change The underlying Protobuf field is always nullable and other implementations allow this. ### What changes are included in this PR? Allow empty/nonpresent schema in FlightInfo ### Are these changes tested? Yes ### Are there any user-facing changes? Yes, the getter may now be None. (Previously, this would instead crash, so I believe this is not a breaking change.) Closes #37677. Closes #46481. * GitHub Issue: #46481 Authored-by: David Li <[email protected]> Signed-off-by: David Li <[email protected]>
1 parent f45594d commit c47f605

File tree

8 files changed

+78
-7
lines changed

8 files changed

+78
-7
lines changed

cpp/src/arrow/flight/flight_internals_test.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ TEST(FlightTypes, FlightInfo) {
238238
MakeFlightInfo(schema1, desc1, {endpoint1}, -1, 42, true, ""),
239239
MakeFlightInfo(schema1, desc2, {endpoint1, endpoint2}, 64, -1, false,
240240
"\xDE\xAD\xC0\xDE"),
241+
MakeFlightInfo(desc1, {}, -1, -1, false, ""),
241242
};
242243
std::vector<std::string> reprs = {
243244
"<FlightInfo schema=(serialized) descriptor=<FlightDescriptor cmd='foo'> "
@@ -257,6 +258,8 @@ TEST(FlightTypes, FlightInfo) {
257258
"locations=[grpc+tcp://localhost:1234] expiration_time=null "
258259
"app_metadata='CAFED00D'>] "
259260
"total_records=64 total_bytes=-1 ordered=false app_metadata='DEADC0DE'>",
261+
"<FlightInfo schema=(empty) descriptor=<FlightDescriptor cmd='foo'> "
262+
"endpoints=[] total_records=-1 total_bytes=-1 ordered=false app_metadata=''>",
260263
};
261264

262265
ASSERT_NO_FATAL_FAILURE(TestRoundtrip<pb::FlightInfo>(values, reprs));

cpp/src/arrow/flight/test_util.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,16 @@ FlightInfo MakeFlightInfo(const Schema& schema, const FlightDescriptor& descript
7777
return info;
7878
}
7979

80+
FlightInfo MakeFlightInfo(const FlightDescriptor& descriptor,
81+
const std::vector<FlightEndpoint>& endpoints,
82+
int64_t total_records, int64_t total_bytes, bool ordered,
83+
std::string app_metadata) {
84+
EXPECT_OK_AND_ASSIGN(auto info,
85+
FlightInfo::Make(nullptr, descriptor, endpoints, total_records,
86+
total_bytes, ordered, std::move(app_metadata)));
87+
return info;
88+
}
89+
8090
NumberingStream::NumberingStream(std::unique_ptr<FlightDataStream> stream)
8191
: counter_(0), stream_(std::move(stream)) {}
8292

cpp/src/arrow/flight/test_util.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,12 @@ FlightInfo MakeFlightInfo(const Schema& schema, const FlightDescriptor& descript
182182
int64_t total_records, int64_t total_bytes, bool ordered,
183183
std::string app_metadata);
184184

185+
ARROW_FLIGHT_EXPORT
186+
FlightInfo MakeFlightInfo(const FlightDescriptor& descriptor,
187+
const std::vector<FlightEndpoint>& endpoints,
188+
int64_t total_records, int64_t total_bytes, bool ordered,
189+
std::string app_metadata);
190+
185191
ARROW_FLIGHT_EXPORT
186192
Status ExampleTlsCertificates(std::vector<CertKeyPair>* out);
187193

cpp/src/arrow/flight/types.cc

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,10 +280,31 @@ arrow::Result<FlightInfo> FlightInfo::Make(const Schema& schema,
280280
return FlightInfo(std::move(data));
281281
}
282282

283+
arrow::Result<FlightInfo> FlightInfo::Make(const std::shared_ptr<Schema>& schema,
284+
const FlightDescriptor& descriptor,
285+
const std::vector<FlightEndpoint>& endpoints,
286+
int64_t total_records, int64_t total_bytes,
287+
bool ordered, std::string app_metadata) {
288+
FlightInfo::Data data;
289+
data.descriptor = descriptor;
290+
data.endpoints = endpoints;
291+
data.total_records = total_records;
292+
data.total_bytes = total_bytes;
293+
data.ordered = ordered;
294+
data.app_metadata = std::move(app_metadata);
295+
if (schema) {
296+
RETURN_NOT_OK(internal::SchemaToString(*schema, &data.schema));
297+
}
298+
return FlightInfo(std::move(data));
299+
}
300+
283301
arrow::Result<std::shared_ptr<Schema>> FlightInfo::GetSchema(
284302
ipc::DictionaryMemo* dictionary_memo) const {
285303
if (reconstructed_schema_) {
286304
return schema_;
305+
} else if (data_.schema.empty()) {
306+
reconstructed_schema_ = true;
307+
return schema_;
287308
}
288309
// Create a non-owned Buffer to avoid copying
289310
io::BufferReader schema_reader(std::make_shared<Buffer>(data_.schema));
@@ -305,7 +326,9 @@ arrow::Status FlightInfo::Deserialize(std::string_view serialized,
305326
std::string FlightInfo::ToString() const {
306327
std::stringstream ss;
307328
ss << "<FlightInfo schema=";
308-
if (schema_) {
329+
if (data_.schema.empty()) {
330+
ss << "(empty)";
331+
} else if (schema_) {
309332
ss << schema_->ToString();
310333
} else {
311334
ss << "(serialized)";

cpp/src/arrow/flight/types.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -638,12 +638,21 @@ class ARROW_FLIGHT_EXPORT FlightInfo
638638
bool ordered = false,
639639
std::string app_metadata = "");
640640

641+
/// \brief Factory method to construct a FlightInfo.
642+
static arrow::Result<FlightInfo> Make(const std::shared_ptr<Schema>& schema,
643+
const FlightDescriptor& descriptor,
644+
const std::vector<FlightEndpoint>& endpoints,
645+
int64_t total_records, int64_t total_bytes,
646+
bool ordered = false,
647+
std::string app_metadata = "");
648+
641649
/// \brief Deserialize the Arrow schema of the dataset. Populate any
642650
/// dictionary encoded fields into a DictionaryMemo for
643651
/// bookkeeping
644652
/// \param[in,out] dictionary_memo for dictionary bookkeeping, will
645653
/// be modified
646-
/// \return Arrow result with the reconstructed Schema
654+
/// \return Arrow result with the reconstructed Schema. Note that the schema
655+
/// may be nullptr, as the schema is optional.
647656
arrow::Result<std::shared_ptr<Schema>> GetSchema(
648657
ipc::DictionaryMemo* dictionary_memo) const;
649658

python/pyarrow/_flight.pyx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,7 @@ cdef class FlightInfo(_Weakrefable):
890890
891891
Parameters
892892
----------
893-
schema : Schema
893+
schema : Schema, optional
894894
the schema of the data in this flight.
895895
descriptor : FlightDescriptor
896896
the descriptor for this flight.
@@ -961,6 +961,8 @@ cdef class FlightInfo(_Weakrefable):
961961
CDictionaryMemo dummy_memo
962962

963963
check_flight_status(self.info.get().GetSchema(&dummy_memo).Value(&schema))
964+
if schema.get() == NULL:
965+
return None
964966
return pyarrow_wrap_schema(schema)
965967

966968
@property

python/pyarrow/src/arrow/python/flight.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ Status CreateFlightInfo(const std::shared_ptr<arrow::Schema>& schema,
373373
const std::string& app_metadata,
374374
std::unique_ptr<arrow::flight::FlightInfo>* out) {
375375
ARROW_ASSIGN_OR_RAISE(auto result, arrow::flight::FlightInfo::Make(
376-
*schema, descriptor, endpoints, total_records,
376+
schema, descriptor, endpoints, total_records,
377377
total_bytes, ordered, app_metadata));
378378
*out = std::unique_ptr<arrow::flight::FlightInfo>(
379379
new arrow::flight::FlightInfo(std::move(result)));

python/pyarrow/tests/test_flight.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -620,9 +620,10 @@ def __init__(self, factory):
620620

621621
def received_headers(self, headers):
622622
auth_header = case_insensitive_header_lookup(headers, 'Authorization')
623-
self.factory.set_call_credential([
624-
b'authorization',
625-
auth_header[0].encode("utf-8")])
623+
if auth_header:
624+
self.factory.set_call_credential([
625+
b'authorization',
626+
auth_header[0].encode("utf-8")])
626627

627628

628629
class HeaderAuthServerMiddlewareFactory(ServerMiddlewareFactory):
@@ -916,6 +917,23 @@ def test_repr():
916917
assert repr(flight.SchemaResult(pa.schema([("int", "int64")]))) == \
917918
"<pyarrow.flight.SchemaResult schema=(int: int64)>"
918919
assert repr(flight.Ticket(b"foo")) == ticket_repr
920+
assert info.schema == pa.schema([])
921+
922+
info = flight.FlightInfo(
923+
None, flight.FlightDescriptor.for_path(), [],
924+
1, 42, True, b"test app metadata"
925+
)
926+
info_repr = (
927+
"<pyarrow.flight.FlightInfo "
928+
"schema=None "
929+
"descriptor=<pyarrow.flight.FlightDescriptor path=[]> "
930+
"endpoints=[] "
931+
"total_records=1 "
932+
"total_bytes=42 "
933+
"ordered=True "
934+
"app_metadata=b'test app metadata'>")
935+
assert repr(info) == info_repr
936+
assert info.schema is None
919937

920938
with pytest.raises(TypeError):
921939
flight.Action("foo", None)

0 commit comments

Comments
 (0)