Skip to content

Commit 4248d7c

Browse files
authored
feat: Spec multi-result-set API (apache#3871)
Extracted from apache#3607 with influence by the comments there and apache/arrow-adbc@main...CurtHagenlocher:arrow-adbc:MoreResults, this contains a proposal for handling multi-result set query execution via ADBC by adding a new function for drivers, `AdbcStatementNextResultSet`. This also includes the necessary changes for an ADBC API Revision 1.2.0 (macro defines and so on). The comment above the function includes all the semantic definitions of the behavior.
1 parent 31b4c44 commit 4248d7c

File tree

4 files changed

+453
-30
lines changed

4 files changed

+453
-30
lines changed

c/driver_manager/adbc_version_100_compatibility_test.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,11 @@ class AdbcVersion : public ::testing::Test {
5757
TEST_F(AdbcVersion, StructSize) {
5858
ASSERT_EQ(sizeof(AdbcErrorVersion100), ADBC_ERROR_1_0_0_SIZE);
5959
ASSERT_EQ(sizeof(AdbcError), ADBC_ERROR_1_1_0_SIZE);
60+
ASSERT_EQ(sizeof(AdbcError), ADBC_ERROR_1_2_0_SIZE);
6061

6162
ASSERT_EQ(sizeof(AdbcDriverVersion100), ADBC_DRIVER_1_0_0_SIZE);
62-
ASSERT_EQ(sizeof(AdbcDriver), ADBC_DRIVER_1_1_0_SIZE);
63+
// MultiResultSetNext is the first member added in 1.2.0, so its offset is the 1.1.0 size.
ASSERT_EQ(offsetof(struct AdbcDriver, MultiResultSetNext), ADBC_DRIVER_1_1_0_SIZE);
64+
ASSERT_EQ(sizeof(AdbcDriver), ADBC_DRIVER_1_2_0_SIZE);
6365
}
6466

6567
// Initialize a version 1.0.0 driver with the version 1.1.0 driver struct.

c/include/arrow-adbc/adbc.h

Lines changed: 224 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,15 @@ struct ADBC_EXPORT AdbcError {
355355
/// \since ADBC API revision 1.1.0
356356
#define ADBC_ERROR_1_1_0_SIZE (sizeof(struct AdbcError))
357357

358+
/// \brief The size of the AdbcError structure in ADBC 1.2.0.
359+
///
360+
/// Drivers written for ADBC 1.2.0 and later should never touch more than this
361+
/// portion of an AdbcError struct when vendor_code is
362+
/// ADBC_ERROR_VENDOR_CODE_PRIVATE_DATA.
363+
///
364+
/// \since ADBC API revision 1.2.0
365+
#define ADBC_ERROR_1_2_0_SIZE (sizeof(struct AdbcError))
366+
358367
/// \brief Extra key-value metadata for an error.
359368
///
360369
/// The fields here are owned by the driver and should not be freed. The
@@ -423,6 +432,14 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream
423432
/// \since ADBC API revision 1.1.0
424433
#define ADBC_VERSION_1_1_0 1001000
425434

435+
/// \brief ADBC revision 1.2.0
436+
///
437+
/// When passed to an AdbcDriverInitFunc(), the driver parameter must
438+
/// point to an AdbcDriver.
439+
///
440+
/// \since ADBC API revision 1.2.0
441+
#define ADBC_VERSION_1_2_0 1002000
442+
426443
/// \brief Canonical option value for enabling an option.
427444
///
428445
/// For use as the value in SetOption calls.
@@ -525,6 +542,7 @@ const struct AdbcError* AdbcErrorFromArrayStream(struct ArrowArrayStream* stream
525542
/// \see AdbcConnectionGetInfo
526543
/// \see ADBC_VERSION_1_0_0
527544
/// \see ADBC_VERSION_1_1_0
545+
/// \see ADBC_VERSION_1_2_0
528546
#define ADBC_INFO_DRIVER_ADBC_VERSION 103
529547

530548
/// \brief Return metadata on catalogs, schemas, tables, and columns.
@@ -973,6 +991,107 @@ struct AdbcPartitions {
973991

974992
/// @}
975993

994+
/// \defgroup adbc-statement-multi Multiple Result Set Execution
995+
/// Some databases support executing a statement that returns multiple
996+
/// result sets. This section defines the API for working with such
997+
/// statements and result sets.
998+
/// @{
999+
1000+
/// \brief A struct for handling a potentially multi-result set execution
1001+
///
1002+
/// This struct is populated by AdbcStatementExecuteMulti and can be used to iterate
1003+
/// through the result sets of the execution. The caller can use the MultiResultSetNext
1004+
/// or MultiResultSetNextPartitions functions on the AdbcMultiResultSet struct to iterate
1005+
/// through the result sets. The caller is responsible for calling the release function
1006+
/// when finished with the result set.
1007+
///
1008+
/// \since ADBC API revision 1.2.0
1009+
struct ADBC_EXPORT AdbcMultiResultSet {
1010+
/// \brief opaque implementation-defined state
1011+
void* private_data;
1012+
1013+
/// \brief The associated driver
1014+
struct AdbcDriver* private_driver;
1015+
};
1016+
1017+
/// \brief Release the AdbcMultiResultSet and any associated resources.
1018+
///
1019+
/// \since ADBC API revision 1.2.0
1020+
///
1021+
/// If all the result sets have not been completely consumed, then the driver
1022+
/// should cancel any remaining work if this is called.
1023+
///
1024+
/// \param[in] result_set The result set to release.
1025+
/// \param[out] error An optional location to return an error message if necessary.
1026+
///
1027+
/// \return ADBC_STATUS_OK on success or an appropriate error code.
1028+
AdbcStatusCode AdbcMultiResultSetRelease(struct AdbcMultiResultSet* result_set,
1029+
struct AdbcError* error);
1030+
1031+
/// \brief Get the next ArrowArrayStream from an AdbcMultiResultSet.
1032+
///
1033+
/// \since ADBC API revision 1.2.0
1034+
///
1035+
/// The driver can decide whether to allow fetching the next result set
1036+
/// as a single stream or as a set of partitions. If the driver does not
1037+
/// support fetching the next result set as a stream (indicating it should
1038+
/// be fetched as partitions), it should return ADBC_STATUS_NOT_IMPLEMENTED.
1039+
///
1040+
/// To indicate that no additional result sets are available, this should return
1041+
/// ADBC_STATUS_OK and set the release callback on out to NULL. The expected
1042+
/// pattern is that after calling `StatementExecuteMulti`, the caller would
1043+
/// then call `MultiResultSetNext` repeatedly until it returns ADBC_STATUS_OK and
1044+
/// sets the release callback to NULL, indicating that there are no more result sets.
1045+
/// It is not an error to repeatedly call `MultiResultSetNext` after the last result set
1046+
/// has been reached; it should simply continue to return ADBC_STATUS_OK with a
1047+
/// NULL release callback.
1048+
///
1049+
/// \param[in] result_set The result set struct to fetch the next result from.
1050+
/// \param[out] out The result stream to populate
1051+
/// \param[out] rows_affected The number of rows affected if known, else -1.
1052+
/// \param[out] error An optional location to return an error message if necessary.
1053+
///
1054+
/// \return ADBC_STATUS_NOT_IMPLEMENTED if the driver only supports fetching results
1055+
/// as partitions or ADBC_STATUS_OK (or an appropriate error code) otherwise.
1056+
AdbcStatusCode AdbcMultiResultSetNext(struct AdbcMultiResultSet* result_set,
1057+
struct ArrowArrayStream* out,
1058+
int64_t* rows_affected, struct AdbcError* error);
1059+
1060+
/// \brief Get the next result set from a multi-result-set execution as partitions.
1061+
///
1062+
/// \since ADBC API revision 1.2.0
1063+
///
1064+
/// The driver can decide whether to allow fetching the next result set
1065+
/// as a single stream or as a set of partitions. If the driver does not
1066+
/// support fetching the next result set as partitions (indicating it should
1067+
/// be fetched as a stream), it should return ADBC_STATUS_NOT_IMPLEMENTED.
1068+
///
1069+
/// To indicate that no additional result sets are available, this should return
1070+
/// ADBC_STATUS_OK and set the release callback on partitions to NULL. The expected
1071+
/// pattern is that after calling `StatementExecuteMulti`, the caller would
1072+
/// then call `MultiResultSetNextPartitions` repeatedly until it returns ADBC_STATUS_OK
1073+
/// and sets the release callback to NULL, indicating that there are no more result sets.
1074+
/// It is not an error to repeatedly call `MultiResultSetNextPartitions` after the last
1075+
/// result set has been reached; it should simply continue to return ADBC_STATUS_OK with
1076+
/// a NULL release callback.
1077+
///
1078+
/// \param[in] result_set The result set struct to fetch the next result from.
1079+
/// \param[out] schema The schema of the result set to populate
1080+
/// \param[out] partitions The partitions to populate
1081+
/// \param[out] rows_affected The number of rows affected if known, else -1. Pass NULL
1082+
/// if the client does not want this information.
1083+
/// \param[out] error An optional location to return an error message if necessary.
1084+
///
1085+
/// \return ADBC_STATUS_NOT_IMPLEMENTED if the driver only supports fetching results
1086+
/// as a stream, ADBC_STATUS_INVALID_STATE if called at an inappropriate time, and
1087+
/// ADBC_STATUS_OK (or an appropriate error code) otherwise.
1088+
AdbcStatusCode AdbcMultiResultSetNextPartitions(struct AdbcMultiResultSet* result_set,
1089+
struct ArrowSchema* schema,
1090+
struct AdbcPartitions* partitions,
1091+
int64_t* rows_affected,
1092+
struct AdbcError* error);
1093+
/// @}
1094+
9761095
/// \defgroup adbc-driver Driver Initialization
9771096
///
9781097
/// These functions are intended to help support integration between a
@@ -1059,19 +1178,6 @@ struct ADBC_EXPORT AdbcDriver {
10591178
/// the AdbcDriverInitFunc is greater than or equal to
10601179
/// ADBC_VERSION_1_1_0.
10611180
///
1062-
/// For a 1.0.0 driver being loaded by a 1.1.0 driver manager: the
1063-
/// 1.1.0 manager will allocate the new, expanded AdbcDriver struct
1064-
/// and attempt to have the driver initialize it with
1065-
/// ADBC_VERSION_1_1_0. This must return an error, after which the
1066-
/// driver will try again with ADBC_VERSION_1_0_0. The driver must
1067-
/// not access the new fields, which will carry undefined values.
1068-
///
1069-
/// For a 1.1.0 driver being loaded by a 1.0.0 driver manager: the
1070-
/// 1.0.0 manager will allocate the old AdbcDriver struct and
1071-
/// attempt to have the driver initialize it with
1072-
/// ADBC_VERSION_1_0_0. The driver must not access the new fields,
1073-
/// and should initialize the old fields.
1074-
///
10751181
/// @{
10761182

10771183
int (*ErrorGetDetailCount)(const struct AdbcError* error);
@@ -1135,6 +1241,36 @@ struct ADBC_EXPORT AdbcDriver {
11351241
struct AdbcError*);
11361242

11371243
/// @}
1244+
1245+
/// \defgroup adbc-1.2.0 ADBC API Revision 1.2.0
1246+
///
1247+
/// Functions added in ADBC 1.2.0. For backwards compatibility,
1248+
/// these members must not be accessed unless the version passed to
1249+
/// the AdbcDriverInitFunc is greater than or equal to
1250+
/// ADBC_VERSION_1_2_0.
1251+
///
1252+
/// When the driver manager attempts to initialize a driver at a particular
1253+
/// version, such as the case where the driver manager and driver are using different
1254+
/// versions of the ADBC spec, the driver should not try to access any functions defined
1255+
/// in the spec after that version.
1256+
///
1257+
/// @{
1258+
1259+
AdbcStatusCode (*MultiResultSetNext)(struct AdbcMultiResultSet*,
1260+
struct ArrowArrayStream*, int64_t*,
1261+
struct AdbcError*);
1262+
AdbcStatusCode (*MultiResultSetNextPartitions)(struct AdbcMultiResultSet*,
1263+
struct ArrowSchema*,
1264+
struct AdbcPartitions*, int64_t*,
1265+
struct AdbcError*);
1266+
AdbcStatusCode (*MultiResultSetRelease)(struct AdbcMultiResultSet*, struct AdbcError*);
1267+
AdbcStatusCode (*StatementExecuteSchemaMulti)(struct AdbcStatement*,
1268+
struct AdbcMultiResultSet*,
1269+
struct AdbcError*);
1270+
AdbcStatusCode (*StatementExecuteMulti)(struct AdbcStatement*,
1271+
struct AdbcMultiResultSet*, struct AdbcError*);
1272+
1273+
/// @}
11381274
};
11391275

11401276
/// \brief The size of the AdbcDriver structure in ADBC 1.0.0.
@@ -1151,7 +1287,15 @@ struct ADBC_EXPORT AdbcDriver {
11511287
/// ADBC_VERSION_1_1_0.
11521288
///
11531289
/// \since ADBC API revision 1.1.0
1154-
#define ADBC_DRIVER_1_1_0_SIZE (sizeof(struct AdbcDriver))
1290+
// The 1.1.0 struct ends where the first member added in 1.2.0 begins.
#define ADBC_DRIVER_1_1_0_SIZE (offsetof(struct AdbcDriver, MultiResultSetNext))
1291+
1292+
/// \brief The size of the AdbcDriver structure in ADBC 1.2.0.
1293+
/// Drivers written for ADBC 1.2.0 and later should never touch more
1294+
/// than this portion of an AdbcDriver struct when given
1295+
/// ADBC_VERSION_1_2_0.
1296+
///
1297+
/// \since ADBC API revision 1.2.0
1298+
#define ADBC_DRIVER_1_2_0_SIZE (sizeof(struct AdbcDriver))
11551299

11561300
/// @}
11571301

@@ -2018,6 +2162,72 @@ AdbcStatusCode AdbcStatementExecuteQuery(struct AdbcStatement* statement,
20182162
struct ArrowArrayStream* out,
20192163
int64_t* rows_affected, struct AdbcError* error);
20202164

2165+
/// \addtogroup adbc-statement-multi
2166+
/// Some databases support executing a statement that returns multiple
2167+
/// result sets. This section defines the API for working with such
2168+
/// statements and result sets.
2169+
/// @{
2170+
2171+
/// \brief Retrieve schema for statement that potentially returns multiple result sets
2172+
///
2173+
/// \since ADBC API revision 1.2.0
2174+
///
2175+
/// This can be used to retrieve the schemas of all result sets without
2176+
/// executing the statement. If the driver does not support this, it should return
2177+
/// ADBC_STATUS_NOT_IMPLEMENTED.
2178+
///
2179+
/// The ArrowArrayStream objects populated by calls to `MultiResultSetNext` with the
2180+
/// results struct returned by this function should have a valid schema but no data (i.e.
2181+
/// `get_next` should return EOS immediately). This allows clients to inspect the schemas
2182+
/// of all result sets before consuming any data, which can be useful for certain
2183+
/// applications such as query planning or UI display of results.
2184+
///
2185+
/// \param[in] statement The statement to execute.
2186+
/// \param[out] results The result set struct to populate with the schemas of the result
2187+
/// sets.
2188+
/// \param[out] error An optional location to return an error message if necessary.
2189+
///
2190+
/// \return ADBC_STATUS_NOT_IMPLEMENTED if the driver does not support this,
2191+
/// and ADBC_STATUS_OK (or an appropriate error code) otherwise.
2192+
ADBC_EXPORT
2193+
AdbcStatusCode AdbcStatementExecuteSchemaMulti(struct AdbcStatement* statement,
2194+
struct AdbcMultiResultSet* results,
2195+
struct AdbcError* error);
2196+
2197+
/// \brief Execute a statement that potentially returns multiple result sets
2198+
///
2199+
/// \since ADBC API revision 1.2.0
2200+
///
2201+
/// To execute a statement which might potentially return multiple result sets,
2202+
/// this can be called in place of AdbcStatementExecuteQuery if the driver supports it.
2203+
/// If supported, the driver will populate the AdbcMultiResultSet structure with all
2204+
/// necessary information to iterate through the result sets. The caller can then
2205+
/// use the MultiResultSetNext or MultiResultSetNextPartitions functions on the
2206+
/// AdbcMultiResultSet struct to iterate through the result sets.
2207+
///
2208+
/// A driver MAY support executing this function while the previous result set is
2209+
/// still being consumed (i.e. before the previous ArrowArrayStream is released), but
2210+
/// this is not required. If the driver does not support this, it should return
2211+
/// ADBC_STATUS_INVALID_STATE if the previous result set is still active.
2212+
///
2213+
/// A driver implementing this function must also implement the AdbcMultiResultSet struct
2214+
/// and its associated functions.
2215+
///
2216+
/// \param[in] statement The statement to execute.
2217+
/// \param[out] results The result set struct to populate with the results of the
2218+
/// execution.
2219+
/// \param[out] error An optional location to return an error message if necessary.
2220+
///
2221+
/// \return ADBC_STATUS_NOT_IMPLEMENTED if the driver does not support multi-result set
2222+
/// execution,
2223+
/// and ADBC_STATUS_OK (or an appropriate error code) otherwise.
2224+
ADBC_EXPORT
2225+
AdbcStatusCode AdbcStatementExecuteMulti(struct AdbcStatement* statement,
2226+
struct AdbcMultiResultSet* results,
2227+
struct AdbcError* error);
2228+
2229+
/// @}
2230+
20212231
/// \brief Get the schema of the result set of a query without
20222232
/// executing it.
20232233
///

0 commit comments

Comments
 (0)