Skip to content

Commit d6a66b5

Browse files
author
David Coe
committed
merge
2 parents a96ff8a + aa1053a commit d6a66b5

File tree

71 files changed

+2869
-472
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+2869
-472
lines changed

.asf.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,22 @@ github:
2121
collaborators:
2222
- krlmlr
2323
- nbenn
24-
- ywc88
2524
enabled_merge_buttons:
2625
merge: false
2726
rebase: false
2827
squash: true
2928
features:
29+
discussions: true
3030
issues: true
31+
labels:
32+
- arrow
33+
- database
34+
protected_branches:
35+
main: {}
3136

3237
notifications:
3338
commits: commits@arrow.apache.org
39+
discussions: user@arrow.apache.org
3440
issues_status: issues@arrow.apache.org
3541
issues: github@arrow.apache.org
3642
pullrequests: github@arrow.apache.org

.github/workflows/packaging.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ jobs:
342342
mv apache-arrow-adbc-$VERSION.tar.gz adbc/ci/linux-packages/
343343
344344
- name: Set up Ruby
345-
uses: ruby/setup-ruby@922ebc4c5262cd14e07bb0e1db020984b6c064fe # v1.226.0
345+
uses: ruby/setup-ruby@354a1ad156761f5ee2b7b13fa8e09943a5e8d252 # v1.229.0
346346
with:
347347
ruby-version: ruby
348348

.github/workflows/r-check.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
- uses: actions/setup-go@v5
4848
with:
4949
go-version: "${{ env.GO_VERSION }}"
50-
- uses: r-lib/actions/setup-r@14a7e741c1cb130261263aa1593718ba42cf443b # v2
50+
- uses: r-lib/actions/setup-r@bd49c52ffe281809afa6f0fecbf37483c5dd0b93 # v2
5151
with:
5252
r-version: release
5353
use-public-rspm: true
@@ -72,7 +72,7 @@ jobs:
7272
popd
7373
shell: bash
7474

75-
- uses: r-lib/actions/setup-r-dependencies@f4937e0dc26f9b99c969cd3e4ca943b576e7f991
75+
- uses: r-lib/actions/setup-r-dependencies@bd49c52ffe281809afa6f0fecbf37483c5dd0b93
7676
with:
7777
extra-packages: any::rcmdcheck
7878
needs: check
@@ -94,7 +94,7 @@ jobs:
9494
ADBC_FLIGHTSQL_TEST_URI="grpc://localhost:8080"
9595
echo "ADBC_FLIGHTSQL_TEST_URI=${ADBC_FLIGHTSQL_TEST_URI}" >> $GITHUB_ENV
9696
97-
- uses: r-lib/actions/check-r-package@14a7e741c1cb130261263aa1593718ba42cf443b # v2
97+
- uses: r-lib/actions/check-r-package@bd49c52ffe281809afa6f0fecbf37483c5dd0b93 # v2
9898
env:
9999
ADBC_SNOWFLAKE_TEST_URI: ${{ secrets.SNOWFLAKE_URI }}
100100
R_KEEP_PKG_SOURCE: yes

.github/workflows/r-extended.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ jobs:
108108

109109
steps:
110110
- uses: actions/checkout@v4
111-
- uses: r-lib/actions/setup-r@14a7e741c1cb130261263aa1593718ba42cf443b # v2
111+
- uses: r-lib/actions/setup-r@bd49c52ffe281809afa6f0fecbf37483c5dd0b93 # v2
112112
with:
113113
rversion: ${{ matrix.rversion }}
114114
use-public-rspm: true
@@ -131,7 +131,7 @@ jobs:
131131
popd
132132
shell: bash
133133

134-
- uses: r-lib/actions/setup-r-dependencies@f4937e0dc26f9b99c969cd3e4ca943b576e7f991
134+
- uses: r-lib/actions/setup-r-dependencies@bd49c52ffe281809afa6f0fecbf37483c5dd0b93
135135
with:
136136
working-directory: r/${{ matrix.pkg }}
137137

c/driver/postgresql/statement.cc

Lines changed: 64 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include <limits>
3131
#include <memory>
3232
#include <string>
33+
#include <string_view>
3334
#include <utility>
3435
#include <vector>
3536

@@ -219,55 +220,89 @@ void TupleReader::Release() {
219220
row_id_ = -1;
220221
}
221222

223+
// Instead of directly exporting the TupleReader, which is tied to the
224+
// lifetime of the Statement, we export a weak_ptr reference instead. That
225+
// way if the user accidentally closes the Statement before the
226+
// ArrowArrayStream, we can avoid a crash.
227+
// See https://github.com/apache/arrow-adbc/issues/2629
228+
struct ExportedTupleReader {
229+
std::weak_ptr<TupleReader> self;
230+
};
231+
222232
void TupleReader::ExportTo(struct ArrowArrayStream* stream) {
223233
stream->get_schema = &GetSchemaTrampoline;
224234
stream->get_next = &GetNextTrampoline;
225235
stream->get_last_error = &GetLastErrorTrampoline;
226236
stream->release = &ReleaseTrampoline;
227-
stream->private_data = this;
237+
stream->private_data = new ExportedTupleReader{weak_from_this()};
228238
}
229239

230-
const struct AdbcError* TupleReader::ErrorFromArrayStream(struct ArrowArrayStream* stream,
240+
const struct AdbcError* TupleReader::ErrorFromArrayStream(struct ArrowArrayStream* self,
231241
AdbcStatusCode* status) {
232-
if (!stream->private_data || stream->release != &ReleaseTrampoline) {
242+
if (!self->private_data || self->release != &ReleaseTrampoline) {
233243
return nullptr;
234244
}
235245

236-
TupleReader* reader = static_cast<TupleReader*>(stream->private_data);
237-
if (status) {
238-
*status = reader->status_;
246+
auto* wrapper = static_cast<ExportedTupleReader*>(self->private_data);
247+
auto maybe_reader = wrapper->self.lock();
248+
if (maybe_reader) {
249+
if (status) {
250+
*status = maybe_reader->status_;
251+
}
252+
return &maybe_reader->error_;
239253
}
240-
return &reader->error_;
254+
return nullptr;
241255
}
242256

243257
int TupleReader::GetSchemaTrampoline(struct ArrowArrayStream* self,
244258
struct ArrowSchema* out) {
245259
if (!self || !self->private_data) return EINVAL;
246260

247-
TupleReader* reader = static_cast<TupleReader*>(self->private_data);
248-
return reader->GetSchema(out);
261+
auto* wrapper = static_cast<ExportedTupleReader*>(self->private_data);
262+
auto maybe_reader = wrapper->self.lock();
263+
if (maybe_reader) {
264+
return maybe_reader->GetSchema(out);
265+
}
266+
// statement was closed or reader was otherwise invalidated
267+
return EINVAL;
249268
}
250269

251270
int TupleReader::GetNextTrampoline(struct ArrowArrayStream* self,
252271
struct ArrowArray* out) {
253272
if (!self || !self->private_data) return EINVAL;
254273

255-
TupleReader* reader = static_cast<TupleReader*>(self->private_data);
256-
return reader->GetNext(out);
274+
auto* wrapper = static_cast<ExportedTupleReader*>(self->private_data);
275+
auto maybe_reader = wrapper->self.lock();
276+
if (maybe_reader) {
277+
return maybe_reader->GetNext(out);
278+
}
279+
// statement was closed or reader was otherwise invalidated
280+
return EINVAL;
257281
}
258282

259283
const char* TupleReader::GetLastErrorTrampoline(struct ArrowArrayStream* self) {
260284
if (!self || !self->private_data) return nullptr;
285+
constexpr std::string_view kReaderInvalidated =
286+
"[libpq] Reader invalidated (statement or reader was closed)";
261287

262-
TupleReader* reader = static_cast<TupleReader*>(self->private_data);
263-
return reader->last_error();
288+
auto* wrapper = static_cast<ExportedTupleReader*>(self->private_data);
289+
auto maybe_reader = wrapper->self.lock();
290+
if (maybe_reader) {
291+
return maybe_reader->last_error();
292+
}
293+
// statement was closed or reader was otherwise invalidated
294+
return kReaderInvalidated.data();
264295
}
265296

266297
void TupleReader::ReleaseTrampoline(struct ArrowArrayStream* self) {
267298
if (!self || !self->private_data) return;
268299

269-
TupleReader* reader = static_cast<TupleReader*>(self->private_data);
270-
reader->Release();
300+
auto* wrapper = static_cast<ExportedTupleReader*>(self->private_data);
301+
auto maybe_reader = wrapper->self.lock();
302+
if (maybe_reader) {
303+
maybe_reader->Release();
304+
}
305+
delete wrapper;
271306
self->private_data = nullptr;
272307
self->release = nullptr;
273308
}
@@ -281,7 +316,7 @@ AdbcStatusCode PostgresStatement::New(struct AdbcConnection* connection,
281316
connection_ =
282317
*reinterpret_cast<std::shared_ptr<PostgresConnection>*>(connection->private_data);
283318
type_resolver_ = connection_->type_resolver();
284-
reader_.conn_ = connection_->conn();
319+
ClearResult();
285320
return ADBC_STATUS_OK;
286321
}
287322

@@ -514,24 +549,24 @@ AdbcStatusCode PostgresStatement::ExecuteQuery(struct ArrowArrayStream* stream,
514549
}
515550

516551
struct ArrowError na_error;
517-
reader_.copy_reader_ = std::make_unique<PostgresCopyStreamReader>();
518-
CHECK_NA(INTERNAL, reader_.copy_reader_->Init(root_type), error);
552+
reader_->copy_reader_ = std::make_unique<PostgresCopyStreamReader>();
553+
CHECK_NA(INTERNAL, reader_->copy_reader_->Init(root_type), error);
519554
CHECK_NA_DETAIL(INTERNAL,
520-
reader_.copy_reader_->InferOutputSchema(
555+
reader_->copy_reader_->InferOutputSchema(
521556
std::string(connection_->VendorName()), &na_error),
522557
&na_error, error);
523558

524-
CHECK_NA_DETAIL(INTERNAL, reader_.copy_reader_->InitFieldReaders(&na_error), &na_error,
559+
CHECK_NA_DETAIL(INTERNAL, reader_->copy_reader_->InitFieldReaders(&na_error), &na_error,
525560
error);
526561

527562
// Execute the COPY query
528563
RAISE_STATUS(error, helper.ExecuteCopy());
529564

530565
// We need the PQresult back for the reader
531-
reader_.result_ = helper.ReleaseResult();
566+
reader_->result_ = helper.ReleaseResult();
532567

533568
// Export to stream
534-
reader_.ExportTo(stream);
569+
reader_->ExportTo(stream);
535570
if (rows_affected) *rows_affected = -1;
536571
return ADBC_STATUS_OK;
537572
}
@@ -674,7 +709,7 @@ AdbcStatusCode PostgresStatement::GetOption(const char* key, char* value, size_t
674709
break;
675710
}
676711
} else if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_BATCH_SIZE_HINT_BYTES) == 0) {
677-
result = std::to_string(reader_.batch_size_hint_bytes_);
712+
result = std::to_string(reader_->batch_size_hint_bytes_);
678713
} else if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_USE_COPY) == 0) {
679714
if (UseCopy()) {
680715
result = "true";
@@ -710,7 +745,7 @@ AdbcStatusCode PostgresStatement::GetOptionInt(const char* key, int64_t* value,
710745
struct AdbcError* error) {
711746
std::string result;
712747
if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_BATCH_SIZE_HINT_BYTES) == 0) {
713-
*value = reader_.batch_size_hint_bytes_;
748+
*value = reader_->batch_size_hint_bytes_;
714749
return ADBC_STATUS_OK;
715750
}
716751
SetError(error, "[libpq] Unknown statement option '%s'", key);
@@ -799,7 +834,7 @@ AdbcStatusCode PostgresStatement::SetOption(const char* key, const char* value,
799834
return ADBC_STATUS_INVALID_ARGUMENT;
800835
}
801836

802-
this->reader_.batch_size_hint_bytes_ = int_value;
837+
this->batch_size_hint_bytes_ = this->reader_->batch_size_hint_bytes_ = int_value;
803838
} else if (std::strcmp(key, ADBC_POSTGRESQL_OPTION_USE_COPY) == 0) {
804839
if (std::strcmp(value, ADBC_OPTION_VALUE_ENABLED) == 0) {
805840
use_copy_ = true;
@@ -836,7 +871,7 @@ AdbcStatusCode PostgresStatement::SetOptionInt(const char* key, int64_t value,
836871
return ADBC_STATUS_INVALID_ARGUMENT;
837872
}
838873

839-
this->reader_.batch_size_hint_bytes_ = value;
874+
this->batch_size_hint_bytes_ = this->reader_->batch_size_hint_bytes_ = value;
840875
return ADBC_STATUS_OK;
841876
}
842877
SetError(error, "[libpq] Unknown statement option '%s'", key);
@@ -845,7 +880,9 @@ AdbcStatusCode PostgresStatement::SetOptionInt(const char* key, int64_t value,
845880

846881
void PostgresStatement::ClearResult() {
847882
// TODO: we may want to synchronize here for safety
848-
reader_.Release();
883+
if (reader_) reader_->Release();
884+
reader_ = std::make_shared<TupleReader>(connection_->conn());
885+
reader_->batch_size_hint_bytes_ = batch_size_hint_bytes_;
849886
}
850887

851888
int PostgresStatement::UseCopy() {

c/driver/postgresql/statement.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ namespace adbcpq {
3939
class PostgresConnection;
4040
class PostgresStatement;
4141

42+
constexpr static int64_t kDefaultBatchSizeHintBytes = 16777216;
43+
4244
/// \brief An ArrowArrayStream that reads tuples from a PGresult.
43-
class TupleReader final {
45+
class TupleReader final : public std::enable_shared_from_this<TupleReader> {
4446
public:
4547
TupleReader(PGconn* conn)
4648
: status_(ADBC_STATUS_OK),
@@ -50,7 +52,7 @@ class TupleReader final {
5052
pgbuf_(nullptr),
5153
copy_reader_(nullptr),
5254
row_id_(-1),
53-
batch_size_hint_bytes_(16777216),
55+
batch_size_hint_bytes_(kDefaultBatchSizeHintBytes),
5456
is_finished_(false) {
5557
ArrowErrorInit(&na_error_);
5658
data_.data.as_char = nullptr;
@@ -98,7 +100,8 @@ class PostgresStatement {
98100
query_(),
99101
prepared_(false),
100102
use_copy_(-1),
101-
reader_(nullptr) {
103+
reader_(nullptr),
104+
batch_size_hint_bytes_(kDefaultBatchSizeHintBytes) {
102105
std::memset(&bind_, 0, sizeof(bind_));
103106
}
104107

@@ -170,7 +173,8 @@ class PostgresStatement {
170173
bool temporary = false;
171174
} ingest_;
172175

173-
TupleReader reader_;
176+
std::shared_ptr<TupleReader> reader_;
177+
int64_t batch_size_hint_bytes_;
174178

175179
int UseCopy();
176180
};

c/integration/duckdb/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ include(FetchContent)
2020
if(ADBC_BUILD_TESTS)
2121
fetchcontent_declare(duckdb
2222
GIT_REPOSITORY https://github.com/duckdb/duckdb.git
23-
GIT_TAG 0d84ccf478578278b2d1168675b8b93c60f78a5e # v0.9.0
23+
GIT_TAG 8e52ec43959ab363643d63cb78ee214577111da4 # v1.2.1
2424
GIT_PROGRESS TRUE
2525
USES_TERMINAL_DOWNLOAD TRUE)
2626
set(BUILD_JEMALLOC_EXTENSION

c/integration/duckdb/duckdb_test.cc

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,21 @@
1616
// under the License.
1717

1818
#include <cstdlib>
19+
#include <stdexcept>
1920
#include <string>
21+
#include "gmock/gmock.h"
2022

2123
#include <arrow-adbc/adbc.h>
2224
#include <arrow-adbc/adbc_driver_manager.h>
25+
#define ADBC_EXPORTING // duckdb changed the include guard...
2326
#include <duckdb/common/adbc/adbc-init.hpp>
2427

2528
#include "validation/adbc_validation.h"
2629
#include "validation/adbc_validation_util.h"
2730

2831
// Convert between our definitions and DuckDB's
2932
AdbcStatusCode DuckDbDriverInitFunc(int version, void* driver, struct AdbcError* error) {
30-
return duckdb_adbc_init(static_cast<size_t>(version),
31-
reinterpret_cast<duckdb_adbc::AdbcDriver*>(driver),
32-
reinterpret_cast<duckdb_adbc::AdbcError*>(error));
33+
return duckdb_adbc_init(static_cast<size_t>(version), driver, error);
3334
}
3435

3536
class DuckDbQuirks : public adbc_validation::DriverQuirks {
@@ -109,10 +110,38 @@ class DuckDbStatementTest : public ::testing::Test,
109110
GTEST_SKIP() << "Cannot query rows affected in delete stream (not implemented)";
110111
}
111112

113+
void TestSqlQueryTrailingSemicolons() {
114+
ASSERT_THAT(AdbcStatementNew(&connection, &statement, &error),
115+
adbc_validation::IsOkStatus(&error));
116+
117+
ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, "INSTALL icu", &error),
118+
adbc_validation::IsOkStatus(&error));
119+
120+
ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error),
121+
adbc_validation::IsOkStatus(&error));
122+
123+
ASSERT_THAT(AdbcStatementSetSqlQuery(&statement, "LOAD icu", &error),
124+
adbc_validation::IsOkStatus(&error));
125+
126+
ASSERT_THAT(AdbcStatementExecuteQuery(&statement, nullptr, nullptr, &error),
127+
adbc_validation::IsOkStatus(&error));
128+
129+
ASSERT_THAT(AdbcStatementRelease(&statement, &error),
130+
adbc_validation::IsOkStatus(&error));
131+
132+
adbc_validation::StatementTest::TestSqlQueryTrailingSemicolons();
133+
}
134+
112135
void TestErrorCompatibility() {
113136
GTEST_SKIP() << "DuckDB does not set AdbcError.release";
114137
}
115138

139+
void TestResultIndependence() {
140+
// DuckDB detects this by throwing
141+
ASSERT_THAT([this]() { adbc_validation::StatementTest::TestResultIndependence(); },
142+
::testing::Throws<std::runtime_error>());
143+
}
144+
116145
protected:
117146
DuckDbQuirks quirks_;
118147
};

0 commit comments

Comments
 (0)