Skip to content

Commit 239dc55

Browse files
committed
Fwd port of PR 18658: Load pandas in import cache before binding
1 parent 4308076 commit 239dc55

File tree

2 files changed: +37 -0 lines changed

src/duckdb_py/pandas/bind.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "duckdb_python/pandas/pandas_bind.hpp"
22
#include "duckdb_python/pandas/pandas_analyzer.hpp"
33
#include "duckdb_python/pandas/column/pandas_numpy_column.hpp"
4+
#include "duckdb_python/pyconnection/pyconnection.hpp"
45

56
namespace duckdb {
67

@@ -123,6 +124,14 @@ void Pandas::Bind(const ClientContext &context, py::handle df_p, vector<PandasCo
123124
throw InvalidInputException("Need a DataFrame with at least one column");
124125
}
125126

127+
auto &import_cache = *DuckDBPyConnection::ImportCache();
128+
auto pandas = import_cache.pandas();
129+
if (!pandas) {
130+
throw InvalidInputException("'pandas' is required for this operation, but it wasn't installed");
131+
}
132+
(void)import_cache.pandas.NA();
133+
(void)import_cache.pandas.NaT();
134+
126135
return_types.reserve(column_count);
127136
names.reserve(column_count);
128137
// loop over every column
(new Python test file; its path was not captured in this extract)

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
from conftest import NumpyPandas, ArrowPandas
2+
import duckdb
3+
import pytest
4+
5+
6+
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
7+
def test_import_cache_explicit_dtype(pandas):
8+
df = pandas.DataFrame(
9+
{
10+
'id': [1, 2, 3],
11+
'value': pandas.Series(['123.123', pandas.NaT, pandas.NA], dtype=pandas.StringDtype(storage='python')),
12+
}
13+
)
14+
con = duckdb.connect()
15+
result_df = con.query("select id, value from df").df()
16+
17+
assert result_df['value'][1] is None
18+
assert result_df['value'][2] is None
19+
20+
21+
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
22+
def test_import_cache_implicit_dtype(pandas):
23+
df = pandas.DataFrame({'id': [1, 2, 3], 'value': pandas.Series(['123.123', pandas.NaT, pandas.NA])})
24+
con = duckdb.connect()
25+
result_df = con.query("select id, value from df").df()
26+
27+
assert result_df['value'][1] is None
28+
assert result_df['value'][2] is None

0 commit comments

Comments (0)