From d6c877985794be494adaf7dcbbb043dc75860fc2 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Sun, 14 Sep 2025 05:07:12 +0000 Subject: [PATCH 01/40] chore: Add 3.14 and 3.14t builds: update GHA matrix, bump uv and cibuildwheel to include 3.14rc2, and handle new pandas warning, and mark unsupported packages as < 3.14. --- .github/workflows/cleanup_pypi.yml | 2 +- .github/workflows/coverage.yml | 2 +- .github/workflows/packaging_sdist.yml | 2 +- .github/workflows/packaging_wheels.yml | 9 ++++---- pyproject.toml | 32 +++++++++++++++++--------- tests/pytest.ini | 2 ++ 6 files changed, 31 insertions(+), 18 deletions(-) diff --git a/.github/workflows/cleanup_pypi.yml b/.github/workflows/cleanup_pypi.yml index c4300be3..e290faae 100644 --- a/.github/workflows/cleanup_pypi.yml +++ b/.github/workflows/cleanup_pypi.yml @@ -52,7 +52,7 @@ jobs: - name: Install Astral UV uses: astral-sh/setup-uv@v6 with: - version: "0.7.14" + version: "0.8.16" - name: Run Cleanup env: diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index fdd2a838..ab696897 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -70,7 +70,7 @@ jobs: - name: Install Astral UV and enable the cache uses: astral-sh/setup-uv@v6 with: - version: "0.7.14" + version: "0.8.16" python-version: 3.9 enable-cache: true cache-suffix: -${{ github.workflow }} diff --git a/.github/workflows/packaging_sdist.yml b/.github/workflows/packaging_sdist.yml index 2723b437..87923f4c 100644 --- a/.github/workflows/packaging_sdist.yml +++ b/.github/workflows/packaging_sdist.yml @@ -58,7 +58,7 @@ jobs: - name: Install Astral UV uses: astral-sh/setup-uv@v6 with: - version: "0.7.14" + version: "0.8.16" python-version: 3.11 - name: Build sdist diff --git a/.github/workflows/packaging_wheels.yml b/.github/workflows/packaging_wheels.yml index 4c7599a6..00e5cdea 100644 --- a/.github/workflows/packaging_wheels.yml +++ b/.github/workflows/packaging_wheels.yml @@ -30,7 +30,7 @@ jobs: 
strategy: fail-fast: false matrix: - python: [ cp39, cp310, cp311, cp312, cp313 ] + python: [ cp39, cp310, cp311, cp312, cp313, cp314, cp314t ] platform: - { os: windows-2025, arch: amd64, cibw_system: win } - { os: ubuntu-24.04, arch: x86_64, cibw_system: manylinux } @@ -79,16 +79,17 @@ jobs: # Install Astral UV, which will be used as build-frontend for cibuildwheel - uses: astral-sh/setup-uv@v6 with: - version: "0.7.14" + version: "0.8.16" enable-cache: false cache-suffix: -${{ matrix.python }}-${{ matrix.platform.cibw_system }}_${{ matrix.platform.arch }} + python-version: ${{ matrix.python }} - name: Build${{ inputs.testsuite != 'none' && ' and test ' || ' ' }}wheels - uses: pypa/cibuildwheel@v3.0 + uses: pypa/cibuildwheel@v3.1 env: CIBW_ARCHS: ${{ matrix.platform.arch == 'amd64' && 'AMD64' || matrix.platform.arch }} CIBW_BUILD: ${{ matrix.python }}-${{ matrix.platform.cibw_system }}_${{ matrix.platform.arch }} - + CIBW_ENVIRONMENT: PYTHON_GIL=1 - name: Upload wheel uses: actions/upload-artifact@v4 with: diff --git a/pyproject.toml b/pyproject.toml index 6291b811..edd71a02 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,8 +47,8 @@ all = [ # users can install duckdb with 'duckdb[all]', which will install this l "fsspec", # used in duckdb.filesystem "numpy", # used in duckdb.experimental.spark and in duckdb.fetchnumpy() "pandas", # used for pandas dataframes all over the place - "pyarrow", # used for pyarrow support - "adbc_driver_manager", # for the adbc driver (TODO: this should live under the duckdb package) + "pyarrow; python_version < '3.14'", # used for pyarrow support + "adbc_driver_manager; python_version < '3.14'", # for the adbc driver (TODO: this should live under the duckdb package) ] ###################################################################################################### @@ -123,6 +123,13 @@ if.env.COVERAGE = false inherit.cmake.define = "append" cmake.define.DISABLE_UNITY = "1" +[[tool.scikit-build.overrides]] +# Windows 
Free-Threading +if.platform-system = "^win32" +if.abi-flags = "t" +inherit.cmake.define = "append" +cmake.define.CMAKE_C_FLAGS="/DPy_MOD_GIL_USED /DPy_GIL_DISABLED" +cmake.define.CMAKE_CXX_FLAGS="/DPy_MOD_GIL_USED /DPy_GIL_DISABLED" [tool.scikit-build.sdist] include = [ @@ -204,6 +211,7 @@ required-environments = [ # ... but do always resolve for all of them "python_version >= '3.9' and sys_platform == 'linux' and platform_machine == 'x86_64'", "python_version >= '3.9' and sys_platform == 'linux' and platform_machine == 'aarch64'", ] +prerelease = "allow" # for 3.14 # We just need pytorch for tests, wihtout GPU acceleration. PyPI doesn't host a cpu-only version for Linux, so we have # to configure the index url for cpu-only pytorch manually @@ -220,8 +228,8 @@ torchvision = [ { index = "pytorch-cpu" } ] stubdeps = [ # dependencies used for typehints in the stubs "fsspec", "pandas", - "polars", - "pyarrow", + "polars; python_version < '3.14'", + "pyarrow; python_version < '3.14'", ] test = [ # dependencies used for running tests "pytest", @@ -229,21 +237,21 @@ test = [ # dependencies used for running tests "pytest-timeout", "mypy", "coverage", - "gcovr", + "gcovr; python_version < '3.14'", "gcsfs", "packaging", - "polars", + "polars; python_version < '3.14'", "psutil", "py4j", "pyotp", - "pyspark", + "pyspark; python_version < '3.14'", "pytz", "requests", "urllib3", "fsspec>=2022.11.0", "pandas>=2.0.0", - "pyarrow>=18.0.0", - "torch>=2.2.2; sys_platform != 'darwin' or platform_machine != 'x86_64' or python_version < '3.13'", + "pyarrow>=18.0.0; python_version < '3.14'", + "torch>=2.2.2; python_version < '3.14' and (sys_platform != 'darwin' or platform_machine != 'x86_64' or python_version < '3.13')", "tensorflow==2.14.0; sys_platform == 'darwin' and python_version < '3.12'", "tensorflow-cpu>=2.14.0; sys_platform == 'linux' and platform_machine != 'aarch64' and python_version < '3.12'", "tensorflow-cpu>=2.14.0; sys_platform == 'win32' and python_version < '3.12'", @@ 
-258,8 +266,8 @@ scripts = [ # dependencies used for running scripts "numpy", "pandas", "pcpp", - "polars", - "pyarrow", + "polars; python_version < '3.14'", + "pyarrow; python_version < '3.14'", "pytz" ] pypi = [ # dependencies used by the pypi cleanup script @@ -305,6 +313,7 @@ filterwarnings = [ # Pyspark is throwing these warnings "ignore:distutils Version classes are deprecated:DeprecationWarning", "ignore:is_datetime64tz_dtype is deprecated:DeprecationWarning", + "ignore:ChainedAssignmentError.*:FutureWarning" ] [tool.coverage.run] @@ -379,6 +388,7 @@ manylinux-x86_64-image = "manylinux_2_28" manylinux-pypy_x86_64-image = "manylinux_2_28" manylinux-aarch64-image = "manylinux_2_28" manylinux-pypy_aarch64-image = "manylinux_2_28" +enable = ["cpython-freethreading", "cpython-prerelease"] [tool.cibuildwheel.linux] before-build = ["yum install -y ccache"] diff --git a/tests/pytest.ini b/tests/pytest.ini index 0c17afd5..5081ee33 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -2,6 +2,8 @@ [pytest] filterwarnings = error + # Pandas ChainedAssignmentError warnings for 3.0 + ignore:ChainedAssignmentError.*:FutureWarning ignore::UserWarning ignore::DeprecationWarning # Jupyter is throwing DeprecationWarnings From 69415c1278ad5e5ada666f93fb7fbef17f7042be Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Sun, 14 Sep 2025 18:58:38 +0000 Subject: [PATCH 02/40] chore: remove pandas 3.0 warnings -> instead, disable pandas for 3.14 for now. --- pyproject.toml | 3 +- tests/conftest.py | 40 ++++++++++++++++++++----- tests/fast/numpy/test_numpy_new_path.py | 1 + tests/pytest.ini | 2 -- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index edd71a02..657ab2b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -211,7 +211,7 @@ required-environments = [ # ... 
but do always resolve for all of them "python_version >= '3.9' and sys_platform == 'linux' and platform_machine == 'x86_64'", "python_version >= '3.9' and sys_platform == 'linux' and platform_machine == 'aarch64'", ] -prerelease = "allow" # for 3.14 +prerelease = "if-necessary-or-explicit" # for 3.14 # We just need pytorch for tests, wihtout GPU acceleration. PyPI doesn't host a cpu-only version for Linux, so we have # to configure the index url for cpu-only pytorch manually @@ -313,7 +313,6 @@ filterwarnings = [ # Pyspark is throwing these warnings "ignore:distutils Version classes are deprecated:DeprecationWarning", "ignore:is_datetime64tz_dtype is deprecated:DeprecationWarning", - "ignore:ChainedAssignmentError.*:FutureWarning" ] [tool.coverage.run] diff --git a/tests/conftest.py b/tests/conftest.py index ce2d0e68..6c3cb2fb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,24 +6,37 @@ import duckdb import warnings from importlib import import_module +import sys try: # need to ignore warnings that might be thrown deep inside pandas's import tree (from dateutil in this case) - warnings.simplefilter(action='ignore', category=DeprecationWarning) - pandas = import_module('pandas') + warnings.simplefilter(action="ignore", category=DeprecationWarning) + pandas = import_module("pandas") warnings.resetwarnings() - pyarrow_dtype = getattr(pandas, 'ArrowDtype', None) + pyarrow_dtype = getattr(pandas, "ArrowDtype", None) except ImportError: pandas = None pyarrow_dtype = None + # Only install mock after we've failed to import pandas for conftest.py + class MockPandas: + def __getattr__(self, name): + pytest.skip("pandas not available", allow_module_level=True) + + sys.modules["pandas"] = MockPandas() + sys.modules["pandas.testing"] = MockPandas() + sys.modules["pandas._testing"] = MockPandas() + # Check if pandas has arrow dtypes enabled -try: - from pandas.compat import pa_version_under7p0 +if pandas is not None: + try: + from pandas.compat import 
pa_version_under7p0 - pyarrow_dtypes_enabled = not pa_version_under7p0 -except ImportError: + pyarrow_dtypes_enabled = not pa_version_under7p0 + except (ImportError, AttributeError): + pyarrow_dtypes_enabled = False +else: pyarrow_dtypes_enabled = False @@ -31,7 +44,7 @@ def import_pandas(): if pandas: return pandas else: - pytest.skip("Couldn't import pandas") + pytest.skip("Couldn't import pandas", allow_module_level=True) # https://docs.pytest.org/en/latest/example/simple.html#control-skipping-of-tests-according-to-command-line-option @@ -39,6 +52,17 @@ def import_pandas(): def pytest_addoption(parser): parser.addoption("--skiplist", action="append", nargs="+", type=str, help="skip listed tests") +@pytest.hookimpl(hookwrapper=True) +def pytest_runtest_call(item): + """Convert pandas requirement exceptions to skips""" + outcome = yield + try: + outcome.get_result() + except Exception as e: + if "'pandas' is required for this operation but it was not installed" in str(e): + pytest.skip("pandas not available - test requires pandas functionality") + + def pytest_collection_modifyitems(config, items): tests_to_skip = config.getoption("--skiplist") diff --git a/tests/fast/numpy/test_numpy_new_path.py b/tests/fast/numpy/test_numpy_new_path.py index 4267085c..6e424c9f 100644 --- a/tests/fast/numpy/test_numpy_new_path.py +++ b/tests/fast/numpy/test_numpy_new_path.py @@ -6,6 +6,7 @@ import duckdb from datetime import timedelta import pytest +import pandas # https://github.com/duckdb/duckdb-python/issues/48 class TestScanNumpy(object): diff --git a/tests/pytest.ini b/tests/pytest.ini index 5081ee33..0c17afd5 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -2,8 +2,6 @@ [pytest] filterwarnings = error - # Pandas ChainedAssignmentError warnings for 3.0 - ignore:ChainedAssignmentError.*:FutureWarning ignore::UserWarning ignore::DeprecationWarning # Jupyter is throwing DeprecationWarnings From ded40ec1b1e981198e92344c12b23cc97926af39 Mon Sep 17 00:00:00 2001 From: 
paultiq <104510378+paultiq@users.noreply.github.com> Date: Sun, 14 Sep 2025 16:45:18 -0400 Subject: [PATCH 03/40] test: Disable Pandas for 3.14 Not yet available --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 657ab2b8..9a1cb980 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ all = [ # users can install duckdb with 'duckdb[all]', which will install this l "ipython", # used in duckdb.query_graph "fsspec", # used in duckdb.filesystem "numpy", # used in duckdb.experimental.spark and in duckdb.fetchnumpy() - "pandas", # used for pandas dataframes all over the place + "pandas; python_version < '3.14'", # used for pandas dataframes all over the place "pyarrow; python_version < '3.14'", # used for pyarrow support "adbc_driver_manager; python_version < '3.14'", # for the adbc driver (TODO: this should live under the duckdb package) ] @@ -227,7 +227,7 @@ torchvision = [ { index = "pytorch-cpu" } ] [dependency-groups] # used for development only, requires pip >=25.1.0 stubdeps = [ # dependencies used for typehints in the stubs "fsspec", - "pandas", + "pandas; python_version < '3.14'", "polars; python_version < '3.14'", "pyarrow; python_version < '3.14'", ] @@ -249,7 +249,7 @@ test = [ # dependencies used for running tests "requests", "urllib3", "fsspec>=2022.11.0", - "pandas>=2.0.0", + "pandas>=2.0.0; python_version < '3.14'", "pyarrow>=18.0.0; python_version < '3.14'", "torch>=2.2.2; python_version < '3.14' and (sys_platform != 'darwin' or platform_machine != 'x86_64' or python_version < '3.13')", "tensorflow==2.14.0; sys_platform == 'darwin' and python_version < '3.12'", @@ -264,7 +264,7 @@ scripts = [ # dependencies used for running scripts "ipython", "ipywidgets", "numpy", - "pandas", + "pandas; python_version < '3.14'", "pcpp", "polars; python_version < '3.14'", "pyarrow; python_version < '3.14'", From 2ad03500ac59a5188d84735b29db8223272e4286 Mon Sep 17 00:00:00 2001 
From: Paul Timmins Date: Mon, 15 Sep 2025 02:15:05 +0000 Subject: [PATCH 04/40] test: disable failing test "Windows fatal exception: access violation" --- tests/fast/api/test_connection_interrupt.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/fast/api/test_connection_interrupt.py b/tests/fast/api/test_connection_interrupt.py index 4efd68b5..eae6cbb8 100644 --- a/tests/fast/api/test_connection_interrupt.py +++ b/tests/fast/api/test_connection_interrupt.py @@ -1,12 +1,13 @@ import platform import threading import time - +import sys import duckdb import pytest class TestConnectionInterrupt(object): + @pytest.mark.xfail(sys.platform == "win32" and sys.version_info[:2] == (3, 14) and __import__('sysconfig').get_config_var("Py_GIL_DISABLED") == 1, reason="known issue on Windows 3.14t (free-threaded)", strict=False) @pytest.mark.xfail( condition=platform.system() == "Emscripten", reason="threads not allowed on Emscripten", From 557df244fb22c688cc463127bfea6fa1fba07632 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Mon, 15 Sep 2025 02:43:20 +0000 Subject: [PATCH 05/40] tests: skip, don't xfail --- tests/fast/api/test_connection_interrupt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fast/api/test_connection_interrupt.py b/tests/fast/api/test_connection_interrupt.py index eae6cbb8..931ceaeb 100644 --- a/tests/fast/api/test_connection_interrupt.py +++ b/tests/fast/api/test_connection_interrupt.py @@ -7,7 +7,7 @@ class TestConnectionInterrupt(object): - @pytest.mark.xfail(sys.platform == "win32" and sys.version_info[:2] == (3, 14) and __import__('sysconfig').get_config_var("Py_GIL_DISABLED") == 1, reason="known issue on Windows 3.14t (free-threaded)", strict=False) + @pytest.mark.skipif(sys.platform == "win32" and sys.version_info[:2] == (3, 14) and __import__('sysconfig').get_config_var("Py_GIL_DISABLED") == 1, reason="known issue on Windows 3.14t (free-threaded)") @pytest.mark.xfail( condition=platform.system() 
== "Emscripten", reason="threads not allowed on Emscripten", From b4e404e78fc8e8bcd3d75d7bf6b4b8b4ab92fef3 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Mon, 15 Sep 2025 03:22:05 +0000 Subject: [PATCH 06/40] exclude Windows --- .github/workflows/packaging_wheels.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/packaging_wheels.yml b/.github/workflows/packaging_wheels.yml index 00e5cdea..f1e9ddf0 100644 --- a/.github/workflows/packaging_wheels.yml +++ b/.github/workflows/packaging_wheels.yml @@ -45,6 +45,8 @@ jobs: - { minimal: true, python: cp311 } - { minimal: true, python: cp312 } - { minimal: true, platform: { arch: universal2 } } + - { python: cp314t, platform: { os: windows-2025 } } + runs-on: ${{ matrix.platform.os }} env: CIBW_TEST_SKIP: ${{ inputs.testsuite == 'none' && '*' || '*-macosx_universal2' }} From a540862c7c10de17863979adfe64b30dc4838980 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Mon, 15 Sep 2025 03:23:40 +0000 Subject: [PATCH 07/40] tests: revert the skip since we're excluding Windows 3.14t builds entirely. 
--- tests/fast/api/test_connection_interrupt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fast/api/test_connection_interrupt.py b/tests/fast/api/test_connection_interrupt.py index 931ceaeb..f9fa37d0 100644 --- a/tests/fast/api/test_connection_interrupt.py +++ b/tests/fast/api/test_connection_interrupt.py @@ -7,7 +7,6 @@ class TestConnectionInterrupt(object): - @pytest.mark.skipif(sys.platform == "win32" and sys.version_info[:2] == (3, 14) and __import__('sysconfig').get_config_var("Py_GIL_DISABLED") == 1, reason="known issue on Windows 3.14t (free-threaded)") @pytest.mark.xfail( condition=platform.system() == "Emscripten", reason="threads not allowed on Emscripten", From 3f8c7d7f95dfdfcc26c67f407e219e6aa4dd2bdf Mon Sep 17 00:00:00 2001 From: "paul@iqmo.com" Date: Sun, 14 Sep 2025 23:55:11 -0400 Subject: [PATCH 08/40] revert: import that was added, no longer needed --- tests/fast/api/test_connection_interrupt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fast/api/test_connection_interrupt.py b/tests/fast/api/test_connection_interrupt.py index f9fa37d0..ce9d2599 100644 --- a/tests/fast/api/test_connection_interrupt.py +++ b/tests/fast/api/test_connection_interrupt.py @@ -1,7 +1,6 @@ import platform import threading import time -import sys import duckdb import pytest From 8b11bc43a03ed8a4f0a3f0601d5216371b81cf72 Mon Sep 17 00:00:00 2001 From: "paul@iqmo.com" Date: Mon, 15 Sep 2025 00:34:12 -0400 Subject: [PATCH 09/40] revert: exactly to original --- tests/fast/api/test_connection_interrupt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/fast/api/test_connection_interrupt.py b/tests/fast/api/test_connection_interrupt.py index ce9d2599..4efd68b5 100644 --- a/tests/fast/api/test_connection_interrupt.py +++ b/tests/fast/api/test_connection_interrupt.py @@ -1,6 +1,7 @@ import platform import threading import time + import duckdb import pytest From 6c35985b0be27e02d4ee0a5dcfe5ae7f700555dc Mon Sep 17 00:00:00 2001 From: "paul@iqmo.com" 
Date: Mon, 15 Sep 2025 07:22:11 -0400 Subject: [PATCH 10/40] test: Mark test xfail --- tests/fast/numpy/test_numpy_new_path.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/fast/numpy/test_numpy_new_path.py b/tests/fast/numpy/test_numpy_new_path.py index 6e424c9f..abc09ef5 100644 --- a/tests/fast/numpy/test_numpy_new_path.py +++ b/tests/fast/numpy/test_numpy_new_path.py @@ -2,14 +2,15 @@ Therefore, we only test the new codes and exec paths. """ +import sys import numpy as np import duckdb from datetime import timedelta import pytest -import pandas # https://github.com/duckdb/duckdb-python/issues/48 class TestScanNumpy(object): + @pytest.mark.skipif(sys.version_info[:2] == (3, 14), reason="Fails when testing without pandas https://github.com/duckdb/duckdb-python/issues/48") def test_scan_numpy(self, duckdb_cursor): z = np.array([1, 2, 3]) res = duckdb_cursor.sql("select * from z").fetchall() From 64b70d6008be7aaafce0233e79f261fa5e3eb0a0 Mon Sep 17 00:00:00 2001 From: "paul@iqmo.com" Date: Mon, 15 Sep 2025 07:23:15 -0400 Subject: [PATCH 11/40] test: mark test xfail --- tests/fast/numpy/test_numpy_new_path.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fast/numpy/test_numpy_new_path.py b/tests/fast/numpy/test_numpy_new_path.py index abc09ef5..c1122797 100644 --- a/tests/fast/numpy/test_numpy_new_path.py +++ b/tests/fast/numpy/test_numpy_new_path.py @@ -10,7 +10,7 @@ class TestScanNumpy(object): - @pytest.mark.skipif(sys.version_info[:2] == (3, 14), reason="Fails when testing without pandas https://github.com/duckdb/duckdb-python/issues/48") + @pytest.mark.xfail(sys.version_info[:2] == (3, 14), reason="Fails when testing without pandas https://github.com/duckdb/duckdb-python/issues/48") def test_scan_numpy(self, duckdb_cursor): z = np.array([1, 2, 3]) res = duckdb_cursor.sql("select * from z").fetchall() From 80c57b0068de4f1f8f55271d84c6bbe8be5e822a Mon Sep 17 00:00:00 2001 From: "paul@iqmo.com" Date: Mon, 15 Sep 
2025 09:34:33 -0400 Subject: [PATCH 12/40] chore: Add comments and todo's for workflow changes --- .github/workflows/packaging_wheels.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/packaging_wheels.yml b/.github/workflows/packaging_wheels.yml index f1e9ddf0..463faea8 100644 --- a/.github/workflows/packaging_wheels.yml +++ b/.github/workflows/packaging_wheels.yml @@ -45,7 +45,9 @@ jobs: - { minimal: true, python: cp311 } - { minimal: true, python: cp312 } - { minimal: true, platform: { arch: universal2 } } - - { python: cp314t, platform: { os: windows-2025 } } + # Windows+cp314t disabled due to test failures in CI. + # TODO: Diagnose why tests fail (access violations) in some configurations + - { python: cp314t, platform: { os: windows-2025 } } runs-on: ${{ matrix.platform.os }} env: @@ -91,6 +93,8 @@ jobs: env: CIBW_ARCHS: ${{ matrix.platform.arch == 'amd64' && 'AMD64' || matrix.platform.arch }} CIBW_BUILD: ${{ matrix.python }}-${{ matrix.platform.cibw_system }}_${{ matrix.platform.arch }} + # PYTHON_GIL=1: Suppresses the RuntimeWarning that the GIL is enabled on free-threaded builds. + # TODO: Remove PYTHON_GIL=1 when free-threaded is supported. CIBW_ENVIRONMENT: PYTHON_GIL=1 - name: Upload wheel uses: actions/upload-artifact@v4 From e315bb77d6ce92a35bb4d1d08b3f66b03733625a Mon Sep 17 00:00:00 2001 From: "paul@iqmo.com" Date: Mon, 15 Sep 2025 09:38:57 -0400 Subject: [PATCH 13/40] chore: Remove unused section for Windows 3.14t builds. 
--- pyproject.toml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9a1cb980..bcbb24f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,14 +123,6 @@ if.env.COVERAGE = false inherit.cmake.define = "append" cmake.define.DISABLE_UNITY = "1" -[[tool.scikit-build.overrides]] -# Windows Free-Threading -if.platform-system = "^win32" -if.abi-flags = "t" -inherit.cmake.define = "append" -cmake.define.CMAKE_C_FLAGS="/DPy_MOD_GIL_USED /DPy_GIL_DISABLED" -cmake.define.CMAKE_CXX_FLAGS="/DPy_MOD_GIL_USED /DPy_GIL_DISABLED" - [tool.scikit-build.sdist] include = [ "README.md", From fd26187890c608cff891a96b4051c8d45773637b Mon Sep 17 00:00:00 2001 From: "paul@iqmo.com" Date: Mon, 15 Sep 2025 10:14:39 -0400 Subject: [PATCH 14/40] chore: Add version check to only allow no-Pandas for 3.14, plus a TODO --- tests/conftest.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6c3cb2fb..e2f427c3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import os +import sys import pytest import shutil from os.path import abspath, join, dirname, normpath @@ -52,16 +53,22 @@ def import_pandas(): def pytest_addoption(parser): parser.addoption("--skiplist", action="append", nargs="+", type=str, help="skip listed tests") + @pytest.hookimpl(hookwrapper=True) def pytest_runtest_call(item): """Convert pandas requirement exceptions to skips""" + outcome = yield - try: - outcome.get_result() - except Exception as e: - if "'pandas' is required for this operation but it was not installed" in str(e): - pytest.skip("pandas not available - test requires pandas functionality") + # TODO: Remove skip when Pandas releases for 3.14. 
After, consider bumping to 3.15 + if sys.version_info[:2] == (3, 14): + try: + outcome.get_result() + except Exception as e: + if "'pandas' is required for this operation but it was not installed" in str(e): + pytest.skip("pandas not available - test requires pandas functionality") + else: + raise e def pytest_collection_modifyitems(config, items): From 6044ea033df3472cdfddc22bc20fc45953d04aad Mon Sep 17 00:00:00 2001 From: "paul@iqmo.com" Date: Mon, 15 Sep 2025 16:44:55 -0400 Subject: [PATCH 15/40] tests: Narrow pandas not installed skip to duckdb.InvalidInputException --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index e2f427c3..5e297aee 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -64,7 +64,7 @@ def pytest_runtest_call(item): if sys.version_info[:2] == (3, 14): try: outcome.get_result() - except Exception as e: + except duckdb.InvalidInputException as e: if "'pandas' is required for this operation but it was not installed" in str(e): pytest.skip("pandas not available - test requires pandas functionality") else: From d899bcfc5285a9e722c83d6d2f2c71d09d65be0a Mon Sep 17 00:00:00 2001 From: "paul@iqmo.com" Date: Tue, 16 Sep 2025 07:07:27 -0400 Subject: [PATCH 16/40] tests: revert xfail for 3.14 now that #48 is merged --- tests/fast/numpy/test_numpy_new_path.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/fast/numpy/test_numpy_new_path.py b/tests/fast/numpy/test_numpy_new_path.py index c1122797..3735ff6e 100644 --- a/tests/fast/numpy/test_numpy_new_path.py +++ b/tests/fast/numpy/test_numpy_new_path.py @@ -10,7 +10,6 @@ class TestScanNumpy(object): - @pytest.mark.xfail(sys.version_info[:2] == (3, 14), reason="Fails when testing without pandas https://github.com/duckdb/duckdb-python/issues/48") def test_scan_numpy(self, duckdb_cursor): z = np.array([1, 2, 3]) res = duckdb_cursor.sql("select * from z").fetchall() From c535a2390107e4056bc376e56663a22ca5f8740e Mon Sep 17 
00:00:00 2001 From: Evert Lammerts Date: Wed, 17 Sep 2025 10:45:41 +0200 Subject: [PATCH 17/40] Packaging workflow should respect the 'minimal' input param --- .github/workflows/packaging.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml index 507c7bda..16771deb 100644 --- a/.github/workflows/packaging.yml +++ b/.github/workflows/packaging.yml @@ -74,7 +74,7 @@ jobs: name: Build and test releases uses: ./.github/workflows/packaging_wheels.yml with: - minimal: false + minimal: ${{ inputs.minimal }} testsuite: all duckdb-python-sha: ${{ inputs.duckdb-python-sha != '' && inputs.duckdb-python-sha || github.sha }} duckdb-sha: ${{ inputs.duckdb-sha }} From c592c6e3a9fc37a3aca71e0773c82e214717e10e Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 11:37:10 +0000 Subject: [PATCH 18/40] tests: Use a unique file for each database, for concurrent test isolation --- tests/conftest.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5e297aee..19586787 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -267,11 +267,9 @@ def spark(): @pytest.fixture(scope='function') -def duckdb_cursor(): - connection = duckdb.connect('') - yield connection - connection.close() - +def duckdb_cursor(tmp_path): + with duckdb.connect(tmp_path / "mytest") as connection: + yield connection @pytest.fixture(scope='function') def integers(duckdb_cursor): From 624891bbf312699a16c660eb623f63ad0ad57cba Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 11:39:48 +0000 Subject: [PATCH 19/40] tests: pytest plugins: randomly to randomize order, xdist for multiprocessing, run-parallel for multi-threaded --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index bcbb24f6..e8a342b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -227,6 +227,9 @@ test = [ # dependencies 
used for running tests "pytest", "pytest-reraise", "pytest-timeout", + "pytest-xdist", # multi-processed tests, if `-n | auto` + "pytest-randomly", # randomizes test order to ensure no test dependencies, enabled on install + "pytest-run-parallel", # multi-threaded tests, if `--parallel-threads=N --iterations=N` "mypy", "coverage", "gcovr; python_version < '3.14'", From 1d390695cb564e92ba5e35fc9c0533c2ffc79a4d Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 12:41:35 +0000 Subject: [PATCH 20/40] tests: using a tmp_path_factory for concurrent testing --- tests/slow/test_h2oai_arrow.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/slow/test_h2oai_arrow.py b/tests/slow/test_h2oai_arrow.py index 40bde07b..7ff37d01 100644 --- a/tests/slow/test_h2oai_arrow.py +++ b/tests/slow/test_h2oai_arrow.py @@ -194,8 +194,10 @@ def test_join(self, threads, function, large_data): @fixture(scope="module") -def arrow_dataset_register(): +def arrow_dataset_register(tmp_path_factory): """Single fixture to download files and register them on the given connection""" + temp_dir = tmp_path_factory.mktemp("h2oai_data") + session = requests.Session() retries = urllib3_util.Retry( allowed_methods={'GET'}, # only retry on GETs (all we do) @@ -212,19 +214,15 @@ def arrow_dataset_register(): respect_retry_after_header=True, # respect Retry-After headers ) session.mount('https://', requests_adapters.HTTPAdapter(max_retries=retries)) - saved_filenames = set() def _register(url, filename, con, tablename): + file_path = temp_dir / filename r = session.get(url) - with open(filename, 'wb') as f: - f.write(r.content) - con.register(tablename, read_csv(filename)) - saved_filenames.add(filename) + file_path.write_bytes(r.content) + con.register(tablename, read_csv(str(file_path))) yield _register - for filename in saved_filenames: - os.remove(filename) session.close() @@ -269,4 +267,4 @@ def group_by_data(arrow_dataset_register): "x", ) yield con 
- con.close() + con.close() \ No newline at end of file From b21a5bf80ae04873163d395c4b57dd15311e0699 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 12:56:27 +0000 Subject: [PATCH 21/40] tests: add a 2 minute test timeout. Tests can override this with @pytest.mark.timeout(60) --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index e8a342b1..fc8c862b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -309,6 +309,7 @@ filterwarnings = [ "ignore:distutils Version classes are deprecated:DeprecationWarning", "ignore:is_datetime64tz_dtype is deprecated:DeprecationWarning", ] +timeout = 300 # don't let individual tests run for more than 5 minutes [tool.coverage.run] branch = true From 98f36687167d0f3e3a2996d58a10b771acf42c28 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 12:56:34 +0000 Subject: [PATCH 22/40] tests: 2 minutes --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fc8c862b..accdc818 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -309,7 +309,7 @@ filterwarnings = [ "ignore:distutils Version classes are deprecated:DeprecationWarning", "ignore:is_datetime64tz_dtype is deprecated:DeprecationWarning", ] -timeout = 300 # don't let individual tests run for more than 5 minutes +timeout = 120 # don't let individual tests run for more than 2 minutes [tool.coverage.run] branch = true From f6e0cd914fc1ffda0d3db47440cb4c9ef8a02676 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 13:58:16 +0000 Subject: [PATCH 23/40] tests: modify tests to be threading safe *or* explicitly marked as unsafe --- tests/fast/api/test_duckdb_connection.py | 173 ++++++++++++----- 1 file changed, 89 insertions(+), 84 deletions(-) diff --git a/tests/fast/api/test_duckdb_connection.py b/tests/fast/api/test_duckdb_connection.py index 4cb565c1..65900bfa 100644 --- a/tests/fast/api/test_duckdb_connection.py +++ 
b/tests/fast/api/test_duckdb_connection.py @@ -24,19 +24,21 @@ def tmp_database(tmp_path_factory): # wrapped by the 'duckdb' module, to execute with the 'default_connection' class TestDuckDBConnection(object): @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_append(self, pandas): - duckdb.execute("Create table integers (i integer)") + def test_append(self, pandas, duckdb_cursor): + duckdb_cursor.execute("Create table integers (i integer)") df_in = pandas.DataFrame( { 'numbers': [1, 2, 3, 4, 5], } ) - duckdb.append('integers', df_in) - assert duckdb.execute('select count(*) from integers').fetchone()[0] == 5 + duckdb_cursor.append('integers', df_in) + assert duckdb_cursor.execute('select count(*) from integers').fetchone()[0] == 5 # cleanup - duckdb.execute("drop table integers") + duckdb_cursor.execute("drop table integers") - def test_default_connection_from_connect(self): + # Not thread safe because it creates a table in the default connection + @pytest.mark.thread_unsafe + def test_default_connection_from_connect(self, duckdb_cursor): duckdb.sql('create or replace table connect_default_connect (i integer)') con = duckdb.connect(':default:') con.sql('select i from connect_default_connect') @@ -55,21 +57,24 @@ def test_arrow(self): duckdb.execute("select [1,2,3]") result = duckdb.fetch_arrow_table() - def test_begin_commit(self): - duckdb.begin() - duckdb.execute("create table tbl as select 1") - duckdb.commit() - res = duckdb.table("tbl") - duckdb.execute("drop table tbl") - - def test_begin_rollback(self): - duckdb.begin() - duckdb.execute("create table tbl as select 1") - duckdb.rollback() + def test_begin_commit(self, duckdb_cursor): + duckdb_cursor.begin() + duckdb_cursor.execute("create table tbl as select 1") + duckdb_cursor.commit() + res = duckdb_cursor.table("tbl") + duckdb_cursor.execute("drop table tbl") + + def test_begin_rollback(self, duckdb_cursor): + duckdb_cursor.begin() + duckdb_cursor.execute("create table tbl as 
select 1") + duckdb_cursor.rollback() with pytest.raises(duckdb.CatalogException): # Table does not exist - res = duckdb.table("tbl") + res = duckdb_cursor.table("tbl") + + # Not thread safe because it creates a table in the default connection + @pytest.mark.thread_unsafe def test_cursor(self): duckdb.execute("create table tbl as select 3") duckdb_cursor = duckdb.cursor() @@ -95,6 +100,7 @@ def use_cursors(): use_cursors() con.close() + @pytest.mark.thread_unsafe def test_df(self): ref = [([1, 2, 3],)] duckdb.execute("select [1,2,3]") @@ -102,11 +108,11 @@ def test_df(self): res = duckdb.query("select * from res_df").fetchall() assert res == ref - def test_duplicate(self): - duckdb.execute("create table tbl as select 5") - dup_conn = duckdb.duplicate() + def test_duplicate(self, duckdb_cursor): + duckdb_cursor.execute("create table tbl as select 5") + dup_conn = duckdb_cursor.duplicate() dup_conn.table("tbl").fetchall() - duckdb.execute("drop table tbl") + duckdb_cursor.execute("drop table tbl") with pytest.raises(duckdb.CatalogException): dup_conn.table("tbl").fetchall() @@ -120,20 +126,20 @@ def test_readonly_properties(self): def test_execute(self): assert [([4, 2],)] == duckdb.execute("select [4,2]").fetchall() - def test_executemany(self): + def test_executemany(self, duckdb_cursor): # executemany does not keep an open result set # TODO: shouldn't we also have a version that executes a query multiple times with different parameters, returning all of the results? 
- duckdb.execute("create table tbl (i integer, j varchar)") - duckdb.executemany("insert into tbl VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')]) - res = duckdb.table("tbl").fetchall() + duckdb_cursor.execute("create table tbl (i integer, j varchar)") + duckdb_cursor.executemany("insert into tbl VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')]) + res = duckdb_cursor.table("tbl").fetchall() assert res == [(5, 'test'), (2, 'duck'), (42, 'quack')] - duckdb.execute("drop table tbl") + duckdb_cursor.execute("drop table tbl") - def test_pystatement(self): + def test_pystatement(self, duckdb_cursor): with pytest.raises(duckdb.ParserException, match='seledct'): - statements = duckdb.extract_statements('seledct 42; select 21') + statements = duckdb_cursor.extract_statements('seledct 42; select 21') - statements = duckdb.extract_statements('select $1; select 21') + statements = duckdb_cursor.extract_statements('select $1; select 21') assert len(statements) == 2 assert statements[0].query == 'select $1' assert statements[0].type == duckdb.StatementType.SELECT @@ -148,23 +154,23 @@ def test_pystatement(self): duckdb.InvalidInputException, match='Please provide either a DuckDBPyStatement or a string representing the query', ): - rel = duckdb.query(statements) + rel = duckdb_cursor.query(statements) with pytest.raises(duckdb.BinderException, match="This type of statement can't be prepared!"): - rel = duckdb.query(statements[0]) + rel = duckdb_cursor.query(statements[0]) - assert duckdb.query(statements[1]).fetchall() == [(21,)] - assert duckdb.execute(statements[1]).fetchall() == [(21,)] + assert duckdb_cursor.query(statements[1]).fetchall() == [(21,)] + assert duckdb_cursor.execute(statements[1]).fetchall() == [(21,)] with pytest.raises( duckdb.InvalidInputException, match='Values were not provided for the following prepared statement parameters: 1', ): - duckdb.execute(statements[0]) - assert duckdb.execute(statements[0], {'1': 42}).fetchall() == [(42,)] + 
duckdb_cursor.execute(statements[0]) + assert duckdb_cursor.execute(statements[0], {'1': 42}).fetchall() == [(42,)] - duckdb.execute("create table tbl(a integer)") - statements = duckdb.extract_statements('insert into tbl select $1') + duckdb_cursor.execute("create table tbl(a integer)") + statements = duckdb_cursor.extract_statements('insert into tbl select $1') assert statements[0].expected_result_type == [ duckdb.ExpectedResultType.CHANGED_ROWS, duckdb.ExpectedResultType.QUERY_RESULT, @@ -172,36 +178,37 @@ def test_pystatement(self): with pytest.raises( duckdb.InvalidInputException, match='executemany requires a non-empty list of parameter sets to be provided' ): - duckdb.executemany(statements[0]) - duckdb.executemany(statements[0], [(21,), (22,), (23,)]) - assert duckdb.table('tbl').fetchall() == [(21,), (22,), (23,)] - duckdb.execute("drop table tbl") + duckdb_cursor.executemany(statements[0]) + duckdb_cursor.executemany(statements[0], [(21,), (22,), (23,)]) + assert duckdb_cursor.table('tbl').fetchall() == [(21,), (22,), (23,)] + duckdb_cursor.execute("drop table tbl") - def test_fetch_arrow_table(self): + def test_fetch_arrow_table(self, duckdb_cursor): # Needed for 'fetch_arrow_table' pyarrow = pytest.importorskip("pyarrow") - duckdb.execute("Create Table test (a integer)") + duckdb_cursor.execute("Create Table test (a integer)") for i in range(1024): for j in range(2): - duckdb.execute("Insert Into test values ('" + str(i) + "')") - duckdb.execute("Insert Into test values ('5000')") - duckdb.execute("Insert Into test values ('6000')") + duckdb_cursor.execute("Insert Into test values ('" + str(i) + "')") + duckdb_cursor.execute("Insert Into test values ('5000')") + duckdb_cursor.execute("Insert Into test values ('6000')") sql = ''' SELECT a, COUNT(*) AS repetitions FROM test GROUP BY a ''' - result_df = duckdb.execute(sql).df() + result_df = duckdb_cursor.execute(sql).df() - arrow_table = duckdb.execute(sql).fetch_arrow_table() + arrow_table = 
duckdb_cursor.execute(sql).fetch_arrow_table() arrow_df = arrow_table.to_pandas() assert result_df['repetitions'].sum() == arrow_df['repetitions'].sum() - duckdb.execute("drop table test") + duckdb_cursor.execute("drop table test") + @pytest.mark.thread_unsafe def test_fetch_df(self): ref = [([1, 2, 3],)] duckdb.execute("select [1,2,3]") @@ -209,30 +216,31 @@ def test_fetch_df(self): res = duckdb.query("select * from res_df").fetchall() assert res == ref - def test_fetch_df_chunk(self): - duckdb.execute("CREATE table t as select range a from range(3000);") - query = duckdb.execute("SELECT a FROM t") + def test_fetch_df_chunk(self, duckdb_cursor): + duckdb_cursor.execute("CREATE table t as select range a from range(3000);") + query = duckdb_cursor.execute("SELECT a FROM t") cur_chunk = query.fetch_df_chunk() assert cur_chunk['a'][0] == 0 assert len(cur_chunk) == 2048 cur_chunk = query.fetch_df_chunk() assert cur_chunk['a'][0] == 2048 assert len(cur_chunk) == 952 - duckdb.execute("DROP TABLE t") + duckdb_cursor.execute("DROP TABLE t") - def test_fetch_record_batch(self): + def test_fetch_record_batch(self, duckdb_cursor): # Needed for 'fetch_arrow_table' pyarrow = pytest.importorskip("pyarrow") - duckdb.execute("CREATE table t as select range a from range(3000);") - duckdb.execute("SELECT a FROM t") - record_batch_reader = duckdb.fetch_record_batch(1024) + duckdb_cursor.execute("CREATE table t as select range a from range(3000);") + duckdb_cursor.execute("SELECT a FROM t") + record_batch_reader = duckdb_cursor.fetch_record_batch(1024) chunk = record_batch_reader.read_all() assert len(chunk) == 3000 def test_fetchall(self): assert [([1, 2, 3],)] == duckdb.execute("select [1,2,3]").fetchall() + @pytest.mark.thread_unsafe def test_fetchdf(self): ref = [([1, 2, 3],)] duckdb.execute("select [1,2,3]") @@ -286,13 +294,12 @@ def test_query(self): def test_register(self): assert None != duckdb.register - def test_register_relation(self): - con = duckdb.connect() - rel = 
con.sql('select [5,4,3]') - con.register("relation", rel) + def test_register_relation(self, duckdb_cursor): + rel = duckdb_cursor.sql('select [5,4,3]') + duckdb_cursor.register("relation", rel) - con.sql("create table tbl as select * from relation") - assert con.table('tbl').fetchall() == [([5, 4, 3],)] + duckdb_cursor.sql("create table tbl as select * from relation") + assert duckdb_cursor.table('tbl').fetchall() == [([5, 4, 3],)] def test_unregister_problematic_behavior(self, duckdb_cursor): # We have a VIEW called 'vw' in the Catalog @@ -314,27 +321,25 @@ def test_unregister_problematic_behavior(self, duckdb_cursor): assert duckdb_cursor.execute("select * from vw").fetchone() == (0,) @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_relation_out_of_scope(self, pandas): + def test_relation_out_of_scope(self, pandas, duckdb_cursor): def temporary_scope(): # Create a connection, we will return this - con = duckdb.connect() # Create a dataframe df = pandas.DataFrame({'a': [1, 2, 3]}) # The dataframe has to be registered as well # making sure it does not go out of scope - con.register("df", df) - rel = con.sql('select * from df') - con.register("relation", rel) - return con + duckdb_cursor.register("df", df) + rel = duckdb_cursor.sql('select * from df') + duckdb_cursor.register("relation", rel) + return duckdb_cursor - con = temporary_scope() - res = con.sql('select * from relation').fetchall() + duckdb_cursor = temporary_scope() + res = duckdb_cursor.sql('select * from relation').fetchall() print(res) - def test_table(self): - con = duckdb.connect() - con.execute("create table tbl as select 1") - assert [(1,)] == con.table("tbl").fetchall() + def test_table(self, duckdb_cursor): + duckdb_cursor.execute("create table tbl as select 1") + assert [(1,)] == duckdb_cursor.table("tbl").fetchall() def test_table_function(self): assert None != duckdb.table_function @@ -345,16 +350,16 @@ def test_unregister(self): def test_values(self): assert 
None != duckdb.values - def test_view(self): - duckdb.execute("create view vw as select range(5)") - assert [([0, 1, 2, 3, 4],)] == duckdb.view("vw").fetchall() - duckdb.execute("drop view vw") + def test_view(self, duckdb_cursor): + duckdb_cursor.execute("create view vw as select range(5)") + assert [([0, 1, 2, 3, 4],)] == duckdb_cursor.view("vw").fetchall() + duckdb_cursor.execute("drop view vw") - def test_close(self): - assert None != duckdb.close + def test_close(self, duckdb_cursor): + assert None != duckdb_cursor.close - def test_interrupt(self): - assert None != duckdb.interrupt + def test_interrupt(self, duckdb_cursor): + assert None != duckdb_cursor.interrupt def test_wrap_shadowing(self): pd = NumpyPandas() From c0ec7f1273d09b03a471b653e989da1dddbef9a8 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 16:08:18 +0000 Subject: [PATCH 24/40] revert --- tests/fast/api/test_duckdb_connection.py | 173 +++++++++++------------ 1 file changed, 84 insertions(+), 89 deletions(-) diff --git a/tests/fast/api/test_duckdb_connection.py b/tests/fast/api/test_duckdb_connection.py index 65900bfa..4cb565c1 100644 --- a/tests/fast/api/test_duckdb_connection.py +++ b/tests/fast/api/test_duckdb_connection.py @@ -24,21 +24,19 @@ def tmp_database(tmp_path_factory): # wrapped by the 'duckdb' module, to execute with the 'default_connection' class TestDuckDBConnection(object): @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_append(self, pandas, duckdb_cursor): - duckdb_cursor.execute("Create table integers (i integer)") + def test_append(self, pandas): + duckdb.execute("Create table integers (i integer)") df_in = pandas.DataFrame( { 'numbers': [1, 2, 3, 4, 5], } ) - duckdb_cursor.append('integers', df_in) - assert duckdb_cursor.execute('select count(*) from integers').fetchone()[0] == 5 + duckdb.append('integers', df_in) + assert duckdb.execute('select count(*) from integers').fetchone()[0] == 5 # cleanup - duckdb_cursor.execute("drop 
table integers") + duckdb.execute("drop table integers") - # Not thread safe because it creates a table in the default connection - @pytest.mark.thread_unsafe - def test_default_connection_from_connect(self, duckdb_cursor): + def test_default_connection_from_connect(self): duckdb.sql('create or replace table connect_default_connect (i integer)') con = duckdb.connect(':default:') con.sql('select i from connect_default_connect') @@ -57,24 +55,21 @@ def test_arrow(self): duckdb.execute("select [1,2,3]") result = duckdb.fetch_arrow_table() - def test_begin_commit(self, duckdb_cursor): - duckdb_cursor.begin() - duckdb_cursor.execute("create table tbl as select 1") - duckdb_cursor.commit() - res = duckdb_cursor.table("tbl") - duckdb_cursor.execute("drop table tbl") - - def test_begin_rollback(self, duckdb_cursor): - duckdb_cursor.begin() - duckdb_cursor.execute("create table tbl as select 1") - duckdb_cursor.rollback() + def test_begin_commit(self): + duckdb.begin() + duckdb.execute("create table tbl as select 1") + duckdb.commit() + res = duckdb.table("tbl") + duckdb.execute("drop table tbl") + + def test_begin_rollback(self): + duckdb.begin() + duckdb.execute("create table tbl as select 1") + duckdb.rollback() with pytest.raises(duckdb.CatalogException): # Table does not exist - res = duckdb_cursor.table("tbl") - + res = duckdb.table("tbl") - # Not thread safe because it creates a table in the default connection - @pytest.mark.thread_unsafe def test_cursor(self): duckdb.execute("create table tbl as select 3") duckdb_cursor = duckdb.cursor() @@ -100,7 +95,6 @@ def use_cursors(): use_cursors() con.close() - @pytest.mark.thread_unsafe def test_df(self): ref = [([1, 2, 3],)] duckdb.execute("select [1,2,3]") @@ -108,11 +102,11 @@ def test_df(self): res = duckdb.query("select * from res_df").fetchall() assert res == ref - def test_duplicate(self, duckdb_cursor): - duckdb_cursor.execute("create table tbl as select 5") - dup_conn = duckdb_cursor.duplicate() + def 
test_duplicate(self): + duckdb.execute("create table tbl as select 5") + dup_conn = duckdb.duplicate() dup_conn.table("tbl").fetchall() - duckdb_cursor.execute("drop table tbl") + duckdb.execute("drop table tbl") with pytest.raises(duckdb.CatalogException): dup_conn.table("tbl").fetchall() @@ -126,20 +120,20 @@ def test_readonly_properties(self): def test_execute(self): assert [([4, 2],)] == duckdb.execute("select [4,2]").fetchall() - def test_executemany(self, duckdb_cursor): + def test_executemany(self): # executemany does not keep an open result set # TODO: shouldn't we also have a version that executes a query multiple times with different parameters, returning all of the results? - duckdb_cursor.execute("create table tbl (i integer, j varchar)") - duckdb_cursor.executemany("insert into tbl VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')]) - res = duckdb_cursor.table("tbl").fetchall() + duckdb.execute("create table tbl (i integer, j varchar)") + duckdb.executemany("insert into tbl VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')]) + res = duckdb.table("tbl").fetchall() assert res == [(5, 'test'), (2, 'duck'), (42, 'quack')] - duckdb_cursor.execute("drop table tbl") + duckdb.execute("drop table tbl") - def test_pystatement(self, duckdb_cursor): + def test_pystatement(self): with pytest.raises(duckdb.ParserException, match='seledct'): - statements = duckdb_cursor.extract_statements('seledct 42; select 21') + statements = duckdb.extract_statements('seledct 42; select 21') - statements = duckdb_cursor.extract_statements('select $1; select 21') + statements = duckdb.extract_statements('select $1; select 21') assert len(statements) == 2 assert statements[0].query == 'select $1' assert statements[0].type == duckdb.StatementType.SELECT @@ -154,23 +148,23 @@ def test_pystatement(self, duckdb_cursor): duckdb.InvalidInputException, match='Please provide either a DuckDBPyStatement or a string representing the query', ): - rel = 
duckdb_cursor.query(statements) + rel = duckdb.query(statements) with pytest.raises(duckdb.BinderException, match="This type of statement can't be prepared!"): - rel = duckdb_cursor.query(statements[0]) + rel = duckdb.query(statements[0]) - assert duckdb_cursor.query(statements[1]).fetchall() == [(21,)] - assert duckdb_cursor.execute(statements[1]).fetchall() == [(21,)] + assert duckdb.query(statements[1]).fetchall() == [(21,)] + assert duckdb.execute(statements[1]).fetchall() == [(21,)] with pytest.raises( duckdb.InvalidInputException, match='Values were not provided for the following prepared statement parameters: 1', ): - duckdb_cursor.execute(statements[0]) - assert duckdb_cursor.execute(statements[0], {'1': 42}).fetchall() == [(42,)] + duckdb.execute(statements[0]) + assert duckdb.execute(statements[0], {'1': 42}).fetchall() == [(42,)] - duckdb_cursor.execute("create table tbl(a integer)") - statements = duckdb_cursor.extract_statements('insert into tbl select $1') + duckdb.execute("create table tbl(a integer)") + statements = duckdb.extract_statements('insert into tbl select $1') assert statements[0].expected_result_type == [ duckdb.ExpectedResultType.CHANGED_ROWS, duckdb.ExpectedResultType.QUERY_RESULT, @@ -178,37 +172,36 @@ def test_pystatement(self, duckdb_cursor): with pytest.raises( duckdb.InvalidInputException, match='executemany requires a non-empty list of parameter sets to be provided' ): - duckdb_cursor.executemany(statements[0]) - duckdb_cursor.executemany(statements[0], [(21,), (22,), (23,)]) - assert duckdb_cursor.table('tbl').fetchall() == [(21,), (22,), (23,)] - duckdb_cursor.execute("drop table tbl") + duckdb.executemany(statements[0]) + duckdb.executemany(statements[0], [(21,), (22,), (23,)]) + assert duckdb.table('tbl').fetchall() == [(21,), (22,), (23,)] + duckdb.execute("drop table tbl") - def test_fetch_arrow_table(self, duckdb_cursor): + def test_fetch_arrow_table(self): # Needed for 'fetch_arrow_table' pyarrow = 
pytest.importorskip("pyarrow") - duckdb_cursor.execute("Create Table test (a integer)") + duckdb.execute("Create Table test (a integer)") for i in range(1024): for j in range(2): - duckdb_cursor.execute("Insert Into test values ('" + str(i) + "')") - duckdb_cursor.execute("Insert Into test values ('5000')") - duckdb_cursor.execute("Insert Into test values ('6000')") + duckdb.execute("Insert Into test values ('" + str(i) + "')") + duckdb.execute("Insert Into test values ('5000')") + duckdb.execute("Insert Into test values ('6000')") sql = ''' SELECT a, COUNT(*) AS repetitions FROM test GROUP BY a ''' - result_df = duckdb_cursor.execute(sql).df() + result_df = duckdb.execute(sql).df() - arrow_table = duckdb_cursor.execute(sql).fetch_arrow_table() + arrow_table = duckdb.execute(sql).fetch_arrow_table() arrow_df = arrow_table.to_pandas() assert result_df['repetitions'].sum() == arrow_df['repetitions'].sum() - duckdb_cursor.execute("drop table test") + duckdb.execute("drop table test") - @pytest.mark.thread_unsafe def test_fetch_df(self): ref = [([1, 2, 3],)] duckdb.execute("select [1,2,3]") @@ -216,31 +209,30 @@ def test_fetch_df(self): res = duckdb.query("select * from res_df").fetchall() assert res == ref - def test_fetch_df_chunk(self, duckdb_cursor): - duckdb_cursor.execute("CREATE table t as select range a from range(3000);") - query = duckdb_cursor.execute("SELECT a FROM t") + def test_fetch_df_chunk(self): + duckdb.execute("CREATE table t as select range a from range(3000);") + query = duckdb.execute("SELECT a FROM t") cur_chunk = query.fetch_df_chunk() assert cur_chunk['a'][0] == 0 assert len(cur_chunk) == 2048 cur_chunk = query.fetch_df_chunk() assert cur_chunk['a'][0] == 2048 assert len(cur_chunk) == 952 - duckdb_cursor.execute("DROP TABLE t") + duckdb.execute("DROP TABLE t") - def test_fetch_record_batch(self, duckdb_cursor): + def test_fetch_record_batch(self): # Needed for 'fetch_arrow_table' pyarrow = pytest.importorskip("pyarrow") - 
duckdb_cursor.execute("CREATE table t as select range a from range(3000);") - duckdb_cursor.execute("SELECT a FROM t") - record_batch_reader = duckdb_cursor.fetch_record_batch(1024) + duckdb.execute("CREATE table t as select range a from range(3000);") + duckdb.execute("SELECT a FROM t") + record_batch_reader = duckdb.fetch_record_batch(1024) chunk = record_batch_reader.read_all() assert len(chunk) == 3000 def test_fetchall(self): assert [([1, 2, 3],)] == duckdb.execute("select [1,2,3]").fetchall() - @pytest.mark.thread_unsafe def test_fetchdf(self): ref = [([1, 2, 3],)] duckdb.execute("select [1,2,3]") @@ -294,12 +286,13 @@ def test_query(self): def test_register(self): assert None != duckdb.register - def test_register_relation(self, duckdb_cursor): - rel = duckdb_cursor.sql('select [5,4,3]') - duckdb_cursor.register("relation", rel) + def test_register_relation(self): + con = duckdb.connect() + rel = con.sql('select [5,4,3]') + con.register("relation", rel) - duckdb_cursor.sql("create table tbl as select * from relation") - assert duckdb_cursor.table('tbl').fetchall() == [([5, 4, 3],)] + con.sql("create table tbl as select * from relation") + assert con.table('tbl').fetchall() == [([5, 4, 3],)] def test_unregister_problematic_behavior(self, duckdb_cursor): # We have a VIEW called 'vw' in the Catalog @@ -321,25 +314,27 @@ def test_unregister_problematic_behavior(self, duckdb_cursor): assert duckdb_cursor.execute("select * from vw").fetchone() == (0,) @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_relation_out_of_scope(self, pandas, duckdb_cursor): + def test_relation_out_of_scope(self, pandas): def temporary_scope(): # Create a connection, we will return this + con = duckdb.connect() # Create a dataframe df = pandas.DataFrame({'a': [1, 2, 3]}) # The dataframe has to be registered as well # making sure it does not go out of scope - duckdb_cursor.register("df", df) - rel = duckdb_cursor.sql('select * from df') - 
duckdb_cursor.register("relation", rel) - return duckdb_cursor + con.register("df", df) + rel = con.sql('select * from df') + con.register("relation", rel) + return con - duckdb_cursor = temporary_scope() - res = duckdb_cursor.sql('select * from relation').fetchall() + con = temporary_scope() + res = con.sql('select * from relation').fetchall() print(res) - def test_table(self, duckdb_cursor): - duckdb_cursor.execute("create table tbl as select 1") - assert [(1,)] == duckdb_cursor.table("tbl").fetchall() + def test_table(self): + con = duckdb.connect() + con.execute("create table tbl as select 1") + assert [(1,)] == con.table("tbl").fetchall() def test_table_function(self): assert None != duckdb.table_function @@ -350,16 +345,16 @@ def test_unregister(self): def test_values(self): assert None != duckdb.values - def test_view(self, duckdb_cursor): - duckdb_cursor.execute("create view vw as select range(5)") - assert [([0, 1, 2, 3, 4],)] == duckdb_cursor.view("vw").fetchall() - duckdb_cursor.execute("drop view vw") + def test_view(self): + duckdb.execute("create view vw as select range(5)") + assert [([0, 1, 2, 3, 4],)] == duckdb.view("vw").fetchall() + duckdb.execute("drop view vw") - def test_close(self, duckdb_cursor): - assert None != duckdb_cursor.close + def test_close(self): + assert None != duckdb.close - def test_interrupt(self, duckdb_cursor): - assert None != duckdb_cursor.interrupt + def test_interrupt(self): + assert None != duckdb.interrupt def test_wrap_shadowing(self): pd = NumpyPandas() From 8d8e06eddc0c542a0b50d899d73ecd7cbb410570 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 16:22:05 +0000 Subject: [PATCH 25/40] tests: add workflow to do random, multiprocess and multithreaded tests. 
--- .github/workflows/additional_testing.yml | 156 +++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100644 .github/workflows/additional_testing.yml diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml new file mode 100644 index 00000000..54e21b0b --- /dev/null +++ b/.github/workflows/additional_testing.yml @@ -0,0 +1,156 @@ +name: Stress Tests +on: + workflow_dispatch: + inputs: + os: + description: Operating System + required: true + type: choice + options: + - windows-2025 + - ubuntu-24.04 + - ubuntu-24.04-arm + - macos-15 + - macos-13 + python_version: + description: Python Version + required: true + type: choice + options: + - cp39 + - cp310 + - cp311 + - cp312 + - cp313 + - cp314 + - cp314t + - cp315 + - cp315t + testsuite: + type: choice + description: Testsuite to run (fast, all) + required: true + default: fast + options: + - fast + - all + duckdb-python-sha: + type: string + description: The commit or ref to build against (defaults to latest commit of current ref) + required: false + duckdb-sha: + type: string + description: Override the DuckDB submodule commit or ref to build against + required: false + test_iterations: + type: number + description: Number of times to run each test phase + required: false + default: 3 + +jobs: + build: + name: 'Build wheel: ${{ inputs.python_version }}-${{ inputs.os }}' + runs-on: ${{ inputs.os }} + + steps: + - name: Checkout DuckDB Python + uses: actions/checkout@v4 + with: + ref: ${{ inputs.duckdb-python-sha }} + fetch-depth: 0 + submodules: true + + - name: Checkout DuckDB + shell: bash + if: ${{ inputs.duckdb-sha }} + run: | + cd external/duckdb + git fetch origin + git checkout ${{ inputs.duckdb-sha }} + + - uses: astral-sh/setup-uv@v6 + with: + version: "0.8.16" + enable-cache: false + cache-suffix: -${{ inputs.python_version }}-${{ inputs.os }} + python-version: ${{ inputs.python_version }} + + - name: Build wheel + uses: pypa/cibuildwheel@v3.1 + 
env: + CIBW_ARCHS: auto + CIBW_BUILD: ${{ inputs.python_version }}-* + CIBW_TEST_SKIP: '*' + + - name: Upload wheel + uses: actions/upload-artifact@v4 + with: + name: wheel-${{ inputs.python_version }}-${{ inputs.os }} + path: wheelhouse/ + + test: + name: 'Test: ${{ matrix.pytest_config.name }} - ${{ inputs.python_version }}-${{ inputs.os }}' + runs-on: ${{ inputs.os }} + needs: build + strategy: + fail-fast: false + matrix: + pytest_config: + - name: "Random Order" + args: "" + iterations: ${{ inputs.test_iterations || 3 }} + - name: "Multiprocess" + args: "-n auto" + iterations: ${{ inputs.test_iterations || 3 }} + - name: "Threaded" + args: "--parallel-threads=4 --iterations=8 --ignore" + iterations: 1 + + steps: + - name: Checkout DuckDB Python + uses: actions/checkout@v4 + with: + ref: ${{ inputs.duckdb-python-sha }} + fetch-depth: 0 + submodules: true + + - uses: astral-sh/setup-uv@v6 + with: + version: "0.8.16" + enable-cache: false + cache-suffix: -${{ inputs.python_version }}-${{ inputs.os }} + python-version: ${{ inputs.python_version }} + + - name: Download wheel + uses: actions/download-artifact@v4 + with: + name: wheel-${{ inputs.python_version }}-${{ inputs.os }} + path: wheelhouse/ + + - name: Install dependencies + shell: bash + run: | + uv export --only-group test --no-emit-project --output-file pylock.toml + uv pip install -r pylock.toml + uv pip install wheelhouse/*.whl + + - name: Run ${{ matrix.pytest_config.name }} tests + shell: bash + run: | + TEST_TARGET="${{ inputs.testsuite == 'fast' && 'tests/fast' || 'tests' }}" + ITERATIONS="${{ matrix.pytest_config.iterations }}" + PYTEST_ARGS="${{ matrix.pytest_config.args }}" + + echo "Running ${{ matrix.pytest_config.name }} pytest $ITERATIONS times against: $TEST_TARGET" + for i in $(seq 1 $ITERATIONS); do + echo "" + echo "${{ matrix.pytest_config.name }} Run $i/$ITERATIONS:" + echo "--------" + uv run pytest $PYTEST_ARGS "$TEST_TARGET" --verbose + if [ $? 
-ne 0 ]; then + echo "${{ matrix.pytest_config.name }} Run $i failed!" + else + echo "${{ matrix.pytest_config.name }} Run $i passed!" + fi + done \ No newline at end of file From fc1538d31353824afc03ae3ea5af71cc7272acaa Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 16:31:30 +0000 Subject: [PATCH 26/40] tests: Use unique table names for each test so tests can run in any order --- tests/fast/api/test_duckdb_connection.py | 74 ++++++++++++------------ 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/tests/fast/api/test_duckdb_connection.py b/tests/fast/api/test_duckdb_connection.py index 4cb565c1..428fae31 100644 --- a/tests/fast/api/test_duckdb_connection.py +++ b/tests/fast/api/test_duckdb_connection.py @@ -57,28 +57,28 @@ def test_arrow(self): def test_begin_commit(self): duckdb.begin() - duckdb.execute("create table tbl as select 1") + duckdb.execute("create table tbl_1 as select 1") duckdb.commit() - res = duckdb.table("tbl") - duckdb.execute("drop table tbl") + res = duckdb.table("tbl_1") + duckdb.execute("drop table tbl_1") def test_begin_rollback(self): duckdb.begin() - duckdb.execute("create table tbl as select 1") + duckdb.execute("create table tbl_1rb as select 1") duckdb.rollback() with pytest.raises(duckdb.CatalogException): # Table does not exist - res = duckdb.table("tbl") + res = duckdb.table("tbl_1rb") def test_cursor(self): - duckdb.execute("create table tbl as select 3") + duckdb.execute("create table tbl_3 as select 3") duckdb_cursor = duckdb.cursor() - res = duckdb_cursor.table("tbl").fetchall() + res = duckdb_cursor.table("tbl_3").fetchall() assert res == [(3,)] - duckdb_cursor.execute("drop table tbl") + duckdb_cursor.execute("drop table tbl_3") with pytest.raises(duckdb.CatalogException): # 'tbl' no longer exists - duckdb.table("tbl") + duckdb.table("tbl_3") def test_cursor_lifetime(self): con = duckdb.connect() @@ -103,12 +103,12 @@ def test_df(self): assert res == ref def test_duplicate(self): - 
duckdb.execute("create table tbl as select 5") + duckdb.execute("create table tbl_5 as select 5") dup_conn = duckdb.duplicate() - dup_conn.table("tbl").fetchall() - duckdb.execute("drop table tbl") + dup_conn.table("tbl_5").fetchall() + duckdb.execute("drop table tbl_5") with pytest.raises(duckdb.CatalogException): - dup_conn.table("tbl").fetchall() + dup_conn.table("tbl_5").fetchall() def test_readonly_properties(self): duckdb.execute("select 42") @@ -123,11 +123,11 @@ def test_execute(self): def test_executemany(self): # executemany does not keep an open result set # TODO: shouldn't we also have a version that executes a query multiple times with different parameters, returning all of the results? - duckdb.execute("create table tbl (i integer, j varchar)") - duckdb.executemany("insert into tbl VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')]) - res = duckdb.table("tbl").fetchall() + duckdb.execute("create table tbl_many (i integer, j varchar)") + duckdb.executemany("insert into tbl_many VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')]) + res = duckdb.table("tbl_many").fetchall() assert res == [(5, 'test'), (2, 'duck'), (42, 'quack')] - duckdb.execute("drop table tbl") + duckdb.execute("drop table tbl_many") def test_pystatement(self): with pytest.raises(duckdb.ParserException, match='seledct'): @@ -163,8 +163,8 @@ def test_pystatement(self): duckdb.execute(statements[0]) assert duckdb.execute(statements[0], {'1': 42}).fetchall() == [(42,)] - duckdb.execute("create table tbl(a integer)") - statements = duckdb.extract_statements('insert into tbl select $1') + duckdb.execute("create table tbl_a(a integer)") + statements = duckdb.extract_statements('insert into tbl_a select $1') assert statements[0].expected_result_type == [ duckdb.ExpectedResultType.CHANGED_ROWS, duckdb.ExpectedResultType.QUERY_RESULT, @@ -174,23 +174,23 @@ def test_pystatement(self): ): duckdb.executemany(statements[0]) duckdb.executemany(statements[0], [(21,), (22,), (23,)]) - 
assert duckdb.table('tbl').fetchall() == [(21,), (22,), (23,)] - duckdb.execute("drop table tbl") + assert duckdb.table('tbl_a').fetchall() == [(21,), (22,), (23,)] + duckdb.execute("drop table tbl_a") def test_fetch_arrow_table(self): # Needed for 'fetch_arrow_table' pyarrow = pytest.importorskip("pyarrow") - duckdb.execute("Create Table test (a integer)") + duckdb.execute("Create Table test_arrow_tble (a integer)") for i in range(1024): for j in range(2): - duckdb.execute("Insert Into test values ('" + str(i) + "')") - duckdb.execute("Insert Into test values ('5000')") - duckdb.execute("Insert Into test values ('6000')") + duckdb.execute("Insert Into test_arrow_tble values ('" + str(i) + "')") + duckdb.execute("Insert Into test_arrow_tble values ('5000')") + duckdb.execute("Insert Into test_arrow_tble values ('6000')") sql = ''' SELECT a, COUNT(*) AS repetitions - FROM test + FROM test_arrow_tble GROUP BY a ''' @@ -200,7 +200,7 @@ def test_fetch_arrow_table(self): arrow_df = arrow_table.to_pandas() assert result_df['repetitions'].sum() == arrow_df['repetitions'].sum() - duckdb.execute("drop table test") + duckdb.execute("drop table test_arrow_tble") def test_fetch_df(self): ref = [([1, 2, 3],)] @@ -210,22 +210,22 @@ def test_fetch_df(self): assert res == ref def test_fetch_df_chunk(self): - duckdb.execute("CREATE table t as select range a from range(3000);") - query = duckdb.execute("SELECT a FROM t") + duckdb.execute("CREATE table t_df_chunk as select range a from range(3000);") + query = duckdb.execute("SELECT a FROM t_df_chunk") cur_chunk = query.fetch_df_chunk() assert cur_chunk['a'][0] == 0 assert len(cur_chunk) == 2048 cur_chunk = query.fetch_df_chunk() assert cur_chunk['a'][0] == 2048 assert len(cur_chunk) == 952 - duckdb.execute("DROP TABLE t") + duckdb.execute("DROP TABLE t_df_chunk") def test_fetch_record_batch(self): # Needed for 'fetch_arrow_table' pyarrow = pytest.importorskip("pyarrow") - duckdb.execute("CREATE table t as select range a from 
range(3000);") - duckdb.execute("SELECT a FROM t") + duckdb.execute("CREATE table t_record_batch as select range a from range(3000);") + duckdb.execute("SELECT a FROM t_record_batch") record_batch_reader = duckdb.fetch_record_batch(1024) chunk = record_batch_reader.read_all() assert len(chunk) == 3000 @@ -289,10 +289,10 @@ def test_register(self): def test_register_relation(self): con = duckdb.connect() rel = con.sql('select [5,4,3]') - con.register("relation", rel) + con.register("relation_rr", rel) - con.sql("create table tbl as select * from relation") - assert con.table('tbl').fetchall() == [([5, 4, 3],)] + con.sql("create table tbl_reg_rel as select * from relation_rr") + assert con.table('tbl_reg_rel').fetchall() == [([5, 4, 3],)] def test_unregister_problematic_behavior(self, duckdb_cursor): # We have a VIEW called 'vw' in the Catalog @@ -333,8 +333,8 @@ def temporary_scope(): def test_table(self): con = duckdb.connect() - con.execute("create table tbl as select 1") - assert [(1,)] == con.table("tbl").fetchall() + con.execute("create table tbl_test_table as select 1") + assert [(1,)] == con.table("tbl_test_table").fetchall() def test_table_function(self): assert None != duckdb.table_function From 45e7ae364b9cc1575a46713415d0ccd973758e27 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 16:47:39 +0000 Subject: [PATCH 27/40] tests: Move the slow 10M test to tests/slow --- .github/workflows/additional_testing.yml | 2 +- tests/fast/test_relation.py | 8 -------- tests/slow/test_relation_slow.py | 20 ++++++++++++++++++++ 3 files changed, 21 insertions(+), 9 deletions(-) create mode 100644 tests/slow/test_relation_slow.py diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index 54e21b0b..1d4cfe1b 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -147,7 +147,7 @@ jobs: echo "" echo "${{ matrix.pytest_config.name }} Run $i/$ITERATIONS:" echo "--------" 
- uv run pytest $PYTEST_ARGS "$TEST_TARGET" --verbose + uv run pytest $PYTEST_ARGS "$TEST_TARGET" --durations=5 if [ $? -ne 0 ]; then echo "${{ matrix.pytest_config.name }} Run $i failed!" else diff --git a/tests/fast/test_relation.py b/tests/fast/test_relation.py index 8e68c149..2d9b3b4b 100644 --- a/tests/fast/test_relation.py +++ b/tests/fast/test_relation.py @@ -1,6 +1,5 @@ import duckdb import numpy as np -import platform import tempfile import os import pandas as pd @@ -527,13 +526,6 @@ def test_relation_print(self): 2048, 5000, 1000000, - pytest.param( - 10000000, - marks=pytest.mark.skipif( - condition=platform.system() == "Emscripten", - reason="Emscripten/Pyodide builds run out of memory at this scale, and error might not thrown reliably", - ), - ), ], ) def test_materialized_relation(self, duckdb_cursor, num_rows): diff --git a/tests/slow/test_relation_slow.py b/tests/slow/test_relation_slow.py new file mode 100644 index 00000000..cd892985 --- /dev/null +++ b/tests/slow/test_relation_slow.py @@ -0,0 +1,20 @@ +import platform +import pytest + + +class TestRelationSlow(object): + @pytest.mark.skipif( + condition=platform.system() == "Emscripten", + reason="Emscripten/Pyodide builds run out of memory at this scale, and error might not thrown reliably", + ) + def test_materialized_relation_large(self, duckdb_cursor): + """Test materialized relation with 10M rows - moved from fast tests due to 1+ minute runtime""" + # Import the implementation function from the fast test + import sys + import os + sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'fast')) + from test_relation import TestRelation + + # Create instance and call the test with large parameter + test_instance = TestRelation() + test_instance.test_materialized_relation(duckdb_cursor, 10000000) \ No newline at end of file From 543654b5e5622fd0b88bce0206e698b31a75fb71 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 16:53:06 +0000 Subject: [PATCH 28/40] tests: use 
tmp_path so each test gets unique test.db --- tests/fast/test_many_con_same_file.py | 29 +++++++++------------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/tests/fast/test_many_con_same_file.py b/tests/fast/test_many_con_same_file.py index 6b7362a6..fd825c76 100644 --- a/tests/fast/test_many_con_same_file.py +++ b/tests/fast/test_many_con_same_file.py @@ -10,29 +10,20 @@ def get_tables(con): return tbls -def test_multiple_writes(): - try: - os.remove("test.db") - except: - pass - con1 = duckdb.connect("test.db") - con2 = duckdb.connect("test.db") +def test_multiple_writes(tmp_path): + con1 = duckdb.connect(tmp_path / "test.db") + con2 = duckdb.connect(tmp_path / "test.db") con1.execute("CREATE TABLE foo1 as SELECT 1 as a, 2 as b") con2.execute("CREATE TABLE bar1 as SELECT 2 as a, 3 as b") con2.close() con1.close() - con3 = duckdb.connect("test.db") + con3 = duckdb.connect(tmp_path / "test.db") tbls = get_tables(con3) assert tbls == ['bar1', 'foo1'] del con1 del con2 del con3 - try: - os.remove("test.db") - except: - pass - def test_multiple_writes_memory(): con1 = duckdb.connect() @@ -64,23 +55,23 @@ def test_multiple_writes_named_memory(): del con3 -def test_diff_config(): - con1 = duckdb.connect("test.db", False) +def test_diff_config(tmp_path): + con1 = duckdb.connect(tmp_path / "test.db", False) with pytest.raises( duckdb.ConnectionException, match="Can't open a connection to same database file with a different configuration than existing connections", ): - con2 = duckdb.connect("test.db", True) + con2 = duckdb.connect(tmp_path / "test.db", True) con1.close() del con1 -def test_diff_config_extended(): - con1 = duckdb.connect("test.db", config={'null_order': 'NULLS FIRST'}) +def test_diff_config_extended(tmp_path): + con1 = duckdb.connect(tmp_path / "test.db", config={'null_order': 'NULLS FIRST'}) with pytest.raises( duckdb.ConnectionException, match="Can't open a connection to same database file with a different configuration than 
existing connections", ): - con2 = duckdb.connect("test.db") + con2 = duckdb.connect(tmp_path / "test.db") con1.close() del con1 From beb03259ecea7d8ded7bfac94ef34b860360fe0a Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 16:58:29 +0000 Subject: [PATCH 29/40] tests: use a tmp_path to allow concurrent tests --- tests/fast/api/test_to_csv.py | 96 +++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/tests/fast/api/test_to_csv.py b/tests/fast/api/test_to_csv.py index e48ae1b8..768906ef 100644 --- a/tests/fast/api/test_to_csv.py +++ b/tests/fast/api/test_to_csv.py @@ -1,5 +1,4 @@ import duckdb -import tempfile import os import pandas._testing as tm import datetime @@ -10,8 +9,8 @@ class TestToCSV(object): @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_basic_to_csv(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_basic_to_csv(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': [5, 3, 23, 2], 'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -21,8 +20,8 @@ def test_basic_to_csv(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_sep(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_sep(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': [5, 3, 23, 2], 'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -32,8 +31,8 @@ def test_to_csv_sep(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_na_rep(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def 
test_to_csv_na_rep(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': [5, None, 23, 2], 'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -43,8 +42,8 @@ def test_to_csv_na_rep(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_header(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_header(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': [5, None, 23, 2], 'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -54,8 +53,8 @@ def test_to_csv_header(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_quotechar(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_quotechar(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': ["\'a,b,c\'", None, "hello", "bye"], 'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -65,8 +64,8 @@ def test_to_csv_quotechar(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_escapechar(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_escapechar(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame( { "c_bool": [True, False], @@ -81,8 +80,8 @@ def test_to_csv_escapechar(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_date_format(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), 
next(tempfile._get_candidate_names())) + def test_to_csv_date_format(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame(getTimeSeriesData()) dt_index = df.index df = pandas.DataFrame({"A": dt_index, "B": dt_index.shift(1)}, index=dt_index) @@ -94,8 +93,8 @@ def test_to_csv_date_format(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_timestamp_format(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_timestamp_format(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") data = [datetime.time(hour=23, minute=1, second=34, microsecond=234345)] df = pandas.DataFrame({'0': pandas.Series(data=data, dtype='object')}) rel = duckdb.from_df(df) @@ -106,8 +105,8 @@ def test_to_csv_timestamp_format(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_quoting_off(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_quoting_off(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) rel.to_csv(temp_file_name, quoting=None) @@ -116,8 +115,8 @@ def test_to_csv_quoting_off(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_quoting_on(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_quoting_on(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) 
rel.to_csv(temp_file_name, quoting="force") @@ -126,8 +125,9 @@ def test_to_csv_quoting_on(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_quoting_quote_all(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_quoting_quote_all(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) rel.to_csv(temp_file_name, quoting=csv.QUOTE_ALL) @@ -136,8 +136,9 @@ def test_to_csv_quoting_quote_all(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_encoding_incorrect(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_encoding_incorrect(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) with pytest.raises( @@ -146,8 +147,9 @@ def test_to_csv_encoding_incorrect(self, pandas): rel.to_csv(temp_file_name, encoding="nope") @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_encoding_correct(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_encoding_correct(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) rel.to_csv(temp_file_name, encoding="UTF-8") @@ -155,8 +157,9 @@ def test_to_csv_encoding_correct(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_compression_gzip(self, pandas): - 
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_compression_gzip(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) rel.to_csv(temp_file_name, compression="gzip") @@ -164,8 +167,9 @@ def test_compression_gzip(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_partition(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_partition(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category": ['a', 'a', 'b', 'b'], @@ -190,8 +194,9 @@ def test_to_csv_partition(self, pandas): assert csv_rel.execute().fetchall() == expected @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_partition_with_columns_written(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_partition_with_columns_written(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category": ['a', 'a', 'b', 'b'], @@ -210,8 +215,9 @@ def test_to_csv_partition_with_columns_written(self, pandas): assert res.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_overwrite(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_overwrite(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category_1": ['a', 'a', 'b', 'b'], @@ -238,8 +244,9 @@ def test_to_csv_overwrite(self, pandas): assert csv_rel.execute().fetchall() == expected @pytest.mark.parametrize('pandas', [NumpyPandas(), 
ArrowPandas()]) - def test_to_csv_overwrite_with_columns_written(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_overwrite_with_columns_written(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category_1": ['a', 'a', 'b', 'b'], @@ -264,8 +271,9 @@ def test_to_csv_overwrite_with_columns_written(self, pandas): assert res.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_overwrite_not_enabled(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_overwrite_not_enabled(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category_1": ['a', 'a', 'b', 'b'], @@ -282,8 +290,9 @@ def test_to_csv_overwrite_not_enabled(self, pandas): rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"]) @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_per_thread_output(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_per_thread_output(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + num_threads = duckdb.sql("select current_setting('threads')").fetchone()[0] print('num_threads:', num_threads) df = pandas.DataFrame( @@ -301,8 +310,9 @@ def test_to_csv_per_thread_output(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_use_tmp_file(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_use_tmp_file(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category_1": ['a', 'a', 'b', 'b'], From 
90d746d63e637cf0fb0ffa2a81616c3f4131e534 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 17:07:56 +0000 Subject: [PATCH 30/40] tests: pytest.raises KeyboardInterrupt and use a long-running query --- tests/fast/api/test_query_interrupt.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/fast/api/test_query_interrupt.py b/tests/fast/api/test_query_interrupt.py index 6334e475..cb569f5a 100644 --- a/tests/fast/api/test_query_interrupt.py +++ b/tests/fast/api/test_query_interrupt.py @@ -19,17 +19,16 @@ class TestQueryInterruption(object): condition=platform.system() == "Emscripten", reason="Emscripten builds cannot use threads", ) + @pytest.mark.timeout(5) def test_query_interruption(self): con = duckdb.connect() thread = threading.Thread(target=send_keyboard_interrupt) # Start the thread thread.start() try: - res = con.execute('select count(*) from range(100000000000)').fetchall() - except RuntimeError: - # If this is not reached, we could not cancel the query before it completed - # indicating that the query interruption functionality is broken - assert True - except KeyboardInterrupt: - pytest.fail() - thread.join() + with pytest.raises(KeyboardInterrupt): + res = con.execute('select * from range(100000),range(100000)').fetchall() + + finally: + # Ensure the thread completes regardless of what happens + thread.join() From ac39dbbf7858297216b8c894a56b7df8d753cc65 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 17:17:34 +0000 Subject: [PATCH 31/40] test: fix test to handle keyboard & runtime error, and use a longer query with a timeout --- tests/fast/api/test_query_interrupt.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/fast/api/test_query_interrupt.py b/tests/fast/api/test_query_interrupt.py index cb569f5a..693945d1 100644 --- a/tests/fast/api/test_query_interrupt.py +++ b/tests/fast/api/test_query_interrupt.py @@ -1,34 +1,31 @@ import duckdb import 
time import pytest - import platform import threading import _thread as thread def send_keyboard_interrupt(): - # Wait a little, so we're sure the 'execute' has started time.sleep(0.1) - # Send an interrupt to the main thread thread.interrupt_main() class TestQueryInterruption(object): + @pytest.mark.xfail( condition=platform.system() == "Emscripten", reason="Emscripten builds cannot use threads", ) @pytest.mark.timeout(5) - def test_query_interruption(self): + def test_keyboard_interruption(self): con = duckdb.connect() thread = threading.Thread(target=send_keyboard_interrupt) # Start the thread thread.start() try: - with pytest.raises(KeyboardInterrupt): - res = con.execute('select * from range(100000),range(100000)').fetchall() - + with pytest.raises((KeyboardInterrupt, RuntimeError)): + res = con.execute('select * from range(100000) t1,range(100000) t2').fetchall() finally: # Ensure the thread completes regardless of what happens thread.join() From 8b1a564e11d7c309c4055ed31b8d43643c228d8d Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 17:22:06 +0000 Subject: [PATCH 32/40] also on PR --- .github/workflows/additional_testing.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index 1d4cfe1b..a60e761f 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -1,11 +1,13 @@ name: Stress Tests on: + pull_request: workflow_dispatch: inputs: os: description: Operating System required: true type: choice + default: ubuntu-24.04 options: - windows-2025 - ubuntu-24.04 @@ -16,6 +18,7 @@ on: description: Python Version required: true type: choice + default: cp314 options: - cp39 - cp310 From 6af1e9f6fa91a47e0a105fec165912649482c53d Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 17:28:01 +0000 Subject: [PATCH 33/40] set defaults for PR --- .github/workflows/additional_testing.yml | 9 +++++++++ 1 file 
changed, 9 insertions(+) diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index a60e761f..7e0a8021 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -1,6 +1,15 @@ name: Stress Tests on: pull_request: + inputs: + os: + default: ubuntu-24.04 + python_version: + default: cp314 + testsuite: + default: fast + test_iterations: + default: 3 workflow_dispatch: inputs: os: From b97f5b58c974d67d236b39e9a91edfb3cb09c120 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 17:29:59 +0000 Subject: [PATCH 34/40] run on push --- .github/workflows/additional_testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index 7e0a8021..884170ff 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -1,6 +1,6 @@ name: Stress Tests on: - pull_request: + push: inputs: os: default: ubuntu-24.04 From 27e94885f3934dda5d36dbcad8855378e5b36119 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 17:32:39 +0000 Subject: [PATCH 35/40] set defaults sensibly --- .github/workflows/additional_testing.yml | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index 884170ff..c2f2b9e5 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -1,15 +1,6 @@ name: Stress Tests on: push: - inputs: - os: - default: ubuntu-24.04 - python_version: - default: cp314 - testsuite: - default: fast - test_iterations: - default: 3 workflow_dispatch: inputs: os: @@ -60,10 +51,16 @@ on: required: false default: 3 +env: + OS_TO_USE: ${{ inputs.os || 'ubuntu-24.04' }} + PYTHON_VERSION_TO_USE: ${{ inputs.python_version || 'cp314' }} + TESTSUITE_TO_USE: ${{ inputs.testsuite || 'fast' 
}} + ITERATIONS_TO_USE: ${{ inputs.test_iterations || '3' }} + jobs: build: - name: 'Build wheel: ${{ inputs.python_version }}-${{ inputs.os }}' - runs-on: ${{ inputs.os }} + name: 'Build wheel: ${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }}' + runs-on: ${{ env.OS_TO_USE }} steps: - name: Checkout DuckDB Python From 43af69a78d275e6d30e8665f97c644689bd044af Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 17:33:59 +0000 Subject: [PATCH 36/40] ci: fix yaml --- .github/workflows/additional_testing.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index c2f2b9e5..75fff1a3 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -59,8 +59,8 @@ env: jobs: build: - name: 'Build wheel: ${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }}' - runs-on: ${{ env.OS_TO_USE }} + name: "Build wheel: ${{ inputs.python_version || 'cp314' }}-${{ inputs.os || 'ubuntu-24.04' }}" + runs-on: ${{ inputs.os || 'ubuntu-24.04' }} steps: - name: Checkout DuckDB Python From f5396ff9e4653f5174a6e3019c8a5548bb8c50ad Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 17:36:50 +0000 Subject: [PATCH 37/40] ci: env var --- .github/workflows/additional_testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index 75fff1a3..ea1c22bb 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -89,7 +89,7 @@ jobs: uses: pypa/cibuildwheel@v3.1 env: CIBW_ARCHS: auto - CIBW_BUILD: ${{ inputs.python_version }}-* + CIBW_BUILD: ${{ env.PYTHON_VERSION_TO_USE }}-* CIBW_TEST_SKIP: '*' - name: Upload wheel From 11880f397b423251322e9fb39324db17b1df2a54 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 18:10:29 +0000 Subject: [PATCH 38/40] ci: disable ccache 
install --- .github/workflows/additional_testing.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index ea1c22bb..1b84bfb4 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -56,6 +56,9 @@ env: PYTHON_VERSION_TO_USE: ${{ inputs.python_version || 'cp314' }} TESTSUITE_TO_USE: ${{ inputs.testsuite || 'fast' }} ITERATIONS_TO_USE: ${{ inputs.test_iterations || '3' }} + # Disable the ccache install + CIBW_BEFORE_BUILD: '' + jobs: build: From 8772bbe3b7d09001536b823a4dab2d2d7090cd71 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 18:17:17 +0000 Subject: [PATCH 39/40] ci: use defaults --- .github/workflows/additional_testing.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index 1b84bfb4..b73cea75 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -85,8 +85,8 @@ jobs: with: version: "0.8.16" enable-cache: false - cache-suffix: -${{ inputs.python_version }}-${{ inputs.os }} - python-version: ${{ inputs.python_version }} + cache-suffix: -${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }} + python-version: ${{ env.PYTHON_VERSION_TO_USE }} - name: Build wheel uses: pypa/cibuildwheel@v3.1 @@ -98,7 +98,7 @@ jobs: - name: Upload wheel uses: actions/upload-artifact@v4 with: - name: wheel-${{ inputs.python_version }}-${{ inputs.os }} + name: wheel-${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }} path: wheelhouse/ test: @@ -131,13 +131,13 @@ jobs: with: version: "0.8.16" enable-cache: false - cache-suffix: -${{ inputs.python_version }}-${{ inputs.os }} - python-version: ${{ inputs.python_version }} + cache-suffix: -${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }} + python-version: ${{ env.PYTHON_VERSION_TO_USE }} - name: Download wheel uses: 
actions/download-artifact@v4 with: - name: wheel-${{ inputs.python_version }}-${{ inputs.os }} + name: wheel-${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }} path: wheelhouse/ - name: Install dependencies From 8e0836bc404edae87b2f9dee27262810984212e5 Mon Sep 17 00:00:00 2001 From: Paul Timmins Date: Wed, 17 Sep 2025 19:34:19 +0000 Subject: [PATCH 40/40] ci: add comments --- .github/workflows/additional_testing.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml index b73cea75..2c05e39e 100644 --- a/.github/workflows/additional_testing.yml +++ b/.github/workflows/additional_testing.yml @@ -109,14 +109,15 @@ jobs: fail-fast: false matrix: pytest_config: - - name: "Random Order" + - name: "Random Order" # randomization is automatic due to pytest-randomly args: "" iterations: ${{ inputs.test_iterations || 3 }} - - name: "Multiprocess" + - name: "Multiprocess" # uses pytest-xdist args: "-n auto" iterations: ${{ inputs.test_iterations || 3 }} - - name: "Threaded" - args: "--parallel-threads=4 --iterations=8 --ignore" + - name: "Threaded" # uses pytest-run-parallel + # TODO: Update to use threading specific tests or explicitly mark unsafe tests. test_module was chosen as an example. + args: "--parallel-threads=4 --iterations=8 tests/fast/test_module.py --ignore" iterations: 1 steps: