diff --git a/.github/workflows/additional_testing.yml b/.github/workflows/additional_testing.yml new file mode 100644 index 00000000..2c05e39e --- /dev/null +++ b/.github/workflows/additional_testing.yml @@ -0,0 +1,169 @@ +name: Stress Tests +on: + push: + workflow_dispatch: + inputs: + os: + description: Operating System + required: true + type: choice + default: ubuntu-24.04 + options: + - windows-2025 + - ubuntu-24.04 + - ubuntu-24.04-arm + - macos-15 + - macos-13 + python_version: + description: Python Version + required: true + type: choice + default: cp314 + options: + - cp39 + - cp310 + - cp311 + - cp312 + - cp313 + - cp314 + - cp314t + - cp315 + - cp315t + testsuite: + type: choice + description: Testsuite to run (fast, all) + required: true + default: fast + options: + - fast + - all + duckdb-python-sha: + type: string + description: The commit or ref to build against (defaults to latest commit of current ref) + required: false + duckdb-sha: + type: string + description: Override the DuckDB submodule commit or ref to build against + required: false + test_iterations: + type: number + description: Number of times to run each test phase + required: false + default: 3 + +env: + OS_TO_USE: ${{ inputs.os || 'ubuntu-24.04' }} + PYTHON_VERSION_TO_USE: ${{ inputs.python_version || 'cp314' }} + TESTSUITE_TO_USE: ${{ inputs.testsuite || 'fast' }} + ITERATIONS_TO_USE: ${{ inputs.test_iterations || '3' }} + # Disable the ccache install + CIBW_BEFORE_BUILD: '' + + +jobs: + build: + name: "Build wheel: ${{ inputs.python_version || 'cp314' }}-${{ inputs.os || 'ubuntu-24.04' }}" + runs-on: ${{ inputs.os || 'ubuntu-24.04' }} + + steps: + - name: Checkout DuckDB Python + uses: actions/checkout@v4 + with: + ref: ${{ inputs.duckdb-python-sha }} + fetch-depth: 0 + submodules: true + + - name: Checkout DuckDB + shell: bash + if: ${{ inputs.duckdb-sha }} + run: | + cd external/duckdb + git fetch origin + git checkout ${{ inputs.duckdb-sha }} + + - uses: astral-sh/setup-uv@v6 + with: + version: "0.8.16" + enable-cache: false + cache-suffix: -${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }} + python-version: ${{ env.PYTHON_VERSION_TO_USE }} + + - name: Build wheel + uses: pypa/cibuildwheel@v3.1 + env: + CIBW_ARCHS: auto + CIBW_BUILD: ${{ env.PYTHON_VERSION_TO_USE }}-* + CIBW_TEST_SKIP: '*' + + - name: Upload wheel + uses: actions/upload-artifact@v4 + with: + name: wheel-${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }} + path: wheelhouse/ + + test: + name: "Test: ${{ matrix.pytest_config.name }} - ${{ inputs.python_version || 'cp314' }}-${{ inputs.os || 'ubuntu-24.04' }}" + runs-on: ${{ inputs.os || 'ubuntu-24.04' }} # fall back to the default like the build job, since inputs are empty on push events + needs: build + strategy: + fail-fast: false + matrix: + pytest_config: + - name: "Random Order" # randomization is automatic due to pytest-randomly + args: "" + iterations: ${{ inputs.test_iterations || 3 }} + - name: "Multiprocess" # uses pytest-xdist + args: "-n auto" + iterations: ${{ inputs.test_iterations || 3 }} + - name: "Threaded" # uses pytest-run-parallel + # TODO: Update to use threading-specific tests or explicitly mark unsafe tests. test_module was chosen as an example. 
+ args: "--parallel-threads=4 --iterations=8 tests/fast/test_module.py --ignore" + iterations: 1 + + steps: + - name: Checkout DuckDB Python + uses: actions/checkout@v4 + with: + ref: ${{ inputs.duckdb-python-sha }} + fetch-depth: 0 + submodules: true + + - uses: astral-sh/setup-uv@v6 + with: + version: "0.8.16" + enable-cache: false + cache-suffix: -${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }} + python-version: ${{ env.PYTHON_VERSION_TO_USE }} + + - name: Download wheel + uses: actions/download-artifact@v4 + with: + name: wheel-${{ env.PYTHON_VERSION_TO_USE }}-${{ env.OS_TO_USE }} + path: wheelhouse/ + + - name: Install dependencies + shell: bash + run: | + uv export --only-group test --no-emit-project --output-file pylock.toml + uv pip install -r pylock.toml + uv pip install wheelhouse/*.whl + + - name: Run ${{ matrix.pytest_config.name }} tests + shell: bash + run: | + TEST_TARGET="${{ inputs.testsuite == 'fast' && 'tests/fast' || 'tests' }}" + ITERATIONS="${{ matrix.pytest_config.iterations }}" + PYTEST_ARGS="${{ matrix.pytest_config.args }}" + + echo "Running ${{ matrix.pytest_config.name }} pytest $ITERATIONS times against: $TEST_TARGET" + for i in $(seq 1 $ITERATIONS); do + echo "" + echo "${{ matrix.pytest_config.name }} Run $i/$ITERATIONS:" + echo "--------" + uv run pytest $PYTEST_ARGS "$TEST_TARGET" --durations=5 + if [ $? -ne 0 ]; then + echo "${{ matrix.pytest_config.name }} Run $i failed!" + else + echo "${{ matrix.pytest_config.name }} Run $i passed!" + fi + done \ No newline at end of file diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml index 3851014d..b169da2b 100644 --- a/.github/workflows/packaging.yml +++ b/.github/workflows/packaging.yml @@ -75,7 +75,7 @@ jobs: name: Build and test releases uses: ./.github/workflows/packaging_wheels.yml with: - minimal: false + minimal: ${{ inputs.minimal }} testsuite: all duckdb-python-sha: ${{ inputs.duckdb-python-sha != '' && inputs.duckdb-python-sha || github.sha }} duckdb-sha: ${{ inputs.duckdb-sha }} diff --git a/pyproject.toml b/pyproject.toml index 0639c47a..db26a6ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -227,10 +227,10 @@ test = [ # dependencies used for running tests "pytest", "pytest-reraise", "pytest-timeout", - "pytest-xdist", # Parallel (multi-process) - # "pytest-randomly", # Randomizes order of tests + "pytest-xdist", # multi-processed tests, if `-n | auto` + "pytest-randomly", # randomizes test order to ensure no test dependencies, enabled on install + "pytest-run-parallel", # multi-threaded tests, if `--parallel-threads=N --iterations=N` "pytest-timestamper", # Adds timestamps to test output - "pytest-run-parallel", # For free-threading testing "mypy", "coverage", "gcovr; python_version < '3.14'", @@ -310,6 +310,7 @@ filterwarnings = [ "ignore:distutils Version classes are deprecated:DeprecationWarning", "ignore:is_datetime64tz_dtype is deprecated:DeprecationWarning", ] +timeout = 120 # don't let individual tests run for more than 5 minutes [tool.coverage.run] branch = true diff --git a/tests/conftest.py b/tests/conftest.py index e2f427c3..19586787 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -64,7 +64,7 @@ def pytest_runtest_call(item): if sys.version_info[:2] == (3, 14): try: outcome.get_result() - except Exception as e: + except duckdb.InvalidInputException as e: if "'pandas' is required for this operation but it was not installed" in str(e): pytest.skip("pandas not available - test requires pandas functionality") else: @@ -267,11 +267,9 @@ def 
spark(): @pytest.fixture(scope='function') -def duckdb_cursor(): - connection = duckdb.connect('') - yield connection - connection.close() - +def duckdb_cursor(tmp_path): + with duckdb.connect(tmp_path / "mytest") as connection: + yield connection @pytest.fixture(scope='function') def integers(duckdb_cursor): diff --git a/tests/fast/api/test_duckdb_connection.py b/tests/fast/api/test_duckdb_connection.py index 4cb565c1..428fae31 100644 --- a/tests/fast/api/test_duckdb_connection.py +++ b/tests/fast/api/test_duckdb_connection.py @@ -57,28 +57,28 @@ def test_arrow(self): def test_begin_commit(self): duckdb.begin() - duckdb.execute("create table tbl as select 1") + duckdb.execute("create table tbl_1 as select 1") duckdb.commit() - res = duckdb.table("tbl") - duckdb.execute("drop table tbl") + res = duckdb.table("tbl_1") + duckdb.execute("drop table tbl_1") def test_begin_rollback(self): duckdb.begin() - duckdb.execute("create table tbl as select 1") + duckdb.execute("create table tbl_1rb as select 1") duckdb.rollback() with pytest.raises(duckdb.CatalogException): # Table does not exist - res = duckdb.table("tbl") + res = duckdb.table("tbl_1rb") def test_cursor(self): - duckdb.execute("create table tbl as select 3") + duckdb.execute("create table tbl_3 as select 3") duckdb_cursor = duckdb.cursor() - res = duckdb_cursor.table("tbl").fetchall() + res = duckdb_cursor.table("tbl_3").fetchall() assert res == [(3,)] - duckdb_cursor.execute("drop table tbl") + duckdb_cursor.execute("drop table tbl_3") with pytest.raises(duckdb.CatalogException): # 'tbl' no longer exists - duckdb.table("tbl") + duckdb.table("tbl_3") def test_cursor_lifetime(self): con = duckdb.connect() @@ -103,12 +103,12 @@ def test_df(self): assert res == ref def test_duplicate(self): - duckdb.execute("create table tbl as select 5") + duckdb.execute("create table tbl_5 as select 5") dup_conn = duckdb.duplicate() - dup_conn.table("tbl").fetchall() - duckdb.execute("drop table tbl") + dup_conn.table("tbl_5").fetchall() + duckdb.execute("drop table tbl_5") with pytest.raises(duckdb.CatalogException): - dup_conn.table("tbl").fetchall() + dup_conn.table("tbl_5").fetchall() def test_readonly_properties(self): duckdb.execute("select 42") @@ -123,11 +123,11 @@ def test_execute(self): def test_executemany(self): # executemany does not keep an open result set # TODO: shouldn't we also have a version that executes a query multiple times with different parameters, returning all of the results? 
- duckdb.execute("create table tbl (i integer, j varchar)") - duckdb.executemany("insert into tbl VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')]) - res = duckdb.table("tbl").fetchall() + duckdb.execute("create table tbl_many (i integer, j varchar)") + duckdb.executemany("insert into tbl_many VALUES (?, ?)", [(5, 'test'), (2, 'duck'), (42, 'quack')]) + res = duckdb.table("tbl_many").fetchall() assert res == [(5, 'test'), (2, 'duck'), (42, 'quack')] - duckdb.execute("drop table tbl") + duckdb.execute("drop table tbl_many") def test_pystatement(self): with pytest.raises(duckdb.ParserException, match='seledct'): @@ -163,8 +163,8 @@ def test_pystatement(self): duckdb.execute(statements[0]) assert duckdb.execute(statements[0], {'1': 42}).fetchall() == [(42,)] - duckdb.execute("create table tbl(a integer)") - statements = duckdb.extract_statements('insert into tbl select $1') + duckdb.execute("create table tbl_a(a integer)") + statements = duckdb.extract_statements('insert into tbl_a select $1') assert statements[0].expected_result_type == [ duckdb.ExpectedResultType.CHANGED_ROWS, duckdb.ExpectedResultType.QUERY_RESULT, @@ -174,23 +174,23 @@ def test_pystatement(self): ): duckdb.executemany(statements[0]) duckdb.executemany(statements[0], [(21,), (22,), (23,)]) - assert duckdb.table('tbl').fetchall() == [(21,), (22,), (23,)] - duckdb.execute("drop table tbl") + assert duckdb.table('tbl_a').fetchall() == [(21,), (22,), (23,)] + duckdb.execute("drop table tbl_a") def test_fetch_arrow_table(self): # Needed for 'fetch_arrow_table' pyarrow = pytest.importorskip("pyarrow") - duckdb.execute("Create Table test (a integer)") + duckdb.execute("Create Table test_arrow_tble (a integer)") for i in range(1024): for j in range(2): - duckdb.execute("Insert Into test values ('" + str(i) + "')") - duckdb.execute("Insert Into test values ('5000')") - duckdb.execute("Insert Into test values ('6000')") + duckdb.execute("Insert Into test_arrow_tble values ('" + str(i) + "')") + duckdb.execute("Insert Into test_arrow_tble values ('5000')") + duckdb.execute("Insert Into test_arrow_tble values ('6000')") sql = ''' SELECT a, COUNT(*) AS repetitions - FROM test + FROM test_arrow_tble GROUP BY a ''' @@ -200,7 +200,7 @@ def test_fetch_arrow_table(self): arrow_df = arrow_table.to_pandas() assert result_df['repetitions'].sum() == arrow_df['repetitions'].sum() - duckdb.execute("drop table test") + duckdb.execute("drop table test_arrow_tble") def test_fetch_df(self): ref = [([1, 2, 3],)] @@ -210,22 +210,22 @@ def test_fetch_df(self): assert res == ref def test_fetch_df_chunk(self): - duckdb.execute("CREATE table t as select range a from range(3000);") - query = duckdb.execute("SELECT a FROM t") + duckdb.execute("CREATE table t_df_chunk as select range a from range(3000);") + query = duckdb.execute("SELECT a FROM t_df_chunk") cur_chunk = query.fetch_df_chunk() assert cur_chunk['a'][0] == 0 assert len(cur_chunk) == 2048 cur_chunk = query.fetch_df_chunk() assert cur_chunk['a'][0] == 2048 assert len(cur_chunk) == 952 - duckdb.execute("DROP TABLE t") + duckdb.execute("DROP TABLE t_df_chunk") def test_fetch_record_batch(self): # Needed for 'fetch_arrow_table' pyarrow = pytest.importorskip("pyarrow") - duckdb.execute("CREATE table t as select range a from range(3000);") - duckdb.execute("SELECT a FROM t") + duckdb.execute("CREATE table t_record_batch as select range a from range(3000);") + duckdb.execute("SELECT a FROM t_record_batch") record_batch_reader = duckdb.fetch_record_batch(1024) chunk = 
record_batch_reader.read_all() assert len(chunk) == 3000 @@ -289,10 +289,10 @@ def test_register(self): def test_register_relation(self): con = duckdb.connect() rel = con.sql('select [5,4,3]') - con.register("relation", rel) + con.register("relation_rr", rel) - con.sql("create table tbl as select * from relation") - assert con.table('tbl').fetchall() == [([5, 4, 3],)] + con.sql("create table tbl_reg_rel as select * from relation_rr") + assert con.table('tbl_reg_rel').fetchall() == [([5, 4, 3],)] def test_unregister_problematic_behavior(self, duckdb_cursor): # We have a VIEW called 'vw' in the Catalog @@ -333,8 +333,8 @@ def temporary_scope(): def test_table(self): con = duckdb.connect() - con.execute("create table tbl as select 1") - assert [(1,)] == con.table("tbl").fetchall() + con.execute("create table tbl_test_table as select 1") + assert [(1,)] == con.table("tbl_test_table").fetchall() def test_table_function(self): assert None != duckdb.table_function diff --git a/tests/fast/api/test_query_interrupt.py b/tests/fast/api/test_query_interrupt.py index 6334e475..693945d1 100644 --- a/tests/fast/api/test_query_interrupt.py +++ b/tests/fast/api/test_query_interrupt.py @@ -1,35 +1,31 @@ import duckdb import time import pytest - import platform import threading import _thread as thread def send_keyboard_interrupt(): - # Wait a little, so we're sure the 'execute' has started time.sleep(0.1) - # Send an interrupt to the main thread thread.interrupt_main() class TestQueryInterruption(object): + @pytest.mark.xfail( condition=platform.system() == "Emscripten", reason="Emscripten builds cannot use threads", ) - def test_query_interruption(self): + @pytest.mark.timeout(5) + def test_keyboard_interruption(self): con = duckdb.connect() thread = threading.Thread(target=send_keyboard_interrupt) # Start the thread thread.start() try: - res = con.execute('select count(*) from range(100000000000)').fetchall() - except RuntimeError: - # If this is not reached, we could not cancel the query before it completed - # indicating that the query interruption functionality is broken - assert True - except KeyboardInterrupt: - pytest.fail() - thread.join() + with pytest.raises((KeyboardInterrupt, RuntimeError)): + res = con.execute('select * from range(100000) t1,range(100000) t2').fetchall() + finally: + # Ensure the thread completes regardless of what happens + thread.join() diff --git a/tests/fast/api/test_to_csv.py b/tests/fast/api/test_to_csv.py index e48ae1b8..768906ef 100644 --- a/tests/fast/api/test_to_csv.py +++ b/tests/fast/api/test_to_csv.py @@ -1,5 +1,4 @@ import duckdb -import tempfile import os import pandas._testing as tm import datetime @@ -10,8 +9,8 @@ class TestToCSV(object): @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_basic_to_csv(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_basic_to_csv(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': [5, 3, 23, 2], 'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -21,8 +20,8 @@ def test_basic_to_csv(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_sep(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_sep(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': [5, 3, 23, 2], 
'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -32,8 +31,8 @@ def test_to_csv_sep(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_na_rep(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_na_rep(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': [5, None, 23, 2], 'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -43,8 +42,8 @@ def test_to_csv_na_rep(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_header(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_header(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': [5, None, 23, 2], 'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -54,8 +53,8 @@ def test_to_csv_header(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_quotechar(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_quotechar(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': ["\'a,b,c\'", None, "hello", "bye"], 'b': [45, 234, 234, 2]}) rel = duckdb.from_df(df) @@ -65,8 +64,8 @@ def test_to_csv_quotechar(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_escapechar(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_escapechar(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame( { "c_bool": [True, False], @@ -81,8 +80,8 @@ def test_to_csv_escapechar(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_date_format(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_date_format(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame(getTimeSeriesData()) dt_index = df.index df = pandas.DataFrame({"A": dt_index, "B": dt_index.shift(1)}, index=dt_index) @@ -94,8 +93,8 @@ def test_to_csv_date_format(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_timestamp_format(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_timestamp_format(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") data = [datetime.time(hour=23, minute=1, second=34, microsecond=234345)] df = pandas.DataFrame({'0': pandas.Series(data=data, dtype='object')}) rel = duckdb.from_df(df) @@ -106,8 +105,8 @@ def test_to_csv_timestamp_format(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_quoting_off(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), 
next(tempfile._get_candidate_names())) + def test_to_csv_quoting_off(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) rel.to_csv(temp_file_name, quoting=None) @@ -116,8 +115,8 @@ def test_to_csv_quoting_off(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_quoting_on(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_quoting_on(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) rel.to_csv(temp_file_name, quoting="force") @@ -126,8 +125,9 @@ def test_to_csv_quoting_on(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_quoting_quote_all(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_quoting_quote_all(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) rel.to_csv(temp_file_name, quoting=csv.QUOTE_ALL) @@ -136,8 +136,9 @@ def test_to_csv_quoting_quote_all(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_encoding_incorrect(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_encoding_incorrect(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) with pytest.raises( @@ -146,8 +147,9 @@ def test_to_csv_encoding_incorrect(self, pandas): rel.to_csv(temp_file_name, encoding="nope") @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_encoding_correct(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_encoding_correct(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) rel.to_csv(temp_file_name, encoding="UTF-8") @@ -155,8 +157,9 @@ def test_to_csv_encoding_correct(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_compression_gzip(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_compression_gzip(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']}) rel = duckdb.from_df(df) rel.to_csv(temp_file_name, compression="gzip") @@ -164,8 +167,9 @@ def test_compression_gzip(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_partition(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_partition(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = 
pandas.DataFrame( { "c_category": ['a', 'a', 'b', 'b'], @@ -190,8 +194,9 @@ def test_to_csv_partition(self, pandas): assert csv_rel.execute().fetchall() == expected @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_partition_with_columns_written(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_partition_with_columns_written(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category": ['a', 'a', 'b', 'b'], @@ -210,8 +215,9 @@ def test_to_csv_partition_with_columns_written(self, pandas): assert res.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_overwrite(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_overwrite(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category_1": ['a', 'a', 'b', 'b'], @@ -238,8 +244,9 @@ def test_to_csv_overwrite(self, pandas): assert csv_rel.execute().fetchall() == expected @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_overwrite_with_columns_written(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_overwrite_with_columns_written(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category_1": ['a', 'a', 'b', 'b'], @@ -264,8 +271,9 @@ def test_to_csv_overwrite_with_columns_written(self, pandas): assert res.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_overwrite_not_enabled(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_overwrite_not_enabled(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category_1": ['a', 'a', 'b', 'b'], @@ -282,8 +290,9 @@ def test_to_csv_overwrite_not_enabled(self, pandas): rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"]) @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_per_thread_output(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_per_thread_output(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + num_threads = duckdb.sql("select current_setting('threads')").fetchone()[0] print('num_threads:', num_threads) df = pandas.DataFrame( @@ -301,8 +310,9 @@ def test_to_csv_per_thread_output(self, pandas): assert rel.execute().fetchall() == csv_rel.execute().fetchall() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) - def test_to_csv_use_tmp_file(self, pandas): - temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names())) + def test_to_csv_use_tmp_file(self, pandas, tmp_path): + temp_file_name = str(tmp_path / "test.csv") + df = pandas.DataFrame( { "c_category_1": ['a', 'a', 'b', 'b'], diff --git a/tests/fast/test_many_con_same_file.py b/tests/fast/test_many_con_same_file.py index 6b7362a6..fd825c76 100644 --- a/tests/fast/test_many_con_same_file.py +++ b/tests/fast/test_many_con_same_file.py @@ -10,29 +10,20 @@ def get_tables(con): return tbls -def test_multiple_writes(): - try: - os.remove("test.db") 
- except: - pass - con1 = duckdb.connect("test.db") - con2 = duckdb.connect("test.db") +def test_multiple_writes(tmp_path): + con1 = duckdb.connect(tmp_path / "test.db") + con2 = duckdb.connect(tmp_path / "test.db") con1.execute("CREATE TABLE foo1 as SELECT 1 as a, 2 as b") con2.execute("CREATE TABLE bar1 as SELECT 2 as a, 3 as b") con2.close() con1.close() - con3 = duckdb.connect("test.db") + con3 = duckdb.connect(tmp_path / "test.db") tbls = get_tables(con3) assert tbls == ['bar1', 'foo1'] del con1 del con2 del con3 - try: - os.remove("test.db") - except: - pass - def test_multiple_writes_memory(): con1 = duckdb.connect() @@ -64,23 +55,23 @@ def test_multiple_writes_named_memory(): del con3 -def test_diff_config(): - con1 = duckdb.connect("test.db", False) +def test_diff_config(tmp_path): + con1 = duckdb.connect(tmp_path / "test.db", False) with pytest.raises( duckdb.ConnectionException, match="Can't open a connection to same database file with a different configuration than existing connections", ): - con2 = duckdb.connect("test.db", True) + con2 = duckdb.connect(tmp_path / "test.db", True) con1.close() del con1 -def test_diff_config_extended(): - con1 = duckdb.connect("test.db", config={'null_order': 'NULLS FIRST'}) +def test_diff_config_extended(tmp_path): + con1 = duckdb.connect(tmp_path / "test.db", config={'null_order': 'NULLS FIRST'}) with pytest.raises( duckdb.ConnectionException, match="Can't open a connection to same database file with a different configuration than existing connections", ): - con2 = duckdb.connect("test.db") + con2 = duckdb.connect(tmp_path / "test.db") con1.close() del con1 diff --git a/tests/fast/test_relation.py b/tests/fast/test_relation.py index 8e68c149..2d9b3b4b 100644 --- a/tests/fast/test_relation.py +++ b/tests/fast/test_relation.py @@ -1,6 +1,5 @@ import duckdb import numpy as np -import platform import tempfile import os import pandas as pd @@ -527,13 +526,6 @@ def test_relation_print(self): 2048, 5000, 1000000, - pytest.param( - 10000000, - marks=pytest.mark.skipif( - condition=platform.system() == "Emscripten", - reason="Emscripten/Pyodide builds run out of memory at this scale, and error might not thrown reliably", - ), - ), ], ) def test_materialized_relation(self, duckdb_cursor, num_rows): diff --git a/tests/slow/test_h2oai_arrow.py b/tests/slow/test_h2oai_arrow.py index eddfc7d1..7ff37d01 100644 --- a/tests/slow/test_h2oai_arrow.py +++ b/tests/slow/test_h2oai_arrow.py @@ -267,4 +267,4 @@ def group_by_data(arrow_dataset_register): "x", ) yield con - con.close() + con.close() \ No newline at end of file diff --git a/tests/slow/test_relation_slow.py b/tests/slow/test_relation_slow.py new file mode 100644 index 00000000..cd892985 --- /dev/null +++ b/tests/slow/test_relation_slow.py @@ -0,0 +1,20 @@ +import platform +import pytest + + +class TestRelationSlow(object): + @pytest.mark.skipif( + condition=platform.system() == "Emscripten", + reason="Emscripten/Pyodide builds run out of memory at this scale, and the error might not be thrown reliably", + ) + def test_materialized_relation_large(self, duckdb_cursor): + """Test materialized relation with 10M rows - moved from fast tests due to 1+ minute runtime""" + # Import the test class from the fast test module + import sys + import os + sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'fast')) + from test_relation import TestRelation + + # Create instance and call the test with large parameter + test_instance = TestRelation() + test_instance.test_materialized_relation(duckdb_cursor, 
10000000) \ No newline at end of file