diff --git a/pyproject.toml b/pyproject.toml index 8976d360..adb1dffe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -350,7 +350,7 @@ line-length = 120 indent-width = 4 target-version = "py39" fix = true -exclude = ['external/duckdb', 'sqllogic'] +exclude = ['external/duckdb'] [tool.ruff.lint] fixable = ["ALL"] diff --git a/sqllogic/__init__.py b/sqllogic/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/sqllogic/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/sqllogic/conftest.py b/sqllogic/conftest.py deleted file mode 100644 index 48315109..00000000 --- a/sqllogic/conftest.py +++ /dev/null @@ -1,286 +0,0 @@ -import itertools -import pathlib -import random -import re -import typing -import warnings - -import pytest - -from .skipped_tests import SKIPPED_TESTS - -SQLLOGIC_TEST_CASE_NAME = "test_sqllogic" -SQLLOGIC_TEST_PARAMETER = "test_script_path" -DUCKDB_ROOT_DIR = (pathlib.Path(__file__).parent.parent / "external" / "duckdb").resolve() - - -def pytest_addoption(parser: pytest.Parser): - parser.addoption( - "--test-dir", - action="extend", - nargs="*", - type=pathlib.Path, - default=[], # We handle default in pytest_generate_tests - dest="test_dirs", - help="Path to one or more directories containing SQLLogic test scripts", - ) - parser.addoption( - "--path", - type=str, - default=None, - dest="path", - help="Path (or glob) of the tests to run", - ) - parser.addoption( - "--build-dir", - type=str, - dest="build_dir", - help="Path to the build directory, used for loading extensions", - ) - parser.addoption("--start-offset", type=int, dest="start_offset", help="Index of the first test to run") - parser.addoption("--end-offset", type=int, dest="end_offset", help="Index of the last test to run") - parser.addoption( - "--start-offset-percentage", - type=int, - dest="start_offset_percentage", - help="Runs the tests starting at N % of the total test suite", - ) - parser.addoption( - "--end-offset-percentage", - type=int, - dest="end_offset_percentage", - help="Runs the tests ending at N % of the total test suite, excluding the Nth % test", - ) - parser.addoption( - "--order", - choices=["decl", "lex", "rand"], - default="decl", - dest="order", - help="Specifies the execution order of tests", - ) - parser.addoption("--rng-seed", type=int, dest="rng_seed", help="Random integer seed") - - -@pytest.hookimpl(hookwrapper=True) -def pytest_keyboard_interrupt(excinfo: pytest.ExceptionInfo): - # Ensure all tests are properly cleaned up on keyboard interrupt - from .test_sqllogic import test_sqllogic - - if hasattr(test_sqllogic, "executor") and test_sqllogic.executor: - if test_sqllogic.executor.database and hasattr(test_sqllogic.executor.database, "connection"): - test_sqllogic.executor.database.connection.interrupt() - test_sqllogic.executor.cleanup() - test_sqllogic.executor = None - yield - - -def pytest_configure(config: pytest.Config): - rng_seed = config.getoption("rng_seed") - if rng_seed is not None: - random.seed(rng_seed) - - # Custom marker used to run all tests - config.addinivalue_line("markers", "all") - # These markers are used for .test_slow and .test_coverage files - config.addinivalue_line("markers", "slow") - config.addinivalue_line("markers", "coverage") - - -def get_test_id(path: pathlib.Path, root_dir: pathlib.Path, config: pytest.Config) -> str: - # Test IDs are the path of the script starting from the test/ directory. - return str(path.relative_to(root_dir.parent)) - - -def get_test_marks(path: pathlib.Path, root_dir: pathlib.Path, config: pytest.Config) -> list[typing.Any]: - # Tests are tagged with the their category (i.e., name of their parent directory) - category = path.parent.name - - for mark in config.getini("markers"): - # Look for MarkDecorator object with the same name as the category - if mark == category or (hasattr(mark, "markname") and mark.markname.startswith(category)): - break - else: - # If the category is not in the markers, add it - config.addinivalue_line("markers", category) - - marks = [pytest.mark.all, pytest.mark.__getattr__(category)] - - test_id = get_test_id(path, root_dir, config) - if test_id in SKIPPED_TESTS: - marks.append(pytest.mark.skip(reason="Test is on SKIPPED_TESTS list")) - - if test_id.endswith(".test_slow"): - marks.append(pytest.mark.slow) - if test_id.endswith(".test_coverage"): - marks.append(pytest.mark.coverage) - - return marks - - -def create_parameters_from_paths(paths, root_dir: pathlib.Path, config: pytest.Config) -> typing.Iterator[typing.Any]: - return map( - lambda path: pytest.param( - path.absolute(), id=get_test_id(path, root_dir, config), marks=get_test_marks(path, root_dir, config) - ), - paths, - ) - - -def scan_for_test_scripts(root_dir: pathlib.Path, config: pytest.Config) -> typing.Iterator[typing.Any]: - """Scans for .test files in the given directory and its subdirectories. - Returns an iterator of pytest parameters (argument, id and marks). - """ # noqa: D205 - # TODO: Add tests from extensions - test_script_extensions = [".test", ".test_slow", ".test_coverage"] - it = itertools.chain.from_iterable(root_dir.rglob(f"*{ext}") for ext in test_script_extensions) - return create_parameters_from_paths(it, root_dir, config) - - -def pytest_generate_tests(metafunc: pytest.Metafunc): - # test_sqllogic (a.k.a SQLLOGIC_TEST_CASE_NAME) is defined in test_sqllogic.py - if metafunc.definition.name != SQLLOGIC_TEST_CASE_NAME: - return - - test_dirs: list[pathlib.Path] = metafunc.config.getoption("test_dirs") - test_glob: typing.Optional[pathlib.Path] = metafunc.config.getoption("path") - - parameters = [] - - if test_glob: - test_paths = DUCKDB_ROOT_DIR.rglob(test_glob) - parameters.extend(create_parameters_from_paths(test_paths, DUCKDB_ROOT_DIR, metafunc.config)) - - for test_dir in test_dirs: - # Create absolute & normalized path - test_dir = test_dir.resolve() - assert test_dir.is_dir(), f"{test_dir} is not a directory" - parameters.extend(scan_for_test_scripts(test_dir, metafunc.config)) - - if parameters == []: - if len(test_dirs) == 0: - # Use DuckDB's test directory as the default when no paths are provided - parameters.extend(scan_for_test_scripts(DUCKDB_ROOT_DIR / "test", metafunc.config)) - - metafunc.parametrize(SQLLOGIC_TEST_PARAMETER, parameters) - - -def determine_test_offsets(config: pytest.Config, num_tests: int) -> tuple[int, int]: - """If start_offset and end_offset are specified, then these are used. - start_offset defaults to 0. end_offset defaults to and is capped to the last test index. - start_offset_percentage and end_offset_percentage are used to calculate the start and end offsets based on the total number of tests. - This is done in a way that a test run to 25% and another test run starting at 25% do not overlap by excluding the 25th percent test. - """ # noqa: D205 - start_offset = config.getoption("start_offset") - end_offset = config.getoption("end_offset") - start_offset_percentage = config.getoption("start_offset_percentage") - end_offset_percentage = config.getoption("end_offset_percentage") - - index_specified = start_offset is not None or end_offset is not None - percentage_specified = start_offset_percentage is not None or end_offset_percentage is not None - - if index_specified and percentage_specified: - msg = "You can only specify either start/end offsets or start/end offset percentages, not both" - raise ValueError(msg) - - if start_offset is not None and start_offset < 0: - msg = "--start-offset must be a non-negative integer" - raise ValueError(msg) - - if start_offset_percentage is not None and (start_offset_percentage < 0 or start_offset_percentage > 100): - msg = "--start-offset-percentage must be between 0 and 100" - raise ValueError(msg) - - if end_offset_percentage is not None and (end_offset_percentage < 0 or end_offset_percentage > 100): - msg = "--end-offset-percentage must be between 0 and 100" - raise ValueError(msg) - - if start_offset is None: - if start_offset_percentage is not None: - start_offset = start_offset_percentage * num_tests // 100 - else: - start_offset = 0 - - if end_offset is not None and end_offset < start_offset: - msg = f"--end-offset ({end_offset}) must be greater than or equal to the start offset ({start_offset})" - raise ValueError(msg) - - if end_offset is None: - if end_offset_percentage is not None: - end_offset = end_offset_percentage * num_tests // 100 - 1 - else: - end_offset = num_tests - 1 - - max_end_offset = num_tests - 1 - if end_offset > max_end_offset: - end_offset = max_end_offset - - return start_offset, end_offset - - -# Execute last, after pytest has already deselected tests based on -k and -m parameters -@pytest.hookimpl(trylast=True) -def pytest_collection_modifyitems(session: pytest.Session, config: pytest.Config, items: list[pytest.Item]): - if len(items) == 0: - warnings.warn("No tests were found. Check that you passed the correct directory via --tests-dir.") - return - - # Check if specific test cases to run were passed as arguments, if an expression to match test casees was specified with -k, - # or if markers were passed with -m. - # If none of these are true, we run all .test files, but not .test_slow or .test_coverage, and no tests that are on the SKIPPED_TESTS list. - specific_test_args_pattern = re.compile(r"test_sqllogic\[.*\]") - is_default_run = ( - not config.option.markexpr.strip() - and not config.option.keyword.strip() - and not any(specific_test_args_pattern.search(arg) for arg in config.args) - ) - if is_default_run: - selected_items = [] - deselected_items = [] - for test_case in items: - # Extract the name of the SQLLogic script which is between the brackets in the test case name. - # The test case name looks something like this: test_sqllogic[test/extension/autoloading_reset_setting.test] - sqllogic_test_name = test_case.name[test_case.name.find("[") + 1 : test_case.name.find("]")] - if sqllogic_test_name.endswith(".test"): - selected_items.append(test_case) - else: - deselected_items.append(test_case) - - config.hook.pytest_deselected(items=deselected_items) - items[:] = selected_items - - start_offset, end_offset = determine_test_offsets(config, len(items)) - - # Order tests based on --order option. Take as is if order is "decl". - if config.getoption("order") == "rand": - random.shuffle(items) - elif config.getoption("order") == "lex": - items.sort(key=lambda item: item.name) - - for index, item in enumerate(items): - # Store some information that are later used in pytest_runtest_logreport. - # We store the test index after sorting but before deselecting to match start and end offset. - item.user_properties.append(("test_index", index)) - item.user_properties.append(("total_num_tests", len(items))) - item.user_properties.append( - ("should_print_progress", config.get_verbosity() > 0 and config.getoption("capture") == "no") - ) - - deselected_items = items[:start_offset] + items[end_offset + 1 :] - config.hook.pytest_deselected(items=deselected_items) - items[:] = items[start_offset : end_offset + 1] - - -def pytest_runtest_setup(item: pytest.Item): - """Show the test index after the test name.""" - - def get_from_tuple_list(tuples, key): - for t in tuples: - if t[0] == key: - return t[1] - return None - - if get_from_tuple_list(item.user_properties, "should_print_progress"): - idx = get_from_tuple_list(item.user_properties, "test_index") - # index is 0-based, but total_num_tests 1-based - max_idx = get_from_tuple_list(item.user_properties, "total_num_tests") - 1 - print(f"[{idx}/{max_idx}]", end=" ", flush=True) diff --git a/sqllogic/pytest.ini b/sqllogic/pytest.ini deleted file mode 100644 index 7648515d..00000000 --- a/sqllogic/pytest.ini +++ /dev/null @@ -1,2 +0,0 @@ -[pytest] -addopts = "--import-mode=importlib" \ No newline at end of file diff --git a/sqllogic/skipped_tests.py b/sqllogic/skipped_tests.py deleted file mode 100644 index 485ed9b9..00000000 --- a/sqllogic/skipped_tests.py +++ /dev/null @@ -1,42 +0,0 @@ -SKIPPED_TESTS = set( - [ - "test/sql/timezone/disable_timestamptz_casts.test", # <-- ICU extension is always loaded - "test/sql/copy/return_stats_truncate.test", # <-- handling was changed - "test/sql/copy/return_stats.test", # <-- handling was changed - "test/sql/copy/parquet/writer/skip_empty_write.test", # <-- handling was changed - "test/sql/types/map/map_empty.test", - "test/extension/wrong_function_type.test", # <-- JSON is always loaded - "test/sql/insert/test_insert_invalid.test", # <-- doesn't parse properly - "test/sql/cast/cast_error_location.test", # <-- python exception doesn't contain error location yet - "test/sql/pragma/test_query_log.test", # <-- query_log gets filled with NULL when con.query(...) is used - "test/sql/json/table/read_json_objects.test", # <-- Python client is always loaded with JSON available - "test/sql/copy/csv/zstd_crash.test", # <-- Python client is always loaded with Parquet available - "test/sql/error/extension_function_error.test", # <-- Python client is always loaded with TPCH available - "test/optimizer/joins/tpcds_nofail.test", # <-- Python client is always loaded with TPCDS available - "test/sql/settings/errors_as_json.test", # <-- errors_as_json not currently supported in Python - "test/sql/parallelism/intraquery/depth_first_evaluation_union_and_join.test", # <-- Python client is always loaded with TPCDS available - "test/sql/types/timestamp/test_timestamp_tz.test", # <-- Python client is always loaded wih ICU available - making the TIMESTAMPTZ::DATE cast pass - "test/sql/parser/invisible_spaces.test", # <-- Parser is getting tripped up on the invisible spaces - "test/sql/copy/csv/code_cov/csv_state_machine_invalid_utf.test", # <-- ConversionException is empty, see Python Mega Issue (duckdb-internal #1488) - "test/sql/copy/csv/test_csv_timestamp_tz.test", # <-- ICU is always loaded - "test/fuzzer/duckfuzz/duck_fuzz_column_binding_tests.test", # <-- ICU is always loaded - "test/sql/pragma/test_custom_optimizer_profiling.test", # Because of logic related to enabling 'restart' statement capabilities, this will not measure the right statement - "test/sql/pragma/test_custom_profiling_settings.test", # Because of logic related to enabling 'restart' statement capabilities, this will not measure the right statement - "test/sql/copy/csv/test_copy.test", # JSON is always loaded - "test/sql/copy/csv/test_timestamptz_12926.test", # ICU is always loaded - "test/fuzzer/pedro/in_clause_optimization_error.test", # error message differs due to a different execution path - "test/sql/order/test_limit_parameter.test", # error message differs due to a different execution path - "test/sql/catalog/test_set_search_path.test", # current_query() is not the same - "test/sql/catalog/table/create_table_parameters.test", # prepared statement error quirks - "test/sql/pragma/profiling/test_custom_profiling_rows_scanned.test", # we perform additional queries that mess with the expected metrics - "test/sql/pragma/profiling/test_custom_profiling_disable_metrics.test", # we perform additional queries that mess with the expected metrics - "test/sql/pragma/profiling/test_custom_profiling_result_set_size.test", # we perform additional queries that mess with the expected metrics - "test/sql/pragma/profiling/test_custom_profiling_result_set_size.test", # we perform additional queries that mess with the expected metrics - "test/sql/cte/materialized/materialized_cte_modifiers.test", # problems connected to auto installing tpcds from remote - "test/sql/tpcds/dsdgen_readonly.test", # problems connected to auto installing tpcds from remote - "test/sql/tpcds/tpcds_sf0.test", # problems connected to auto installing tpcds from remote - "test/sql/optimizer/plan/test_filter_pushdown_materialized_cte.test", # problems connected to auto installing tpcds from remote - "test/sql/explain/test_explain_analyze.test", # unknown problem with changes in API - "test/sql/pragma/profiling/test_profiling_all.test", # Because of logic related to enabling 'restart' statement capabilities, this will not measure the right statement - ] -) diff --git a/sqllogic/test_sqllogic.py b/sqllogic/test_sqllogic.py deleted file mode 100644 index 35736015..00000000 --- a/sqllogic/test_sqllogic.py +++ /dev/null @@ -1,166 +0,0 @@ -import gc -import os -import pathlib -import signal -import sys -from collections.abc import Generator -from typing import Any, Optional - -import pytest - -sys.path.append(str(pathlib.Path(__file__).parent.parent / "external" / "duckdb" / "scripts")) -from sqllogictest import ( - SQLLogicParser, - SQLLogicTest, - SQLParserException, -) -from sqllogictest.result import ( - ExecuteResult, - SQLLogicContext, - SQLLogicDatabase, - SQLLogicRunner, - TestException, -) - - -def sigquit_handler(signum, frame): - # Access the executor from the test_sqllogic function - if hasattr(test_sqllogic, "executor") and test_sqllogic.executor: - if test_sqllogic.executor.database and hasattr(test_sqllogic.executor.database, "connection"): - test_sqllogic.executor.database.connection.interrupt() - test_sqllogic.executor.cleanup() - test_sqllogic.executor = None - # Re-raise the signal to let the default handler take over - signal.signal(signal.SIGQUIT, signal.default_int_handler) - os.kill(os.getpid(), signal.SIGQUIT) - - -# Register the SIGQUIT handler -signal.signal(signal.SIGQUIT, sigquit_handler) - - -class SQLLogicTestExecutor(SQLLogicRunner): - def __init__(self, test_directory: str, build_directory: Optional[str] = None) -> None: - super().__init__(build_directory) - self.test_directory = test_directory - # TODO: get this from the `duckdb` package - self.AUTOLOADABLE_EXTENSIONS = [ - "aws", - "autocomplete", - "excel", - "fts", - "httpfs", - "json", - "parquet", - "postgres_scanner", - "sqlsmith", - "sqlite_scanner", - "tpcds", - "tpch", - # "azure", - # "inet", - # "icu", - # "spatial", - # TODO: table function isnt always autoloaded so test fails - ] - - def get_test_directory(self) -> str: - return self.test_directory - - def delete_database(self, path): - def test_delete_file(path): - try: - if os.path.exists(path): - os.remove(path) - except FileNotFoundError: - pass - - # FIXME: support custom test directory - test_delete_file(path) - test_delete_file(path + ".wal") - - def execute_test(self, test: SQLLogicTest) -> ExecuteResult: - try: - self.reset() - self.test = test - self.original_sqlite_test = self.test.is_sqlite_test() - - # Top level keywords - keywords = {"__TEST_DIR__": self.get_test_directory(), "__WORKING_DIRECTORY__": os.getcwd()} - - def update_value(_: SQLLogicContext) -> Generator[Any, Any, Any]: - # Yield once to represent one iteration, do not touch the keywords - yield None - - self.database = SQLLogicDatabase(":memory:", None) - pool = self.database.connect() - context = SQLLogicContext(pool, self, test.statements, keywords, update_value) - pool.initialize_connection(context, pool.get_connection()) - # The outer context is not a loop! - context.is_loop = False - - try: - context.verify_statements() - res = context.execute() - except TestException as e: - res = e.handle_result() - if res.type == ExecuteResult.Type.SKIPPED: - pytest.skip(str(e.message)) - else: - pytest.fail(str(e.message), pytrace=False) - - self.database.reset() - - # Clean up any databases that we created - for loaded_path in self.loaded_databases: - if not loaded_path: - continue - # Only delete database files that were created during the tests - if not loaded_path.startswith(self.get_test_directory()): - continue - os.remove(loaded_path) - return res - except KeyboardInterrupt: - if self.database: - self.database.interrupt() - raise - - def cleanup(self): - if self.database: - if hasattr(self.database, "connection"): - self.database.connection.interrupt() - self.database.reset() - self.database = None - # Clean up any remaining test databases - for loaded_path in self.loaded_databases: - if loaded_path and loaded_path.startswith(self.get_test_directory()): - try: - os.remove(loaded_path) - except FileNotFoundError: - pass - - -def test_sqllogic(test_script_path: pathlib.Path, pytestconfig: pytest.Config, tmp_path: pathlib.Path): - gc.collect() - sql_parser = SQLLogicParser() - try: - test = sql_parser.parse(str(test_script_path)) - except SQLParserException as e: - pytest.skip("Failed to parse SQLLogic script: " + str(e.message)) - return - - build_dir = pytestconfig.getoption("build_dir") - executor = SQLLogicTestExecutor(str(tmp_path), build_dir) - # Store executor in the function's arguments so it can be accessed by the interrupt handler - test_sqllogic.executor = executor - try: - result = executor.execute_test(test) - assert result.type == ExecuteResult.Type.SUCCESS - finally: - executor.cleanup() - test_sqllogic.executor = None - - -if __name__ == "__main__": - # Pass all arguments including the script name to pytest - sys.exit(pytest.main(sys.argv))