
Commit f30c921

Flexible execution and easier analysis with autogenerated Marks (#2609)
#### Reference Issues/PRs

#### What does this implement or fix?

Currently our tests lack metadata that would let us quickly filter tests or analyse the existing suite in depth:

- how many tests we have for each of the storage types we test against
- how many tests we have for each test type/level (unit, integration, ...)
- how many tests we have in different cross-sections of marks

This PR introduces several things:

- dynamic assignment of marks based on the physical directory structure, from which we can tell which tests sit at which level
- dynamic assignment of marks based on fixture usage: each test is marked according to the storage fixture it uses (lmdb, s3, real_s3, etc.)
- dynamic assignment of marks based on library options such as dynamic_schema, dynamic_strings, etc., including the Arctic encoding type

This allows quick queries over our tests, for example:

```
pytest -s --co -m "(lmdb and unit) or (lmdb and integration)"
```

to get a better understanding of where our tests are and what they cover. These marks can also be used later for test-execution selection.

Overall this approach adds important metadata to the tests with very little effort. That metadata can be enhanced significantly when needed, e.g. by taking information from external sources (databases, xls, GitHub) to dynamically mark tests as flaky, quarantined, etc. In other words, marks no longer need to be added and maintained by the team all the time: the effective marks on a test are the combination of those added explicitly to the test and those assigned to it from external resources.

Most important files to review:

- conftest.py (this is where the dynamic assignment happens)
- marking.py (a small helper class for better handling of marks in large-scale projects)

Additionally, this PR introduces small improvements to mark management (a rough sketch of such a helper is shown at the end of this description):

- the ability to assign many marks to a test on a single line
- the ability to group marks, and protection against misspelled marks, through the Mark and Marks classes

As there is currently no way to obtain a list of only the unique tests, a small command-line utility is also included:

```
$ . ../build_tooling/list_pytests.sh
Usage: -bash <pytest_mark_expression>
Example: -bash "pipeline and real_s3"
$ . ../build_tooling/list_pytests.sh pipeline and real_s3
2025-08-25 16:03:18,353 - client_utils - INFO - VERSION with AZURE and GCP
240/16608 tests collected (16368 deselected) in 3.67s
python/tests/integration/arcticdb/test_arctic.py::test_read_with_read_request_form
python/tests/integration/arcticdb/test_arctic_batch.py::test_delete_version_with_snapshot_batch
python/tests/integration/arcticdb/test_arctic_batch.py::test_read_batch_overall_query_builder
python/tests/integration/arcticdb/test_arctic_batch.py::test_read_batch_overall_query_builder_and_per_request_query_builder_raises
python/tests/integration/arcticdb/test_arctic_batch.py::test_read_batch_per_symbol_query_builder
python/tests/integration/arcticdb/test_arctic_batch.py::test_read_batch_query_builder_missing_keys
python/tests/integration/arcticdb/test_arctic_batch.py::test_read_batch_query_builder_symbol_doesnt_exist
python/tests/integration/arcticdb/test_arctic_batch.py::test_read_batch_query_builder_version_doesnt_exist
python/tests/integration/arcticdb/test_read_batch_more.py::test_read_batch_multiple_symbols_all_types_data_query_metadata
python/tests/integration/arcticdb/test_read_batch_more.py::test_read_batch_multiple_wrong_things_at_once
python/tests/integration/arcticdb/test_read_batch_more.py::test_read_batch_query_and_columns
python/tests/integration/arcticdb/test_read_batch_more.py::test_read_batch_query_with_and
```

IMPORTANT: this behaviour can be switched on and off. It is on by default; to switch it off, set `ARCTICDB_EXTENDED_MARKS=0`.

#### Any other comments?

#### Checklist

<details>
  <summary>Checklist for code changes...</summary>

- [ ] Have you updated the relevant docstrings, documentation and copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?

</details>

---------

Co-authored-by: Georgi Rusev <Georgi Rusev>
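marking.py is listed above as a file to review but is not included in this excerpt. For orientation only, here is a minimal sketch of the shape its `Mark` helper and `mark` decorator appear to have, inferred from their usage in `conftest.py` below (the real implementation may well differ):

```python
# Minimal sketch, NOT the actual tests/util/marking.py (which is not shown here).
# Inferred from usage in conftest.py: Mark("lmdb").name, @Marks.lmdb.mark,
# and @mark([Marks.abc, Marks.cde]) to apply several marks on a single line.
import pytest


class Mark:
    """Wraps a named pytest mark so a misspelled mark fails at attribute lookup."""

    def __init__(self, name: str):
        self.name = name
        # pytest.mark.<name>, usable directly as a decorator: @Marks.lmdb.mark
        self.mark = getattr(pytest.mark, name)


def mark(marks):
    """Apply a list of Mark objects to a single test in one line."""

    def decorator(func):
        for m in marks:
            func = m.mark(func)
        return func

    return decorator
```

With a helper of this shape, `@mark([Marks.lmdb, Marks.priority0])` attaches both marks on one line, and a typo such as `Marks.lmbd` raises an `AttributeError` instead of silently creating a new, unregistered mark.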
1 parent bcf2b2f commit f30c921

17 files changed: +450 -34 lines

build_tooling/list_pytests.sh

Lines changed: 27 additions & 0 deletions
```
#!/bin/bash

# Script: list_pytests.sh
# Description: Lists unique pytest test names (without parameterized fixture values)
# for the given pytest -m marker expression(s).

if [ $# -eq 0 ]; then
    echo "Usage: $0 <pytest_mark_expression>"
    echo "Example: $0 \"pipeline and real_s3\""
else
    # Join all arguments into a single marker expression
    MARK_EXPR="$*"

    # Collect and deduplicate test names
    tests=$(pytest --co -q -m "$MARK_EXPR" \
        | sed 's/\[.*\]//' \
        | sort -u)

    # Print tests
    echo "$tests"

    # Count them
    count=$(echo "$tests" | grep -c '^')
    echo "Total unique tests: $count"
fi
```
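The deduplication step simply strips pytest's parametrization suffix before sorting. A rough Python equivalent of the `sed 's/\[.*\]//' | sort -u` pipeline, for illustration only (not part of the commit; the node ids in the comment are invented):

```python
import re


def unique_tests(node_ids):
    """Strip parametrization suffixes such as '[lmdb-v1]' and deduplicate,
    mirroring the sed/sort pipeline in list_pytests.sh."""
    return sorted({re.sub(r"\[.*\]", "", nid) for nid in node_ids})


# unique_tests(["tests/unit/test_read.py::test_read[lmdb]",
#               "tests/unit/test_read.py::test_read[mem]"])
# -> ["tests/unit/test_read.py::test_read"]
```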

pyproject.toml

Lines changed: 33 additions & 2 deletions
```
@@ -53,9 +53,40 @@ exclude = '''
 [tool.pytest.ini_options]
 markers = [
     "storage: marks a test as a test against real storage (deselect with: -m 'not storage')",
+    "dedup: marks deduplication tests",
     "authentication: marks a test for authentication group (deselect with: -m 'not authentication')",
     "pipeline: Pipeline tests (deselect with: -m 'not pipeline')",
     "skip_fixture_params: will instruct fixture that supports excluding fixture values, which values to be excluded",
     "only_fixture_params: will instruct fixture supporting that to include only parameters from the list",
-    "bug_ids: allows specifying bug ids list the tests is based on or depends"
-]
+    "bug_ids: allows specifying bug ids list the tests is based on or depends",
+    "priority0: Most important tests group",
+    "compat: Mark from physical folder",
+    "integration: Mark from physical folder",
+    "unit: Mark from physical folder",
+    "stress: Mark from physical folder",
+    "nonreg: Mark from physical folder",
+    "hypothesis: Mark from physical folder",
+    "arcticdb: Mark from physical folder",
+    "version_store: Mark from physical folder",
+    "toolbox: Mark from physical folder",
+    "lmdb: Mark from test usage for execution against LMDB storage",
+    "mem: Mark from test usage for execution against In-memory storage",
+    "s3: Mark from test usage for execution against Simulated S3 storage",
+    "gcp: Mark from test usage for execution against Simulated GCP storage",
+    "azurite: Mark from test usage for execution against Simulated Azurite storage",
+    "nfs: Mark from test usage for execution against Simulated NFS S3 storage",
+    "mongo: Mark from test usage for execution against Mongo storage",
+    "real_s3: Mark from test usage for execution against AWS S3 storage",
+    "real_azure: Mark from test usage for execution against Azure storage",
+    "real_gcp: Mark from test usage for execution against GCP storage",
+    "dynamic_schema: marks test using dynamic_schema=True",
+    "empty_types: marks test using empty_types=True",
+    "delayed_deletes: marks test using delayed_deletes=True",
+    "sync_passive: marks test using sync_passive=True",
+    "use_tombstones: marks test using use_tombstones=True",
+    "segment_size: marks test using any of library segment size settings",
+    "dynamic_strings: marks tests using dynamic_strings=True",
+    "bucketize_dynamic: marks tests using bucketize_dynamic=True",
+    "prune_previous: marks tests using prune_previous_version=True",
+    "encoding_v2: marks tests that use V2 encoding"
+]
```
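Because every name above is registered, explicitly declared marks combine freely with the autogenerated ones in `-m` selection expressions. A hypothetical example (the test name and body are invented; `lmdb_version_store_v1` is an existing fixture):

```python
import pytest


@pytest.mark.priority0  # explicit mark from the registered list above
def test_roundtrip_smoke(lmdb_version_store_v1):
    # With extended marks enabled, this test is additionally auto-marked
    # 'lmdb' (from the fixture name) and, depending on the directory it
    # lives in, 'unit' or 'integration', so it would be selected by e.g.
    #   pytest -m "priority0 and lmdb"
    ...
```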

python/arcticdb/util/logger.py

Lines changed: 0 additions & 5 deletions
```
@@ -81,8 +81,3 @@ def __init__(self, message: str):
         # Sanitize the message
         sanitized_message = GitHubSanitizingHandler.sanitize_message(message)
         super().__init__(sanitized_message)
-
-
-sanitized_message = " fgy 54654 ARCTICDB_REAL_S3_SECRET_KEY=AwsB1YWasZBtonDiBcsqtz36M3m4yPl9EsiTS57w"
-sanitized_message = re.sub(r"(.*SECRET_KEY=).*$", r"\1***", sanitized_message, flags=re.IGNORECASE)
-print(sanitized_message)
```

python/tests/conftest.py

Lines changed: 280 additions & 1 deletion
```
@@ -7,7 +7,7 @@
 """

 import enum
-from typing import Callable, Generator, Union
+from typing import Callable, Generator, Iterable, Union
 from arcticdb.util.logger import get_logger
 from arcticdb.version_store._store import NativeVersionStore
 from arcticdb.version_store.library import Library
@@ -54,7 +54,9 @@
 from arcticdb.version_store._normalization import MsgPackNormalizer
 from arcticdb.util.test import create_df
 from arcticdb.arctic import Arctic
+from tests.util.marking import Mark
 from .util.mark import (
+    EXTENDED_MARKS,
     LMDB_TESTS_MARK,
     LOCAL_STORAGE_TESTS_ENABLED,
     MACOS_WHEEL_BUILD,
```
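The new `EXTENDED_MARKS` import comes from `python/tests/util/mark.py`, which is not part of this diff. Given the `ARCTICDB_EXTENDED_MARKS=0` switch described above, the flag presumably looks something like the following (an assumption, not the actual code):

```python
import os

# Assumed shape of the toggle in python/tests/util/mark.py (not shown in this diff):
# extended marks are on by default and disabled by setting ARCTICDB_EXTENDED_MARKS=0.
EXTENDED_MARKS = os.getenv("ARCTICDB_EXTENDED_MARKS", "1") != "0"
```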
```
@@ -1541,3 +1543,280 @@ def clear_query_stats():
     yield
     query_stats.disable()
     query_stats.reset_stats()
+
+
+# region Pytest special xfail handling
+
+
+def pytest_runtest_makereport(item, call):
+    from tests.pytest_xfail import pytest_runtest_makereport
+
+    return pytest_runtest_makereport(item, call)
+
+
+def pytest_terminal_summary(terminalreporter, exitstatus):
+    from tests.pytest_xfail import pytest_terminal_summary
+
+    pytest_terminal_summary(terminalreporter, exitstatus)
+
+
+# endregion
+
+# region =================================== Pytest plugins&hooks ====================================
+
+
+class Marks:
+    """Central Marks Registry
+    Usage:
+        @mark([Marks.abc, Marks.cde])
+        def test_first():
+            ....
+        @Marks.abc.mark
+        def test_two():
+            ....
+    """
+
+    storage = Mark("storage")
+    dedup = Mark("dedup")
+    authentication = Mark("authentication")
+    pipeline = Mark("pipeline")
+    compat = Mark("compat")
+    dynamic_schema = Mark("dynamic_schema")
+    encoding_v2 = Mark("encoding_v2")
+    empty_types = Mark("empty_types")
+    delayed_deletes = Mark("delayed_deletes")
+    use_tombstones = Mark("use_tombstones")
+    sync_passive = Mark("sync_passive")
+    segment_size = Mark("segment_size")
+    dynamic_strings = Mark("dynamic_strings")
+    prune_previous = Mark("prune_previous")
+    bucketize_dynamic = Mark("bucketize_dynamic")
+    lmdb = Mark("lmdb")
+    mem = Mark("mem")
+    nfs = Mark("nfs")
+    mongo = Mark("mongo")
+    azurite = Mark("azurite")
+    s3 = Mark("s3")
+    gcp = Mark("gcp")
+    real_s3 = Mark("real_s3")
+    real_gcp = Mark("real_gcp")
+    real_azure = Mark("real_azure")
+    integration = Mark("integration")
+    unit = Mark("unit")
+    stress = Mark("stress")
+    nonreg = Mark("nonreg")
+    hypothesis = Mark("hypothesis")
+    arcticdb = Mark("arcticdb")
+    version_store = Mark("version_store")
+    toolbox = Mark("toolbox")
+    priority0 = Mark("priority0")
+
+    @classmethod
+    def list_all_marks(cls):
+        """Lists all marks in the registry"""
+        return [v for k, v in cls.__dict__.items() if isinstance(v, Mark)]
+
+
+def apply_hybrid_marks(item, source_values: Iterable[str], rules: dict):
+    """
+    Apply marks to a pytest item if any of the source_values matches a rule.
+
+    :param item: pytest.Item
+    :param source_values: values to search in (e.g., [item.name], item.fixturenames, [item.fspath])
+    :param rules: dict of mark_name -> list[str | regex]
+    """
+    for mark_name, patterns in rules.items():
+
+        # Deduplication guard
+        if item.get_closest_marker(mark_name):
+            continue
+
+        marked = False
+        for pattern in patterns:
+            if marked:
+                break
+            for value in source_values:
+                value_lower = value.lower()
+                if isinstance(pattern, str):
+                    if pattern.lower() in value_lower:
+                        item.add_marker(mark_name)
+                        marked = True
+                        break
+                elif pattern.search(value):
+                    item.add_marker(mark_name)
+                    marked = True
+                    break
+
+
+# Define how fixtures map to marks
+ALL_FIXTURES = [
+    re.compile(r"^arctic_client(?!.*lmdb).*", re.I),
+    re.compile(r"^arctic_library(?!.*lmdb).*", re.I),
+    re.compile(r"^object_and_mem_and_lmdb.*", re.I),
+]
+ALL_FIXTURES_AND_LMDB = [
+    re.compile(r"^arctic_client.*", re.I),
+    re.compile(r"^arctic_library.*", re.I),
+    re.compile(r"^object_and_mem_and_lmdb.*", re.I),
+]
+BASIC_ARCTIC_FIXTURES = [re.compile(r"^basic_arctic", re.I)]
+BASIC_STORE_FIXTURES = [re.compile(r"^(basic_store.*|basic_version_.*) ", re.I)]
+OBJECT_STORE_FIXTURES = [re.compile(r"^(object_store.*|object_version_.*)", re.I)]
+LOCAL_OBJECT_STORE_FIXTURES = [re.compile(r"^(local_object_store.*|local_object_version.*)", re.I)]
+VERSION_STORE_AND_REAL_FIXTURES = [re.compile(r"^version_store_and_real*", re.I)]
+
+FIXTURES_TO_MARK = {
+    Marks.lmdb.name: [re.compile(r"^lmdb_.*", re.I)]
+    + ALL_FIXTURES_AND_LMDB
+    + VERSION_STORE_AND_REAL_FIXTURES
+    + BASIC_STORE_FIXTURES,
+    Marks.mem.name: [re.compile(r"^(mem_.*|in_memory_.*)", re.I)] + ALL_FIXTURES + BASIC_STORE_FIXTURES,
+    Marks.s3.name: [re.compile(r"^(s3_.*|mock_s3.*)", re.I)]
+    + ALL_FIXTURES
+    + BASIC_STORE_FIXTURES
+    + LOCAL_OBJECT_STORE_FIXTURES
+    + OBJECT_STORE_FIXTURES,
+    Marks.nfs.name: [re.compile(r"^nfs_.*", re.I)] + ALL_FIXTURES + OBJECT_STORE_FIXTURES,
+    Marks.gcp.name: [re.compile(r"^gcp_.*", re.I)] + ALL_FIXTURES,
+    Marks.mongo.name: [re.compile(r"^mongo_.*", re.I)] + ALL_FIXTURES,
+    Marks.azurite.name: [re.compile(r"^(azurite_.*|azure_.*)", re.I)]
+    + ALL_FIXTURES
+    + LOCAL_OBJECT_STORE_FIXTURES
+    + OBJECT_STORE_FIXTURES
+    + OBJECT_STORE_FIXTURES,
+    Marks.real_s3.name: [re.compile(r"^real_s3_.*", re.I)]
+    + ALL_FIXTURES
+    + BASIC_STORE_FIXTURES
+    + BASIC_ARCTIC_FIXTURES
+    + VERSION_STORE_AND_REAL_FIXTURES
+    + OBJECT_STORE_FIXTURES,
+    Marks.real_azure.name: [re.compile(r"^real_azure_.*", re.I)]
+    + ALL_FIXTURES
+    + BASIC_STORE_FIXTURES
+    + BASIC_ARCTIC_FIXTURES
+    + VERSION_STORE_AND_REAL_FIXTURES
+    + OBJECT_STORE_FIXTURES,
+    Marks.real_gcp.name: [re.compile(r"^real_gcp_.*", re.I)]
+    + ALL_FIXTURES
+    + BASIC_STORE_FIXTURES
+    + BASIC_ARCTIC_FIXTURES
+    + VERSION_STORE_AND_REAL_FIXTURES
+    + OBJECT_STORE_FIXTURES,
+    Marks.dynamic_schema.name: [re.compile(r".*(dynamic_schema|dynamic(?!string)).*", re.I)],
+    Marks.empty_types.name: [
+        "empty_types",
+        "lmdb_version_store_delayed_deletes_v1",
+        "lmdb_version_store_delayed_deletes_v2",
+    ],
+    Marks.delayed_deletes.name: ["delayed_deletes"],
+    Marks.use_tombstones.name: ["tombstone", "basic_store_prune_previous", "basic_store_prune_previous"],
+    Marks.sync_passive.name: ["sync_passive"],
+    Marks.bucketize_dynamic.name: ["buckets"],
+    Marks.prune_previous.name: [
+        "prune_previous",
+        "lmdb_version_store_delayed_deletes_v1",
+        "lmdb_version_store_tombstone_and_pruning",
+        "basic_store_delayed_deletes_v1",
+        "basic_store_delayed_deletes_v2",
+    ],
+    Marks.segment_size.name: ["segment", "lmdb_version_store_no_symbol_list"],
+    Marks.dynamic_strings.name: [
+        "dynamic_strings",
+        "real_s3_version_store_dynamic_schema",
+        "real_gcp_version_store_dynamic_schema",
+        "real_azure_version_store_dynamic_schema",
+        "nfs_backed_s3_version_store_v1",
+        "nfs_backed_s3_version_store_v2",
+        "s3_version_store_v1",
+        "s3_version_store_v2",
+        "s3_version_store_dynamic_schema_v1",
+        "s3_version_store_dynamic_schema_v2",
+        "nfs_backed_s3_version_store_dynamic_schema_v2",
+        "nfs_backed_s3_version_store_dynamic_schema_v2",
+        "azure_version_store_dynamic_schema",
+        "lmdb_version_store_v1",
+        "lmdb_version_store_v2",
+        "lmdb_version_store_prune_previous",
+        "lmdb_version_store_dynamic_schema_v1",
+        "lmdb_version_store_dynamic_schema_v2",
+        "lmdb_version_store_dynamic_schema",
+        "lmdb_version_store_empty_types_v1",
+        "lmdb_version_store_empty_types_v2",
+        "lmdb_version_store_empty_types_dynamic_schema_v1",
+        "lmdb_version_store_empty_types_dynamic_schema_v2",
+        "lmdb_version_store_delayed_deletes_v1",
+        "lmdb_version_store_delayed_deletes_v2",
+        "lmdb_version_store_tombstones_no_symbol_list",
+        "lmdb_version_store_allows_pickling",
+        "lmdb_version_store_tiny_segment_dynamic_strings",
+        "basic_store_prune_previous",
+        "basic_store_dynamic_schema_v1",
+        "basic_store_dynamic_schema_v2",
+        "basic_store_dynamic_schema",
+        "basic_store_delayed_deletes_v1",
+        "basic_store_delayed_deletes_v2",
+        "basic_store_tombstones_no_symbol_list",
+        "basic_store_allows_pickling",
+    ],
+    Marks.encoding_v2.name: [
+        re.compile(
+            r".*("
+            r"arctic_client|"
+            r"nfs_backed_s3_version_store_dynamic_schema|"
+            r"lmdb_version_store_|"
+            r"lmdb_version_store_dynamic_schema|"
+            r"lmdb_version_store_empty_types_|"
+            r"lmdb_version_store_empty_types_dynamic_schema|"
+            r"lmdb_version_store_delayed_deletes|"
+            r"basic_store_dynamic_schema"
+            r").*(?!v1).*",
+            re.I,
+        )
+    ],
+}
+
+ALL_FIXTURE_NAMES = set()
+
+
+def pytest_collection_modifyitems(config, items):
+    """This hook is useful for filtering tests in/out and for modifying tests
+    as soon as pytest collects them, before execution.
+    """
+
+    def evaluate_item(item, part_string: str, mark_to_add: Mark):
+        """Check whether the item's (test's) module path contains a certain string.
+        If it does, mark the test with the specified mark.
+        """
+        doc = item.module.__file__
+        if doc and part_string in doc.lower():
+            item.add_marker(mark_to_add)
+
+    # Apply this process only when asked for
+    if not EXTENDED_MARKS:
+        return
+
+    start_time = time.time()
+    for item in items:
+        ## Add custom marks to the test depending on the file path of the test's module.
+        ## Effectively this silently marks each test with its physical location in the repo,
+        ## allowing that physical location to be used later in combination with other marks.
+        ##
+        ## Example:
+        ##     pytest -s --co -m "toolbox and storage"
+        evaluate_item(item, Marks.unit.name, Marks.unit.mark)
+        evaluate_item(item, Marks.integration.name, Marks.integration.mark)
+        evaluate_item(item, Marks.stress.name, Marks.stress.mark)
+        evaluate_item(item, Marks.hypothesis.name, Marks.hypothesis.mark)
+        evaluate_item(item, Marks.nonreg.name, Marks.integration.mark)
+        evaluate_item(item, Marks.version_store.name, Marks.version_store.mark)
+        evaluate_item(item, Marks.toolbox.name, Marks.toolbox.mark)
+
+        # --- Auto-mark by fixtures ---
+        fixtures = set(item.fixturenames)
+        ALL_FIXTURE_NAMES.update(fixtures)
+        apply_hybrid_marks(item, fixtures, FIXTURES_TO_MARK)
+
+    get_logger().info(f"Extended marks applied for: {time.time() - start_time} sec.")
+
+
+# endregion
```
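To make the fixture-to-mark matching concrete, here is a small illustration (not part of the commit) of how `apply_hybrid_marks` resolves fixture names, assuming the function from the conftest.py diff above is in scope; `FakeItem` is a hypothetical stand-in for `pytest.Item`:

```python
import re


class FakeItem:
    """Hypothetical stand-in for pytest.Item, with just enough API for apply_hybrid_marks."""

    def __init__(self, fixturenames):
        self.fixturenames = fixturenames
        self.markers = []

    def get_closest_marker(self, name):
        return name if name in self.markers else None

    def add_marker(self, name):
        self.markers.append(name)


rules = {
    "s3": [re.compile(r"^(s3_.*|mock_s3.*)", re.I)],  # regex rule, as in FIXTURES_TO_MARK
    "dynamic_strings": ["s3_version_store_v1"],  # plain-string rule: substring match
}
item = FakeItem(["s3_version_store_v1"])
apply_hybrid_marks(item, item.fixturenames, rules)
print(item.markers)  # ['s3', 'dynamic_strings']
```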
