Skip to content

Commit d036b12

Browse files
authored
Merge pull request #13704 from bluetech/overlapping-collection-args
main: add explicit handling of overlapping collection arguments
2 parents 7cfe8aa + 6764439 commit d036b12

File tree

7 files changed

+822
-51
lines changed

7 files changed

+822
-51
lines changed

changelog/12083.breaking.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Fixed a bug where an invocation such as `pytest a/ a/b` would cause only tests from `a/b` to run, and not other tests under `a/`.
2+
3+
The fix entails a few breaking changes to how such overlapping arguments and duplicates are handled:
4+
5+
1. `pytest a/b a/` or `pytest a/ a/b` are equivalent to `pytest a`; if an argument overlaps another arguments, only the prefix remains.
6+
7+
2. `pytest x.py x.py` is equivalent to `pytest x.py`; previously such an invocation was taken as an explicit request to run the tests from the file twice.
8+
9+
If you rely on these behaviors, consider using :ref:`--keep-duplicates <duplicate-paths>`, which retains its existing behavior (including the bug).

doc/en/example/pythoncollection.rst

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ You can run all of the tests within ``tests/`` *except* for ``tests/foobar/test_
5555
by invoking ``pytest`` with ``--deselect tests/foobar/test_foobar_01.py::test_a``.
5656
``pytest`` allows multiple ``--deselect`` options.
5757

58+
.. _duplicate-paths:
59+
5860
Keeping duplicate paths specified from command line
5961
----------------------------------------------------
6062

@@ -82,18 +84,6 @@ Example:
8284
collected 2 items
8385
...
8486
85-
As the collector just works on directories, if you specify twice a single test file, ``pytest`` will
86-
still collect it twice, no matter if the ``--keep-duplicates`` is not specified.
87-
Example:
88-
89-
.. code-block:: pytest
90-
91-
pytest test_a.py test_a.py
92-
93-
...
94-
collected 2 items
95-
...
96-
9787
9888
Changing directory recursion
9989
-----------------------------------------------------

src/_pytest/main.py

Lines changed: 90 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -781,14 +781,25 @@ def perform_collect(
781781
try:
782782
initialpaths: list[Path] = []
783783
initialpaths_with_parents: list[Path] = []
784-
for arg in args:
785-
collection_argument = resolve_collection_argument(
784+
785+
collection_args = [
786+
resolve_collection_argument(
786787
self.config.invocation_params.dir,
787788
arg,
789+
i,
788790
as_pypath=self.config.option.pyargs,
789791
consider_namespace_packages=consider_namespace_packages,
790792
)
791-
self._initial_parts.append(collection_argument)
793+
for i, arg in enumerate(args)
794+
]
795+
796+
if not self.config.getoption("keepduplicates"):
797+
# Normalize the collection arguments -- remove duplicates and overlaps.
798+
self._initial_parts = normalize_collection_arguments(collection_args)
799+
else:
800+
self._initial_parts = collection_args
801+
802+
for collection_argument in self._initial_parts:
792803
initialpaths.append(collection_argument.path)
793804
initialpaths_with_parents.append(collection_argument.path)
794805
initialpaths_with_parents.extend(collection_argument.path.parents)
@@ -859,6 +870,7 @@ def collect(self) -> Iterator[nodes.Item | nodes.Collector]:
859870

860871
argpath = collection_argument.path
861872
names = collection_argument.parts
873+
parametrization = collection_argument.parametrization
862874
module_name = collection_argument.module_name
863875

864876
# resolve_collection_argument() ensures this.
@@ -943,12 +955,18 @@ def collect(self) -> Iterator[nodes.Item | nodes.Collector]:
943955

944956
# Name part e.g. `TestIt` in `/a/b/test_file.py::TestIt::test_it`.
945957
else:
946-
# TODO: Remove parametrized workaround once collection structure contains
947-
# parametrization.
948-
is_match = (
949-
node.name == matchparts[0]
950-
or node.name.split("[")[0] == matchparts[0]
951-
)
958+
if len(matchparts) == 1:
959+
# This the last part, one parametrization goes.
960+
if parametrization is not None:
961+
# A parametrized arg must match exactly.
962+
is_match = node.name == matchparts[0] + parametrization
963+
else:
964+
# A non-parameterized arg matches all parametrizations (if any).
965+
# TODO: Remove the hacky split once the collection structure
966+
# contains parametrization.
967+
is_match = node.name.split("[")[0] == matchparts[0]
968+
else:
969+
is_match = node.name == matchparts[0]
952970
if is_match:
953971
work.append((node, matchparts[1:]))
954972
any_matched_in_collector = True
@@ -969,12 +987,9 @@ def genitems(self, node: nodes.Item | nodes.Collector) -> Iterator[nodes.Item]:
969987
yield node
970988
else:
971989
assert isinstance(node, nodes.Collector)
972-
keepduplicates = self.config.getoption("keepduplicates")
973990
# For backward compat, dedup only applies to files.
974-
handle_dupes = not (keepduplicates and isinstance(node, nodes.File))
991+
handle_dupes = not isinstance(node, nodes.File)
975992
rep, duplicate = self._collect_one_node(node, handle_dupes)
976-
if duplicate and not keepduplicates:
977-
return
978993
if rep.passed:
979994
for subnode in rep.result:
980995
yield from self.genitems(subnode)
@@ -1024,12 +1039,15 @@ class CollectionArgument:
10241039

10251040
path: Path
10261041
parts: Sequence[str]
1042+
parametrization: str | None
10271043
module_name: str | None
1044+
original_index: int
10281045

10291046

10301047
def resolve_collection_argument(
10311048
invocation_path: Path,
10321049
arg: str,
1050+
arg_index: int,
10331051
*,
10341052
as_pypath: bool = False,
10351053
consider_namespace_packages: bool = False,
@@ -1052,14 +1070,15 @@ def resolve_collection_argument(
10521070
When as_pypath is True, expects that the command-line argument actually contains
10531071
module paths instead of file-system paths:
10541072
1055-
"pkg.tests.test_foo::TestClass::test_foo"
1073+
"pkg.tests.test_foo::TestClass::test_foo[a,b]"
10561074
10571075
In which case we search sys.path for a matching module, and then return the *path* to the
10581076
found module, which may look like this:
10591077
10601078
CollectionArgument(
10611079
path=Path("/home/u/myvenv/lib/site-packages/pkg/tests/test_foo.py"),
10621080
parts=["TestClass", "test_foo"],
1081+
parametrization="[a,b]",
10631082
module_name="pkg.tests.test_foo",
10641083
)
10651084
@@ -1068,10 +1087,9 @@ def resolve_collection_argument(
10681087
"""
10691088
base, squacket, rest = arg.partition("[")
10701089
strpath, *parts = base.split("::")
1071-
if squacket:
1072-
if not parts:
1073-
raise UsageError(f"path cannot contain [] parametrization: {arg}")
1074-
parts[-1] = f"{parts[-1]}{squacket}{rest}"
1090+
if squacket and not parts:
1091+
raise UsageError(f"path cannot contain [] parametrization: {arg}")
1092+
parametrization = f"{squacket}{rest}" if squacket else None
10751093
module_name = None
10761094
if as_pypath:
10771095
pyarg_strpath = search_pypath(
@@ -1099,5 +1117,59 @@ def resolve_collection_argument(
10991117
return CollectionArgument(
11001118
path=fspath,
11011119
parts=parts,
1120+
parametrization=parametrization,
11021121
module_name=module_name,
1122+
original_index=arg_index,
1123+
)
1124+
1125+
1126+
def is_collection_argument_subsumed_by(
1127+
arg: CollectionArgument, by: CollectionArgument
1128+
) -> bool:
1129+
"""Check if `arg` is subsumed (contained) by `by`."""
1130+
# First check path subsumption.
1131+
if by.path != arg.path:
1132+
# `by` subsumes `arg` if `by` is a parent directory of `arg` and has no
1133+
# parts (collects everything in that directory).
1134+
if not by.parts:
1135+
return arg.path.is_relative_to(by.path)
1136+
return False
1137+
# Paths are equal, check parts.
1138+
# For example: ("TestClass",) is a prefix of ("TestClass", "test_method").
1139+
if len(by.parts) > len(arg.parts) or arg.parts[: len(by.parts)] != by.parts:
1140+
return False
1141+
# Paths and parts are equal, check parametrization.
1142+
# A `by` without parametrization (None) matches everything, e.g.
1143+
# `pytest x.py::test_it` matches `x.py::test_it[0]`. Otherwise must be
1144+
# exactly equal.
1145+
if by.parametrization is not None and by.parametrization != arg.parametrization:
1146+
return False
1147+
return True
1148+
1149+
1150+
def normalize_collection_arguments(
1151+
collection_args: Sequence[CollectionArgument],
1152+
) -> list[CollectionArgument]:
1153+
"""Normalize collection arguments to eliminate overlapping paths and parts.
1154+
1155+
Detects when collection arguments overlap in either paths or parts and only
1156+
keeps the shorter prefix, or the earliest argument if duplicate, preserving
1157+
order. The result is prefix-free.
1158+
"""
1159+
# A quadratic algorithm is not acceptable since large inputs are possible.
1160+
# So this uses an O(n*log(n)) algorithm which takes advantage of the
1161+
# property that after sorting, a collection argument will immediately
1162+
# precede collection arguments it subsumes. An O(n) algorithm is not worth
1163+
# it.
1164+
collection_args_sorted = sorted(
1165+
collection_args,
1166+
key=lambda arg: (arg.path, arg.parts, arg.parametrization or ""),
11031167
)
1168+
normalized: list[CollectionArgument] = []
1169+
last_kept = None
1170+
for arg in collection_args_sorted:
1171+
if last_kept is None or not is_collection_argument_subsumed_by(arg, last_kept):
1172+
normalized.append(arg)
1173+
last_kept = arg
1174+
normalized.sort(key=lambda arg: arg.original_index)
1175+
return normalized

0 commit comments

Comments
 (0)