@@ -781,14 +781,25 @@ def perform_collect(
781
781
try :
782
782
initialpaths : list [Path ] = []
783
783
initialpaths_with_parents : list [Path ] = []
784
- for arg in args :
785
- collection_argument = resolve_collection_argument (
784
+
785
+ collection_args = [
786
+ resolve_collection_argument (
786
787
self .config .invocation_params .dir ,
787
788
arg ,
789
+ i ,
788
790
as_pypath = self .config .option .pyargs ,
789
791
consider_namespace_packages = consider_namespace_packages ,
790
792
)
791
- self ._initial_parts .append (collection_argument )
793
+ for i , arg in enumerate (args )
794
+ ]
795
+
796
+ if not self .config .getoption ("keepduplicates" ):
797
+ # Normalize the collection arguments -- remove duplicates and overlaps.
798
+ self ._initial_parts = normalize_collection_arguments (collection_args )
799
+ else :
800
+ self ._initial_parts = collection_args
801
+
802
+ for collection_argument in self ._initial_parts :
792
803
initialpaths .append (collection_argument .path )
793
804
initialpaths_with_parents .append (collection_argument .path )
794
805
initialpaths_with_parents .extend (collection_argument .path .parents )
@@ -859,6 +870,7 @@ def collect(self) -> Iterator[nodes.Item | nodes.Collector]:
859
870
860
871
argpath = collection_argument .path
861
872
names = collection_argument .parts
873
+ parametrization = collection_argument .parametrization
862
874
module_name = collection_argument .module_name
863
875
864
876
# resolve_collection_argument() ensures this.
@@ -943,12 +955,18 @@ def collect(self) -> Iterator[nodes.Item | nodes.Collector]:
943
955
944
956
# Name part e.g. `TestIt` in `/a/b/test_file.py::TestIt::test_it`.
945
957
else :
946
- # TODO: Remove parametrized workaround once collection structure contains
947
- # parametrization.
948
- is_match = (
949
- node .name == matchparts [0 ]
950
- or node .name .split ("[" )[0 ] == matchparts [0 ]
951
- )
958
+ if len (matchparts ) == 1 :
959
+ # This the last part, one parametrization goes.
960
+ if parametrization is not None :
961
+ # A parametrized arg must match exactly.
962
+ is_match = node .name == matchparts [0 ] + parametrization
963
+ else :
964
+ # A non-parameterized arg matches all parametrizations (if any).
965
+ # TODO: Remove the hacky split once the collection structure
966
+ # contains parametrization.
967
+ is_match = node .name .split ("[" )[0 ] == matchparts [0 ]
968
+ else :
969
+ is_match = node .name == matchparts [0 ]
952
970
if is_match :
953
971
work .append ((node , matchparts [1 :]))
954
972
any_matched_in_collector = True
@@ -969,12 +987,9 @@ def genitems(self, node: nodes.Item | nodes.Collector) -> Iterator[nodes.Item]:
969
987
yield node
970
988
else :
971
989
assert isinstance (node , nodes .Collector )
972
- keepduplicates = self .config .getoption ("keepduplicates" )
973
990
# For backward compat, dedup only applies to files.
974
- handle_dupes = not ( keepduplicates and isinstance (node , nodes .File ) )
991
+ handle_dupes = not isinstance (node , nodes .File )
975
992
rep , duplicate = self ._collect_one_node (node , handle_dupes )
976
- if duplicate and not keepduplicates :
977
- return
978
993
if rep .passed :
979
994
for subnode in rep .result :
980
995
yield from self .genitems (subnode )
@@ -1024,12 +1039,15 @@ class CollectionArgument:
1024
1039
1025
1040
path : Path
1026
1041
parts : Sequence [str ]
1042
+ parametrization : str | None
1027
1043
module_name : str | None
1044
+ original_index : int
1028
1045
1029
1046
1030
1047
def resolve_collection_argument (
1031
1048
invocation_path : Path ,
1032
1049
arg : str ,
1050
+ arg_index : int ,
1033
1051
* ,
1034
1052
as_pypath : bool = False ,
1035
1053
consider_namespace_packages : bool = False ,
@@ -1052,14 +1070,15 @@ def resolve_collection_argument(
1052
1070
When as_pypath is True, expects that the command-line argument actually contains
1053
1071
module paths instead of file-system paths:
1054
1072
1055
- "pkg.tests.test_foo::TestClass::test_foo"
1073
+ "pkg.tests.test_foo::TestClass::test_foo[a,b] "
1056
1074
1057
1075
In which case we search sys.path for a matching module, and then return the *path* to the
1058
1076
found module, which may look like this:
1059
1077
1060
1078
CollectionArgument(
1061
1079
path=Path("/home/u/myvenv/lib/site-packages/pkg/tests/test_foo.py"),
1062
1080
parts=["TestClass", "test_foo"],
1081
+ parametrization="[a,b]",
1063
1082
module_name="pkg.tests.test_foo",
1064
1083
)
1065
1084
@@ -1068,10 +1087,9 @@ def resolve_collection_argument(
1068
1087
"""
1069
1088
base , squacket , rest = arg .partition ("[" )
1070
1089
strpath , * parts = base .split ("::" )
1071
- if squacket :
1072
- if not parts :
1073
- raise UsageError (f"path cannot contain [] parametrization: { arg } " )
1074
- parts [- 1 ] = f"{ parts [- 1 ]} { squacket } { rest } "
1090
+ if squacket and not parts :
1091
+ raise UsageError (f"path cannot contain [] parametrization: { arg } " )
1092
+ parametrization = f"{ squacket } { rest } " if squacket else None
1075
1093
module_name = None
1076
1094
if as_pypath :
1077
1095
pyarg_strpath = search_pypath (
@@ -1099,5 +1117,59 @@ def resolve_collection_argument(
1099
1117
return CollectionArgument (
1100
1118
path = fspath ,
1101
1119
parts = parts ,
1120
+ parametrization = parametrization ,
1102
1121
module_name = module_name ,
1122
+ original_index = arg_index ,
1123
+ )
1124
+
1125
+
1126
+ def is_collection_argument_subsumed_by (
1127
+ arg : CollectionArgument , by : CollectionArgument
1128
+ ) -> bool :
1129
+ """Check if `arg` is subsumed (contained) by `by`."""
1130
+ # First check path subsumption.
1131
+ if by .path != arg .path :
1132
+ # `by` subsumes `arg` if `by` is a parent directory of `arg` and has no
1133
+ # parts (collects everything in that directory).
1134
+ if not by .parts :
1135
+ return arg .path .is_relative_to (by .path )
1136
+ return False
1137
+ # Paths are equal, check parts.
1138
+ # For example: ("TestClass",) is a prefix of ("TestClass", "test_method").
1139
+ if len (by .parts ) > len (arg .parts ) or arg .parts [: len (by .parts )] != by .parts :
1140
+ return False
1141
+ # Paths and parts are equal, check parametrization.
1142
+ # A `by` without parametrization (None) matches everything, e.g.
1143
+ # `pytest x.py::test_it` matches `x.py::test_it[0]`. Otherwise must be
1144
+ # exactly equal.
1145
+ if by .parametrization is not None and by .parametrization != arg .parametrization :
1146
+ return False
1147
+ return True
1148
+
1149
+
1150
+ def normalize_collection_arguments (
1151
+ collection_args : Sequence [CollectionArgument ],
1152
+ ) -> list [CollectionArgument ]:
1153
+ """Normalize collection arguments to eliminate overlapping paths and parts.
1154
+
1155
+ Detects when collection arguments overlap in either paths or parts and only
1156
+ keeps the shorter prefix, or the earliest argument if duplicate, preserving
1157
+ order. The result is prefix-free.
1158
+ """
1159
+ # A quadratic algorithm is not acceptable since large inputs are possible.
1160
+ # So this uses an O(n*log(n)) algorithm which takes advantage of the
1161
+ # property that after sorting, a collection argument will immediately
1162
+ # precede collection arguments it subsumes. An O(n) algorithm is not worth
1163
+ # it.
1164
+ collection_args_sorted = sorted (
1165
+ collection_args ,
1166
+ key = lambda arg : (arg .path , arg .parts , arg .parametrization or "" ),
1103
1167
)
1168
+ normalized : list [CollectionArgument ] = []
1169
+ last_kept = None
1170
+ for arg in collection_args_sorted :
1171
+ if last_kept is None or not is_collection_argument_subsumed_by (arg , last_kept ):
1172
+ normalized .append (arg )
1173
+ last_kept = arg
1174
+ normalized .sort (key = lambda arg : arg .original_index )
1175
+ return normalized
0 commit comments