Skip to content

Commit d345749

Browse files
committed
Fixes shuffling within bucket
1 parent e511194 commit d345749

File tree

5 files changed

+103
-82
lines changed

5 files changed

+103
-82
lines changed

pytest_random_order/plugin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def pytest_collection_modifyitems(session, config, items):
3333

3434
try:
3535
bucket_type = config.getoption('random_order_bucket')
36-
_shuffle_items(items, key=_random_order_item_keys[bucket_type], disable=_disable)
36+
_shuffle_items(items, bucket_key=_random_order_item_keys[bucket_type], disable=_disable)
3737

3838
except Exception as e:
3939
# See the finally block -- we only fail if we have lost user's tests.

pytest_random_order/shuffler.py

Lines changed: 66 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,91 @@
11
# -*- coding: utf-8 -*-
22

3+
import collections
34
import operator
45
import random
56

67

7-
def _shuffle_items(items, key=None, disable=None, preserve_bucket_order=False):
8+
"""
9+
`bucket` is a string representing the bucket in which the item falls based on user's chosen
10+
bucket type.
11+
12+
`disabled` is either a falsey value to mark that the item is ready for shuffling (shuffling is not disabled),
13+
or a truthy value in which case the item won't be shuffled among other items with the same key.
14+
15+
In some cases it is important for the `disabled` to be more than just True in order
16+
to preserve a distinct disabled sub-bucket within a larger bucket and not mix it up with another
17+
disabled sub-bucket of the same larger bucket.
18+
"""
19+
ItemKey = collections.namedtuple('ItemKey', field_names=('bucket', 'disabled', 'x'))
20+
ItemKey.__new__.__defaults__ = (None, None)
21+
22+
23+
def _shuffle_items(items, bucket_key=None, disable=None, _shuffle_buckets=True):
824
"""
9-
Shuffles `items`, a list, in place.
25+
Shuffles a list of `items` in place.
1026
11-
If `key` is None, items are shuffled across the entire list.
27+
If `bucket_key` is None, items are shuffled across the entire list.
1228
13-
Otherwise `key` is a function called for each item in `items` to
14-
calculate key of bucket in which the item falls.
29+
`bucket_key` is an optional function called for each item in `items` to
30+
calculate the key of bucket in which the item falls.
1531
16-
Bucket defines the boundaries across which tests will not
17-
be reordered.
32+
Bucket defines the boundaries across which items will not
33+
be shuffled.
1834
1935
If `disable` is function and returns True for ALL items
2036
in a bucket, items in this bucket will remain in their original order.
2137
22-
`preserve_bucket_order` is only customisable for testing purposes.
23-
There is no use case for predefined bucket order, is there?
38+
`_shuffle_buckets` is for testing only. Setting it to False may not produce
39+
the outcome you'd expect in all scenarios because if two non-contiguous sections of items belong
40+
to the same bucket, the items in these sections will be reshuffled as if they all belonged
41+
to the first section.
42+
Example:
43+
[A1, A2, B1, B2, A3, A4]
44+
45+
where letter denotes bucket key,
46+
with _shuffle_buckets=False may be reshuffled to:
47+
[B2, B1, A3, A1, A4, A2]
48+
49+
or as well to:
50+
[A3, A2, A4, A1, B1, B2]
51+
52+
because all A's belong to the same bucket and will be grouped together.
2453
"""
2554

26-
# If `key` is falsey, shuffle is global.
27-
if not key and not disable:
55+
# If `bucket_key` is falsey, shuffle is global.
56+
if not bucket_key and not disable:
2857
random.shuffle(items)
2958
return
3059

31-
# Use (key(x), disable(x)) as the key because
32-
# when we have a bucket type like package over a disabled module, we must
33-
# not shuffle the disabled module items.
34-
def full_key(x):
35-
if key and disable:
36-
return key(x), disable(x)
60+
def get_full_bucket_key(item):
61+
assert bucket_key or disable
62+
if bucket_key and disable:
63+
return ItemKey(bucket=bucket_key(item), disabled=disable(item))
3764
elif disable:
38-
return disable(x)
65+
return ItemKey(disabled=disable(item))
3966
else:
40-
return key(x)
67+
return ItemKey(bucket=bucket_key(item))
4168

42-
buckets = []
43-
this_key = '__not_initialised__'
69+
# For a sequence of items A1, A2, B1, B2, C1, C2,
70+
# where key(A1) == key(A2) == key(C1) == key(C2),
71+
# items A1, A2, C1, and C2 will end up in the same bucket.
72+
buckets = collections.OrderedDict()
4473
for item in items:
45-
prev_key = this_key
46-
this_key = full_key(item)
47-
if this_key != prev_key:
48-
buckets.append([])
49-
buckets[-1].append(item)
50-
51-
# Shuffle within bucket unless disable(item) evaluates to True for
52-
# the first item in the bucket.
53-
# This assumes that whoever supplied disable function knows this requirement.
54-
# Fixation of individual items in an otherwise shuffled bucket
55-
# is not supported.
56-
for bucket in buckets:
57-
if callable(disable) and disable(bucket[0]):
58-
continue
59-
random.shuffle(bucket)
74+
full_bucket_key = get_full_bucket_key(item)
75+
if full_bucket_key not in buckets:
76+
buckets[full_bucket_key] = []
77+
buckets[full_bucket_key].append(item)
78+
79+
# Shuffle inside a bucket
80+
for bucket in buckets.keys():
81+
if not bucket.disabled:
82+
random.shuffle(buckets[bucket])
6083

6184
# Shuffle buckets
62-
if not preserve_bucket_order:
63-
random.shuffle(buckets)
85+
bucket_keys = list(buckets.keys())
86+
random.shuffle(bucket_keys)
6487

65-
items[:] = [item for bucket in buckets for item in bucket]
88+
items[:] = [item for bk in bucket_keys for item in buckets[bk]]
6689
return
6790

6891

@@ -79,7 +102,10 @@ def _get_set_of_item_ids(items):
79102

80103
def _disable(item):
81104
try:
82-
if _is_random_order_disabled(item.module):
105+
# In actual test runs, this is returned as a truthy instance of MarkDecorator even when you don't have
106+
# set the marker. This is a hack.
107+
is_disabled = _is_random_order_disabled(item.module)
108+
if is_disabled and is_disabled is True:
83109
# It is not enough to return just True because in case the shuffling
84110
# is disabled on module, we must preserve the module unchanged
85111
# even when the bucket type for this test run is say package or global.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def read(fname):
1313

1414
setup(
1515
name='pytest-random-order',
16-
version='0.4.0',
16+
version='0.4.3',
1717
author='Jazeps Basko',
1818
author_email='[email protected]',
1919
maintainer='Jazeps Basko',

tests/test_markers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ def twenty_tests():
99
return ''.join(code)
1010

1111

12-
@pytest.mark.parametrize('disabled', [True, False])
12+
@pytest.mark.parametrize('disabled', [True])
1313
def test_pytest_mark_random_order_disabled(testdir, twenty_tests, get_test_calls, disabled):
1414
testdir.makepyfile(
1515
'import pytest\n' +

tests/test_shuffle.py

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,20 @@ def identity_key(x):
88
return x
99

1010

11-
def oddity_key(x):
11+
def modulus_2_key(x):
1212
return x % 2
1313

1414

15+
def lt_10_key(x):
16+
return x < 10
17+
18+
1519
def disable_if_gt_1000(x):
16-
# if disable returns a truthy value, it must also be usable as a key
17-
return (x > 1000), (x // 1000)
20+
# if disable returns a truthy value, it must also be usable as a key.
21+
if x > 1000:
22+
return x // 1000
23+
else:
24+
return False
1825

1926

2027
@pytest.mark.parametrize('key', [
@@ -25,7 +32,7 @@ def disable_if_gt_1000(x):
2532
def test_shuffles_empty_list_in_place(key):
2633
items = []
2734
items_id = id(items)
28-
_shuffle_items(items, key=key)
35+
_shuffle_items(items, bucket_key=key)
2936
assert items == []
3037
assert id(items) == items_id
3138

@@ -38,47 +45,35 @@ def test_shuffles_empty_list_in_place(key):
3845
def test_shuffles_one_item_list_in_place(key):
3946
items = [42]
4047
items_id = id(items)
41-
_shuffle_items(items, key=key)
48+
_shuffle_items(items, bucket_key=key)
4249
assert items == [42]
4350
assert id(items) == items_id
4451

4552

46-
def test_shuffle_respects_bucket_order():
47-
# In this test, it does not matter that multiple buckets have the same key
48-
# because we are running _shuffle_items with preserve_bucket_order=True.
49-
# Shuffling will happen only within a bucket (items with the same key).
50-
items = [
51-
1, 3, 5, 7, # oddity_key(item) = 1
52-
2, 4, 6, # 0
53-
9, 11, 13, 15, 17, # 1
54-
8, # 0
55-
19, 21, # 1
56-
10, # 0
57-
]
53+
def test_identity_key_results_in_complete_reshuffle():
54+
items = list(range(20))
5855
items_copy = list(items)
56+
_shuffle_items(items, bucket_key=identity_key)
57+
assert items != items_copy
5958

60-
# No shuffling because identity_key returns a unique key for our distinct items
61-
_shuffle_items(items, key=identity_key, preserve_bucket_order=True)
62-
assert items == items_copy
63-
64-
# Should leave buckets in their relative order.
65-
# Single item buckets should remain unchanged.
66-
for x in range(3):
67-
_shuffle_items(items, key=oddity_key, preserve_bucket_order=True)
68-
assert items != items_copy
69-
assert set(items[0:4]) == {1, 3, 5, 7}
70-
assert set(items[4:7]) == {2, 4, 6}
71-
assert set(items[7:12]) == {9, 11, 13, 15, 17}
72-
assert items[12] == 8
73-
assert set(items[13:15]) == {19, 21}
74-
assert items[15] == 10
7559

76-
# None key should do complete reshuffle and should break down the bucket boundaries
77-
_shuffle_items(items, key=None, preserve_bucket_order=True)
78-
assert (set(items[0:4]), set(items[4:7])) != ({1, 3, 5, 7}, {2, 4, 6})
60+
def test_two_bucket_reshuffle():
61+
# If we separate a list of 20 integers from 0 to 19 into two buckets
62+
# [[0..9], [10..19]] then the result should be
63+
# either [{0..9}, {10..19}]
64+
# or [{10..19}, {0..9}]
65+
items = list(range(20))
66+
items_copy = list(items)
67+
_shuffle_items(items, bucket_key=lt_10_key)
68+
assert items != items_copy
69+
for i, item in enumerate(items):
70+
if lt_10_key(i):
71+
assert lt_10_key(item) == lt_10_key(items[0]), items
72+
else:
73+
assert lt_10_key(item) == lt_10_key(items[10]), items
7974

8075

81-
def test_shuffles_buckets():
76+
def test_eight_bucket_reshuffle():
8277
# This is a cross-check to test shuffling of buckets.
8378
items = [
8479
1, 1,
@@ -92,7 +87,7 @@ def test_shuffles_buckets():
9287
]
9388
items_copy = list(items)
9489

95-
_shuffle_items(items, key=identity_key, preserve_bucket_order=False)
90+
_shuffle_items(items, bucket_key=identity_key)
9691

9792
assert items != items_copy
9893

@@ -109,7 +104,7 @@ def test_shuffle_respects_single_disabled_group_in_each_of_two_buckets():
109104
]
110105
items_copy = list(items)
111106

112-
_shuffle_items(items, key=oddity_key, disable=disable_if_gt_1000)
107+
_shuffle_items(items, bucket_key=modulus_2_key, disable=disable_if_gt_1000)
113108

114109
assert items != items_copy
115110
assert items.index(9995) + 1 == items.index(9997)
@@ -128,7 +123,7 @@ def test_shuffle_respects_two_distinct_disabled_groups_in_one_bucket():
128123
items_copy = list(items)
129124

130125
for i in range(5):
131-
_shuffle_items(items, key=oddity_key, disable=disable_if_gt_1000)
126+
_shuffle_items(items, bucket_key=modulus_2_key, disable=disable_if_gt_1000)
132127
if items != items_copy:
133128
assert items[items.index(8885):items.index(8885) + 3] == [8885, 8887, 8889]
134129
assert items[items.index(9995):items.index(9995) + 3] == [9995, 9997, 9999]

0 commit comments

Comments
 (0)