Skip to content

Commit 0e09cc4

Browse files
committed
NPI-4485 non-critical doc updates relating to tests for unittest baseliner utility, based on PR feedback
1 parent 9bafcae commit 0e09cc4

File tree

1 file changed

+26
-4
lines changed

1 file changed

+26
-4
lines changed

tests/test_utils.py

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -70,13 +70,28 @@ def test_configure_logging(self):
7070

7171

7272
class TestUnitTestBaseliner(unittest.TestCase):
73+
"""
74+
Test functionality for creating and verifying hash & pickle baselines of objects produced by unit tests. Used to
75+
support detection and troubleshooting of subtle regressions.
76+
"""
7377

7478
def test_verify_refusal_in_wrong_mode(self):
79+
# This tests our logic for preventing baselining and verification from running in the same session / without
80+
# manual code changes.
81+
82+
# We do this to make baselining a more deliberate, developer supervised activity.
83+
# This specific functionality is designed to mitigate the risk posed by the following sequence of events:
84+
# - setting `UnitTestBaseliner.mode = baseline` is *accidentally committed*, then
85+
# - a regression is introduced, then
86+
# - the utility updates the baseline (so it now matches the regression), then
87+
# - verification is performed against the updated baseline, which will be considered valid despite being
88+
# a regression!
89+
7590
mode_backup = UnitTestBaseliner.mode
7691
try:
7792
df = DataFrame(["a", "b", "c"])
7893

79-
# Baseline (do not commit uncommented!) Note: every function needs its own baseline, becuase the
94+
# Baseline (do not commit uncommented!) Note: every function needs its own baseline, because the
8095
# function name determines the filename, unless we override that.
8196
# UnitTestBaseliner.mode = "baseline"
8297
# UnitTestBaseliner.record_baseline([df])
@@ -121,11 +136,12 @@ def test_repeat_caller_rejection(self):
121136
# likely fail).
122137

123138
# We're only testing it with the verify function below, but both verify and baseline functions use the same
124-
# caller check logic, and store the caller record statically in a class variable. ?
139+
# caller check logic, and store the caller record *statically* in a class variable (known as static variables
140+
# in some other languages).
125141

126142
df = DataFrame(["a", "b", "c"])
127143

128-
# Baseline (every function needs its own baseline, becuase the function name determines the filename,
144+
# Baseline (every function needs its own baseline, because the function name determines the filename,
129145
# unless we override that)
130146
# UnitTestBaseliner.mode = "baseline"
131147
# UnitTestBaseliner.record_baseline([df])
@@ -139,6 +155,11 @@ def test_repeat_caller_rejection(self):
139155
self.fail("DF / object list verification should fail on *second*/repeated calls from a function.")
140156

141157
def test_duplicate_object_rejection(self):
158+
# This function tests our logic for ensuring we reject input data with multiple *top level* references to
159+
# the same object (which likely indicates a mistake while using the utility in a unit test).
160+
# I.e. passing in a list with two references to the exact same DataFrame object, should be considered an error.
161+
# NOTE: this check is not recursive. Passing in two lists, which themselves refer to the same objects, will
162+
# not raise an exception, though this would arguably also be a bad sign.
142163

143164
# List to aggregate DFs / objects for hashing
144165
objects_to_hash: list[object] = []
@@ -163,7 +184,8 @@ def test_duplicate_object_rejection(self):
163184
"DF / object list verification should succeed here (unless baseline files are missing, or baselining has been turned on)",
164185
)
165186

166-
# The local variable df still points to the same DF, so now the list contains [a,b,b]. This should be an error.
187+
# The local variable df still points to the same DF, so adding it to the list will result in a duplicate
188+
# ref i.e. the list will point at dataframes: [a,b,b]. This should cause an error to be raised.
167189
objects_to_hash.extend([df])
168190
with self.assertRaises(ValueError):
169191
UnitTestBaseliner.verify(objects_to_hash)

0 commit comments

Comments
 (0)