@@ -70,13 +70,28 @@ def test_configure_logging(self):
7070
7171
7272class TestUnitTestBaseliner (unittest .TestCase ):
73+ """
74+ Test functionality for creating and verifying hash & pickle baselines of objects produced by unit tests. Used to
75+ support detection and troubleshooting of subtle regressions.
76+ """
7377
7478 def test_verify_refusal_in_wrong_mode (self ):
79+ # This tests our logic for preventing baselining and verification from running in the same session / without
80+ # manual code changes.
81+
82+ # We do this to make baselining a more deliberate, developer supervised activity.
83+ # This specific functionality is designed to mitigate the risk posed by the following sequence of events:
84+ # - setting `UnitTestBaseliner.mode = baseline` is *accidentally committed*, then
85+ # - a regression is introduced, then
86+ # - the utility updates the baseline (so it now matches the regression), then
87+ # - verification is performed against the updated baseline, which will be considered valid despite being
88+ # a regression!
89+
7590 mode_backup = UnitTestBaseliner .mode
7691 try :
7792 df = DataFrame (["a" , "b" , "c" ])
7893
79- # Baseline (do not commit uncommented!) Note: every function needs its own baseline, becuase the
94+ # Baseline (do not commit uncommented!) Note: every function needs its own baseline, because the
8095 # function name determines the filename, unless we override that.
8196 # UnitTestBaseliner.mode = "baseline"
8297 # UnitTestBaseliner.record_baseline([df])
@@ -121,11 +136,12 @@ def test_repeat_caller_rejection(self):
121136 # likely fail).
122137
123138 # We're only testing it with the verify function below, but both verify and baseline functions use the same
124- # caller check logic, and store the caller record statically in a class variable. ?
139+ # caller check logic, and store the caller record *statically* in a class variable (known as static variables
140+ # in some other languages).
125141
126142 df = DataFrame (["a" , "b" , "c" ])
127143
128- # Baseline (every function needs its own baseline, becuase the function name determines the filename,
144+ # Baseline (every function needs its own baseline, because the function name determines the filename,
129145 # unless we override that)
130146 # UnitTestBaseliner.mode = "baseline"
131147 # UnitTestBaseliner.record_baseline([df])
@@ -139,6 +155,11 @@ def test_repeat_caller_rejection(self):
139155 self .fail ("DF / object list verification should fail on *second*/repeated calls from a function." )
140156
141157 def test_duplicate_object_rejection (self ):
158+ # This function tests our logic for ensuring we reject input data with multiple *top level* references to
159+ # the same object (which likely indicates a mistake while using the utility in a unit test).
160+ # I.e. passing in a list with two references to the exact same DataFrame object should be considered an error.
161+ # NOTE: this check is not recursive. Passing in two lists, which themselves refer to the same objects, will
162+ # not raise an exception, though this would arguably also be a bad sign.
142163
143164 # List to aggregate DFs / objects for hashing
144165 objects_to_hash : list [object ] = []
@@ -163,7 +184,8 @@ def test_duplicate_object_rejection(self):
163184 "DF / object list verification should succeed here (unless baseline files are missing, or baselining has been turned on)" ,
164185 )
165186
166- # The local variable df still points to the same DF, so now the list contains [a,b,b]. This should be an error.
187+ # The local variable df still points to the same DF, so adding it to the list will result in a duplicate
188+ # reference, i.e. the list will point at DataFrames [a, b, b]. This should cause an error to be raised.
167189 objects_to_hash .extend ([df ])
168190 with self .assertRaises (ValueError ):
169191 UnitTestBaseliner .verify (objects_to_hash )
0 commit comments