Skip to content

Commit 9bafcae

Browse files
authored
Merge pull request #110 from GeoscienceAustralia/NPI-4453-implement-hash-baselining-unit-tests
NPI-4453 Framework for DataFrame hashing & test baselining
2 parents 4c0deba + ee4fae0 commit 9bafcae

8 files changed

+553
-2
lines changed

gnssanalysis/gn_utils.py

Lines changed: 420 additions & 1 deletion
Large diffs are not rendered by default.

tests/test_utils.py

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import logging
2+
import os
3+
import unittest
4+
from pandas import DataFrame
25
from pyfakefs.fake_filesystem_unittest import TestCase
36
from pathlib import Path
47

5-
from gnssanalysis.gn_utils import delete_entire_directory
8+
from gnssanalysis.gn_utils import UnitTestBaseliner, delete_entire_directory
69
import gnssanalysis.gn_utils as ga_utils
710

811

@@ -64,3 +67,129 @@ def test_configure_logging(self):
6467

6568
# Verify
6669
self.assertEqual(logger_not_output, None)
70+
71+
72+
class TestUnitTestBaseliner(unittest.TestCase):
    """Tests for UnitTestBaseliner: mode gating of verify(), repeat-caller and duplicate-object rejection,
    and lookup of the calling test's identity.

    Most tests here call UnitTestBaseliner.verify() and expect it to return True, so (per the assertion messages
    below) they rely on baseline files being present and on UnitTestBaseliner.mode not being left in 'baseline'.
    """

    def test_verify_refusal_in_wrong_mode(self):
        """verify() should warn and return False in 'baseline' mode, and return True in 'verify' mode."""
        mode_backup = UnitTestBaseliner.mode
        try:
            df = DataFrame(["a", "b", "c"])

            # Baseline (do not commit uncommented!) Note: every function needs its own baseline, because the
            # function name determines the filename, unless we override that.
            # UnitTestBaseliner.mode = "baseline"
            # UnitTestBaseliner.record_baseline([df])

            # In baseline (write) mode, verify should be refused.
            UnitTestBaseliner.mode = "baseline"

            with self.assertWarns(Warning) as warning_assessor:
                self.assertFalse(
                    UnitTestBaseliner.verify([df]),
                    "DF / object list verification should not succeed in 'baseline' mode",
                )
            # Ensure the expected warning, and only that warning, was raised
            captured_warnings = warning_assessor.warnings
            self.assertEqual(
                "Refusing to run verify method while not in verify mode. Set UnitTestBaseliner.mode = 'verify' first",
                str(captured_warnings[0].message),
            )
            self.assertEqual(
                len(captured_warnings),
                1,
                "Expected exactly 1 warning. Check what other warnings are being raised!",
            )

            # Should succeed in correct mode.
            # Note: this is the second verify() call from this test function. It is expected to pass, so this test
            # relies on the refused call above not counting towards the repeat-caller check.
            UnitTestBaseliner.mode = "verify"
            self.assertTrue(
                UnitTestBaseliner.verify([df]),
                "DF / object list verification should succeed in 'verify' mode",
            )
        finally:
            # Ensure flag reset to avoid impacts on other tests (across the whole suite)
            UnitTestBaseliner.mode = mode_backup

    def test_repeat_caller_rejection(self):
        """A second verify() call from the same test function should raise ValueError."""
        # These functions determine what files to write/read baselines from, based on the identity of the (test)
        # function that called them. Therefore, calling twice from the same function would cause the *same baseline
        # files* to be read/written for a different part of the unit test.
        # That would have the effect of:
        # - in write mode: overwriting the baseline file for a previous part of the test function.
        # - in read mode: repeating verification of the same file against a different DF / object list (which would
        #   likely fail).

        # We're only testing it with the verify function below, but both verify and baseline functions use the same
        # caller check logic, and store the caller record statically in a class variable.

        df = DataFrame(["a", "b", "c"])

        # Baseline (every function needs its own baseline, because the function name determines the filename,
        # unless we override that)
        # UnitTestBaseliner.mode = "baseline"
        # UnitTestBaseliner.record_baseline([df])

        self.assertTrue(
            UnitTestBaseliner.verify([df]),
            "DF / object list verification should succeed on *first* call from a function.",
        )
        # msg is reported if no ValueError is raised (replaces an explicit self.fail() inside the with-body).
        with self.assertRaises(
            ValueError,
            msg="DF / object list verification should fail on *second*/repeated calls from a function.",
        ):
            UnitTestBaseliner.verify([df])

    def test_duplicate_object_rejection(self):
        """verify() should raise ValueError when the same object appears more than once in the list."""

        # List to aggregate DFs / objects for hashing
        objects_to_hash: list[object] = []

        df = DataFrame(["a", "b", "c"])  # Let's call this Dataframe 'a'
        objects_to_hash.append(df)

        # Overwrite local variable, as often happens in our unit tests
        df = DataFrame(["b", "c", "d"])  # Let's call this Dataframe 'b'

        # This might look questionable, but is ok, because we saved a reference to dataframe 'a' to the list,
        # before overwriting local var 'df' to point at dataframe 'b'.
        objects_to_hash.append(df)

        # Baseline this test (this should only be committed commented out!)
        # UnitTestBaseliner.mode = "baseline"
        # UnitTestBaseliner.record_baseline(objects_to_hash)

        # Will return True if verification succeeded. False if baseline missing or mode != verify
        self.assertTrue(
            UnitTestBaseliner.verify(objects_to_hash),
            "DF / object list verification should succeed here (unless baseline files are missing, or baselining has been turned on)",
        )

        # The local variable df still points to the same DF, so now the list contains [a,b,b]. This should be an error.
        objects_to_hash.append(df)
        # NOTE(review): this is also the second verify() call from this test function, so a ValueError raised by the
        # repeat-caller check (see test_repeat_caller_rejection) would satisfy this assertion too. Confirm that the
        # duplicate-object check is what actually fires here.
        with self.assertRaises(ValueError):
            UnitTestBaseliner.verify(objects_to_hash)

    def test_caller_identity_fetch(self):
        """get_grandparent_caller_id() should report this test's class name and function name."""

        def wrapper_function():
            class_name, func_name = UnitTestBaseliner.get_grandparent_caller_id()
            self.assertEqual(class_name, "TestUnitTestBaseliner")
            self.assertEqual(func_name, "test_caller_identity_fetch")

        # We have to do this (create an extra stack frame) because the function looks for
        # the *grandparent* caller, not parent caller.
        wrapper_function()
180+
181+
182+
# For use with debugger
183+
# if __name__ == "__main__":
184+
185+
# logging.basicConfig(format="%(levelname)s: %(message)s")
186+
# logger = logging.getLogger()
187+
# logger.setLevel(logging.DEBUG)
188+
189+
# os.chdir("./tests")
190+
191+
# baseliner_tests = TestUnitTestBaseliner()
192+
# baseliner_tests.test_duplicate_object_rejection()
193+
# baseliner_tests.test_verify_refusal_in_wrong_mode()
194+
# baseliner_tests.test_repeat_caller_rejection()
195+
# baseliner_tests.test_caller_identity_fetch()
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
6b5020201b08f64a2e7412422e03f94a6e7b0479f3a69a792967cec80b17a08b
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1b369c0e1d2ee74b36b233cb0655cc5e0158b334ec0757f546d5e45e6d05d58e

0 commit comments

Comments
 (0)