Skip to content

Commit 741bf69

Browse files
committed
improve testcase hashing
1 parent c65c9ec commit 741bf69

File tree

3 files changed

+24
-29
lines changed

3 files changed

+24
-29
lines changed

bin/generate.py

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ def __init__(
556556
self.rule["copy"] = str(self.copy)
557557
for ext in config.KNOWN_TESTCASE_EXTENSIONS:
558558
if self.copy.with_suffix(ext).is_file():
559-
hashes[ext] = hash_file(self.copy.with_suffix(ext))
559+
hashes[ext] = hash_file_content(self.copy.with_suffix(ext))
560560

561561
# 3. hardcoded
562562
for ext in config.KNOWN_TEXT_DATA_EXTENSIONS:
@@ -586,14 +586,8 @@ def __init__(
586586
color_type=MessageType.LOG,
587587
)
588588

589-
# build ordered list of hashes we want to consider
590-
hs = list(hashes.values())
591-
592589
# combine hashes
593-
if len(hs) == 1:
594-
self.hash = hs[0]
595-
else:
596-
self.hash = combine_hashes(hs)
590+
self.hash = combine_hashes_dict(hashes)
597591

598592
if self.hash in generator_config.rules_cache:
599593
self.copy_of = generator_config.rules_cache[self.hash]
@@ -1170,24 +1164,20 @@ def add_testdata_to_cache():
11701164

11711165
# consider specific files for the uniqueness of this testcase
11721166
relevant_files = {
1167+
"invalid_input": [".in"],
11731168
"invalid_answer": [".in", ".ans"],
11741169
"invalid_output": [".in", ".ans", ".out"],
11751170
"valid_output": [".in", ".ans", ".out"],
11761171
}
1177-
extensions = relevant_files.get(t.root, [".in"])
1172+
relevant_files_default = [".in"] if problem.settings.ans_is_output else [".in", ".ans"]
1173+
extensions = relevant_files.get(t.root, relevant_files_default)
11781174

11791175
for ext in extensions:
11801176
if target_infile.with_suffix(ext).is_file():
1181-
hashes[ext] = hash_file(target_infile.with_suffix(ext))
1182-
1183-
# build ordered list of hashes we want to consider
1184-
hs = list(hashes.values())
1177+
hashes[ext] = hash_file_content(target_infile.with_suffix(ext))
11851178

11861179
# combine hashes
1187-
if len(hs) == 1:
1188-
test_hash = hs[0]
1189-
else:
1190-
test_hash = combine_hashes(hs)
1180+
test_hash = combine_hashes_dict(hashes)
11911181

11921182
# check for duplicates
11931183
if test_hash not in generator_config.generated_testdata:

bin/problem.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,9 +1175,21 @@ def reset_testcase_hashes(self):
11751175

11761176
# Returns None for new testcases or the Testcase object it equals.
11771177
def matches_existing_testcase(self, t):
1178-
if t.root in ["invalid_input", "invalid_answer"]:
1179-
return None
1180-
h = hash_file_content(t.in_path)
1178+
hashes = {}
1179+
relevant_files = {
1180+
"invalid_input": [".in"],
1181+
"invalid_answer": [".in", ".ans"],
1182+
"invalid_output": [".in", ".ans", ".out"],
1183+
"valid_output": [".in", ".ans", ".out"],
1184+
}
1185+
relevant_files_default = [".in"] if self.settings.ans_is_output else [".in", ".ans"]
1186+
extensions = relevant_files.get(t.root, relevant_files_default)
1187+
1188+
for ext in extensions:
1189+
if t.with_suffix(ext).is_file():
1190+
hashes[ext] = hash_file_content(t.with_suffix(ext))
1191+
1192+
h = combine_hashes_dict(hashes)
11811193
if h in self._testcase_hashes:
11821194
return self._testcase_hashes[h]
11831195
self._testcase_hashes[h] = t
@@ -1394,14 +1406,7 @@ def process_testcase(testcase: testcase.Testcase):
13941406

13951407
localbar = bar.start(testcase.name)
13961408

1397-
if (
1398-
mode == validate.Mode.INPUT
1399-
and not testcase.in_path.is_symlink()
1400-
and not testcase.root == "invalid_answer"
1401-
and not testcase.root == "invalid_output"
1402-
and not testcase.root == "valid_output"
1403-
and not extra
1404-
):
1409+
if mode == validate.Mode.INPUT and not testcase.in_path.is_symlink() and not extra:
14051410
t2 = problem.matches_existing_testcase(testcase)
14061411
if t2 is not None:
14071412
localbar.warn(

bin/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1463,7 +1463,7 @@ def combine_hashes(values: Sequence[str]) -> str:
14631463
return hasher.hexdigest()
14641464

14651465

1466-
def combine_hashes_dict(d: dict[str, Optional[str]]) -> str:
1466+
def combine_hashes_dict(d: Mapping[str, Optional[str]]) -> str:
14671467
hasher = hashlib.sha512(usedforsecurity=False)
14681468
for key, value in d.items():
14691469
hasher.update(key.encode())

0 commit comments

Comments
 (0)