Skip to content

Commit 60b1b24

Browse files
committed
fix: normalize task hash in CUR cost JOIN by stripping '/'
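Task hashes are Nextflow workdir paths ('ab/cdef12'), whereas CUR hashes are plain MD5 prefixes ('abcdef12'). Previously LEFT(t.hash, 8) kept the '/' in its 8-character prefix, so it never matched the CUR hash. Applying REPLACE(t.hash, '/', '') before LEFT() strips the slash so the JOIN matches correctly; the two strict xfail markers on the CUR tests are removed accordingly.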
1 parent d63fb42 commit 60b1b24

File tree

2 files changed

+1
-3
lines changed

2 files changed

+1
-3
lines changed

bin/benchmark_report.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ def query_run_costs(db: duckdb.DuckDBPyConnection) -> list[dict]:
328328
ROUND(SUM(COALESCE(c.unused_cost, 0)), 2) AS unused_cost,
329329
FROM runs r
330330
LEFT JOIN tasks t ON r.run_id = t.run_id
331-
LEFT JOIN costs c ON t.run_id = c.run_id AND LEFT(t.hash, 8) = c.hash
331+
LEFT JOIN costs c ON t.run_id = c.run_id AND LEFT(REPLACE(t.hash, '/', ''), 8) = c.hash
332332
GROUP BY r.run_id, r."group"
333333
ORDER BY r."group"
334334
""")

bin/test_benchmark_report.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,6 @@ def _write_cur_parquet(self, tmp_path, run_id="run1", task_hash_md5="abcdef12345
138138
db.close()
139139
return path
140140

141-
@pytest.mark.xfail(strict=True, reason="CUR hash-join mismatch: LEFT(t.hash,8) includes '/' so never matches CUR hash")
142141
def test_cur_costs_override_task_costs(self, tmp_path):
143142
"""When CUR data is present, query_run_costs should use CUR cost (10.0)
144143
not the task-level cost (1.50)."""
@@ -153,7 +152,6 @@ def test_cur_costs_override_task_costs(self, tmp_path):
153152
# CUR total cost = split_cost + unused = 8.0 + 2.0 = 10.0
154153
assert costs[0]["cost"] == pytest.approx(10.0)
155154

156-
@pytest.mark.xfail(strict=True, reason="CUR hash-join mismatch: LEFT(t.hash,8) includes '/' so never matches CUR hash")
157155
def test_cur_used_and_unused_cost_split(self, tmp_path):
158156
"""CUR provides used_cost and unused_cost breakdown."""
159157
task = _flat_task(hash_val="ab/cdef1234567890", cost=1.50)

0 commit comments

Comments
 (0)