|
18 | 18 | # httpretty avoids this specific problem because it mocks at the socket level, |
19 | 19 | # but I would rather not have two dependencies with almost the same goal. |
20 | 20 | import httpretty |
| 21 | +import numpy as np |
21 | 22 | import pandas |
22 | 23 | import pandas as pd |
23 | 24 | import pytest |
@@ -166,16 +167,17 @@ def test_basic(self, tmp_path, job_manager, job_manager_root_dir, sleep_mock): |
166 | 167 | } |
167 | 168 | ) |
168 | 169 |
|
169 | | - assert [(r.id, r.status, r.backend_name) for r in pd.read_csv(job_db_path).itertuples()] == [ |
170 | | - ("job-2018", "finished", "foo"), |
171 | | - ("job-2019", "finished", "foo"), |
172 | | - ("job-2020", "finished", "bar"), |
173 | | - ("job-2021", "finished", "bar"), |
174 | | - ("job-2022", "finished", "foo"), |
| 170 | + assert [ |
| 171 | + (r.id, r.status, r.backend_name, r.cpu, r.memory, r.duration, r.costs) |
| 172 | + for r in pd.read_csv(job_db_path).itertuples() |
| 173 | + ] == [ |
| 174 | + ("job-2018", "finished", "foo", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 175 | + ("job-2019", "finished", "foo", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 176 | + ("job-2020", "finished", "bar", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 177 | + ("job-2021", "finished", "bar", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 178 | + ("job-2022", "finished", "foo", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
175 | 179 | ] |
176 | 180 |
|
177 | | - assert not pd.read_csv(job_db_path)[["cpu", "memory", "duration", "costs"]].isnull().any().any() |
178 | | - |
179 | 181 | # Check downloaded results and metadata. |
180 | 182 | assert set(p.relative_to(job_manager_root_dir) for p in job_manager_root_dir.glob("**/*.*")) == { |
181 | 183 | Path(f"job_{job_id}") / filename |
@@ -206,7 +208,10 @@ def test_db_class(self, tmp_path, job_manager, job_manager_root_dir, sleep_mock, |
206 | 208 | assert len(result) == 5 |
207 | 209 | assert set(result.status) == {"finished"} |
208 | 210 | assert set(result.backend_name) == {"foo", "bar"} |
209 | | - assert not result[["cpu", "memory", "duration", "costs"]].isnull().any().any() |
| 211 | + assert set(result.cpu) == {"1234.5 cpu-seconds"} |
| 212 | + assert set(result.memory) == {"34567.89 mb-seconds"} |
| 213 | + assert set(result.duration) == {"2345 seconds"} |
| 214 | + assert set(result.costs) == {123} |
210 | 215 |
|
211 | 216 | @pytest.mark.parametrize( |
212 | 217 | ["filename", "expected_db_class"], |
@@ -257,16 +262,17 @@ def test_basic_threading(self, tmp_path, job_manager, job_manager_root_dir, slee |
257 | 262 | # TODO #645 how to collect stats with the threaded run_job? |
258 | 263 | assert sleep_mock.call_count > 10 |
259 | 264 |
|
260 | | - assert [(r.id, r.status, r.backend_name) for r in pd.read_csv(job_db_path).itertuples()] == [ |
261 | | - ("job-2018", "finished", "foo"), |
262 | | - ("job-2019", "finished", "foo"), |
263 | | - ("job-2020", "finished", "bar"), |
264 | | - ("job-2021", "finished", "bar"), |
265 | | - ("job-2022", "finished", "foo"), |
| 265 | + assert [ |
| 266 | + (r.id, r.status, r.backend_name, r.cpu, r.memory, r.duration, r.costs) |
| 267 | + for r in pd.read_csv(job_db_path).itertuples() |
| 268 | + ] == [ |
| 269 | + ("job-2018", "finished", "foo", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 270 | + ("job-2019", "finished", "foo", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 271 | + ("job-2020", "finished", "bar", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 272 | + ("job-2021", "finished", "bar", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 273 | + ("job-2022", "finished", "foo", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
266 | 274 | ] |
267 | 275 |
|
268 | | - assert not pd.read_csv(job_db_path)[["cpu", "memory", "duration", "costs"]].isnull().any().any() |
269 | | - |
270 | 276 | # Check downloaded results and metadata. |
271 | 277 | assert set(p.relative_to(job_manager_root_dir) for p in job_manager_root_dir.glob("**/*.*")) == { |
272 | 278 | Path(f"job_{job_id}") / filename |
@@ -339,15 +345,16 @@ def start_worker_thread(): |
339 | 345 | ) |
340 | 346 |
|
341 | 347 | # Also check that we got sensible end results in the job db. |
342 | | - result = pd.read_csv(job_db_path) |
343 | | - assert [(r.id, r.status, r.backend_name) for r in result.itertuples()] == [ |
344 | | - ("job-2018", "finished", "foo"), |
345 | | - ("job-2019", "finished", "foo"), |
346 | | - ("job-2020", "finished", "bar"), |
347 | | - ("job-2021", "finished", "bar"), |
348 | | - ("job-2022", "error", "foo"), |
| 348 | + results = pd.read_csv(job_db_path).replace({np.nan: None}) # np.nan's are replaced by None for easy comparison |
| 349 | + assert [ |
| 350 | + (r.id, r.status, r.backend_name, r.cpu, r.memory, r.duration, r.costs) for r in results.itertuples() |
| 351 | + ] == [ |
| 352 | + ("job-2018", "finished", "foo", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 353 | + ("job-2019", "finished", "foo", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 354 | + ("job-2020", "finished", "bar", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 355 | + ("job-2021", "finished", "bar", "1234.5 cpu-seconds", "34567.89 mb-seconds", "2345 seconds", 123), |
| 356 | + ("job-2022", "error", "foo", None, None, None, None), |
349 | 357 | ] |
350 | | - assert not result[result["status"] == "finished"][["cpu", "memory", "duration", "costs"]].isnull().any().any() |
351 | 358 |
|
352 | 359 | # Check downloaded results and metadata. |
353 | 360 | assert set(p.relative_to(job_manager_root_dir) for p in job_manager_root_dir.glob("**/*.*")) == { |
|
0 commit comments