|
37 | 37 |
|
38 | 38 | # From a fully qualified file name (i.e. everything below the bucket name level), get the job id. |
39 | 39 | def get_job_id(filename: str) -> int: |
40 | | - tmp = "" |
| 40 | + tmp = get_filename_without_approve_dir(filename) |
| 41 | + return int(tmp.split("/")[0]) |
| 42 | + |
| 43 | + |
| 44 | +# Remove the approved or unapproved prefix as that isn't a property of the filename itself |
| 45 | +# and may change if the file gets approved or unapproved. |
| 46 | +def get_filename_without_approve_dir(filename: str) -> int: |
41 | 47 | if filename.startswith("approved/"): |
42 | | - tmp = filename.removeprefix("approved/") |
| 48 | + return filename.removeprefix("approved/") |
43 | 49 | elif filename.startswith("unapproved/"): |
44 | | - tmp = filename.removeprefix("unapproved/") |
| 50 | + return filename.removeprefix("unapproved/") |
45 | 51 | else: |
46 | | - raise ValueError("Unexpected filename structure.") |
47 | | - return int(tmp.split("/")[0]) |
| 52 | + return filename |
48 | 53 |
|
49 | 54 |
|
| 55 | +# This should of course be called before any prefix stripping. |
50 | 56 | def is_file_approved(filename: str) -> bool: |
51 | 57 | if filename.startswith("approved/"): |
52 | 58 | return True |
@@ -77,6 +83,8 @@ def update_db_from_bucket(inst_id: str, session, storage_control): |
77 | 83 | if not f.endswith(".png") and not f.endswith(".csv"): |
78 | 84 | continue |
79 | 85 | file_approved = is_file_approved(f) |
| 86 | + # We strip the approved/unapproved prefix since the file can move between the two and should still be considered one file. |
| 87 | + f = get_filename_without_approve_dir(f) |
80 | 88 | # Check if that file already exists in the table, otherwise add it. |
81 | 89 | query_result = session.execute( |
82 | 90 | select(FileTable).where( |
|
0 commit comments