Skip to content

Commit e60b055

Browse files
authored
Merge pull request #460 from kedhammar/improve-finding-run-dirs
Improve identifying run dirs
2 parents 048c781 + d7b8a92 commit e60b055

File tree

2 files changed

+24
-37
lines changed

2 files changed

+24
-37
lines changed

VERSIONLOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# TACA Version Log
22

3+
## 20250122.2
4+
5+
Improve the way TACA identifies run dirs in the "bioinfo_deliveries --update" command (bioinfo_tab.py).
6+
37
## 20250122.1
48

59
Ruff formatting.

taca/utils/bioinfo_tab.py

Lines changed: 20 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -26,52 +26,36 @@ def __init__(self, value=None):
2626

2727
def collect_runs():
2828
"""Update command."""
29-
found_runs = {"illumina": [], "element": []}
29+
3030
# Pattern explained:
3131
# 6-8Digits_(maybe ST-)AnythingLetterornumberNumber_Number_AorBLetterornumberordash
3232
illumina_rundir_re = re.compile("\d{6,8}_[ST-]*\w+\d+_\d+_[AB]?[A-Z0-9\-]+")
33+
# E.g. 20250121_AV242106_B2425434199
34+
element_rundir_re = re.compile("\d{8}_AV242106_[AB]\d+")
35+
3336
for inst_brand in CONFIG["bioinfo_tab"]["data_dirs"]:
3437
for data_dir in CONFIG["bioinfo_tab"]["data_dirs"][inst_brand]:
3538
if os.path.exists(data_dir):
3639
potential_run_dirs = glob.glob(os.path.join(data_dir, "*"))
37-
for run_dir in potential_run_dirs:
38-
if os.path.isdir(run_dir):
39-
if inst_brand == "illumina" and illumina_rundir_re.match(
40-
os.path.basename(os.path.abspath(run_dir))
41-
):
42-
found_runs[inst_brand].append(os.path.basename(run_dir))
43-
logger.info(f"Working on {run_dir}")
44-
update_statusdb(run_dir, inst_brand)
45-
elif inst_brand == "element":
46-
# Skip no sync dirs, they will be checked below
47-
if run_dir == os.path.join(data_dir, "nosync"):
48-
continue
49-
logger.info(f"Working on {run_dir}")
50-
update_statusdb(run_dir, inst_brand)
51-
elif inst_brand == "ont":
52-
# Skip archived, no_backup, nosync and qc folders
53-
if re.match(
54-
ONT_RUN_PATTERN,
55-
os.path.basename(os.path.abspath(run_dir)),
56-
):
57-
logger.info(f"Working on {run_dir}")
58-
update_statusdb(run_dir, inst_brand)
40+
potential_run_dirs += glob.glob(os.path.join(data_dir, "nosync", "*"))
5941

60-
nosync_data_dir = os.path.join(data_dir, "nosync")
61-
potential_nosync_run_dirs = glob.glob(
62-
os.path.join(nosync_data_dir, "*")
63-
)
64-
for run_dir in potential_nosync_run_dirs:
42+
for run_dir in potential_run_dirs:
6543
if os.path.isdir(run_dir):
6644
if (
67-
inst_brand == "illumina"
68-
and illumina_rundir_re.match(
69-
os.path.basename(os.path.abspath(run_dir))
45+
(
46+
inst_brand == "illumina"
47+
and illumina_rundir_re.match(os.path.basename(run_dir))
7048
)
71-
) or (inst_brand == "element" or inst_brand == "ont"):
72-
# Skip archived dirs
73-
if run_dir == os.path.join(nosync_data_dir, "archived"):
74-
continue
49+
or (
50+
inst_brand == "element"
51+
and element_rundir_re.match(os.path.basename(run_dir))
52+
)
53+
or (
54+
inst_brand == "ont"
55+
and ONT_RUN_PATTERN.match(os.path.basename(run_dir))
56+
)
57+
):
58+
logger.info(f"Working on {run_dir}")
7559
update_statusdb(run_dir, inst_brand)
7660

7761

@@ -89,7 +73,6 @@ def update_statusdb(run_dir, inst_brand):
8973
# WARNING - Run parameters file not found for ElementRun(<run_dir>), might not be ready yet
9074
return
9175
elif inst_brand == "ont":
92-
run_dir = os.path.abspath(run_dir)
9376
try:
9477
ont_run = ONT_run(run_dir)
9578
except AssertionError as e:
@@ -320,7 +303,7 @@ def get_ss_projects_illumina(run_dir):
320303
proj_tree = Tree()
321304
lane_pattern = re.compile("^([1-8]{1,2})$")
322305
sample_proj_pattern = re.compile("^((P[0-9]{3,5})_[0-9]{3,5})")
323-
run_name = os.path.basename(os.path.abspath(run_dir))
306+
run_name = os.path.basename(run_dir)
324307
run_date = run_name.split("_")[0]
325308
if len(run_date) == 6:
326309
current_year = "20" + run_date[0:2]

0 commit comments

Comments
 (0)