Skip to content

Commit 48e3c1b

Browse files
committed
Support for symbolic linking
1 parent ec6db72 commit 48e3c1b

File tree

7 files changed

+73
-9
lines changed

7 files changed

+73
-9
lines changed

ezfastq/api.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,12 @@ def copy(
1919
prefix="",
2020
workdir=Path("."),
2121
subdir="seq",
22+
link=False,
2223
verbose=False,
2324
):
24-
copier = FastqCopier.from_dir(sample_names, seq_path, prefix=prefix, pair_mode=pair_mode)
25+
copier = FastqCopier.from_dir(
26+
sample_names, seq_path, prefix=prefix, pair_mode=pair_mode, link=link
27+
)
2528
copier.copy_files(workdir / subdir)
2629
copier.print_copy_log()
2730
nlogs = len(list((workdir / subdir).glob("copy-log-*.toml")))

ezfastq/cli.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ def main(arglist=None):
2424
prefix=args.prefix,
2525
workdir=args.workdir,
2626
subdir=args.subdir,
27+
link=args.link,
2728
verbose=args.verbose,
2829
)
2930

@@ -91,6 +92,12 @@ def get_parser():
9192
default=0,
9293
help="specify 1 to indicate that all samples are single-end, or 2 to indicate that all samples are paired-end; by default, read layout is inferred automatically on a per-sample basis",
9394
)
95+
parser.add_argument(
96+
"-l",
97+
"--link",
98+
action="store_true",
99+
help="symbolically link files rather than copying; only supported for gzip-compressed files",
100+
)
94101
parser.add_argument(
95102
"-V",
96103
"--verbose",

ezfastq/copier.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -40,13 +40,16 @@ class FastqCopier:
4040
skipped_files: List
4141
file_map: SampleFastqMap
4242
prefix: str = ""
43+
link: bool = False
4344

4445
@classmethod
45-
def from_dir(cls, sample_names, data_path, prefix="", pair_mode=PairMode.Unspecified):
46+
def from_dir(
47+
cls, sample_names, data_path, prefix="", pair_mode=PairMode.Unspecified, link=False
48+
):
4649
copied_files = list()
4750
skipped_files = list()
4851
file_map = SampleFastqMap.new(sample_names, data_path, pair_mode=pair_mode)
49-
copier = cls(sorted(sample_names), copied_files, skipped_files, file_map, prefix)
52+
copier = cls(sorted(sample_names), copied_files, skipped_files, file_map, prefix, link)
5053
return copier
5154

5255
def copy_files(self, destination):
@@ -110,11 +113,13 @@ def __iter__(self):
110113
def __str__(self):
111114
output = StringIO()
112115
if len(self.copied_files) > 0:
113-
print("[CopiedFiles]", file=output)
116+
header = "[LinkedFiles]" if self.link else "[CopiedFiles]"
117+
print(header, file=output)
114118
for fastq in self.copied_files:
115119
print(fastq, file=output)
116120
if len(self.skipped_files) > 0:
117-
print("\n[SkippedFiles]\nalready_copied = [", file=output)
121+
key = "linked" if self.link else "copied"
122+
print(f"\n[SkippedFiles]\nalready_{key} = [", file=output)
118123
for fastq in self.skipped_files:
119124
print(f' "{fastq.source_path.name}",', file=output)
120125
print("]", file=output)

ezfastq/fastq.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,11 +26,14 @@ class FastqFile:
2626
def __str__(self):
2727
return f'"{self.source_path.name}" = "{self.name}"'
2828

29-
def check_and_copy(self, destination):
29+
def check_and_copy(self, destination, link=False):
3030
destination = Path(destination)
3131
compressed_copy = destination / self.name
3232
if compressed_copy.is_file():
3333
return False
34+
elif link is True:
35+
self.link(destination)
36+
return True
3437
else:
3538
self.copy(destination)
3639
return True
@@ -42,6 +45,14 @@ def copy(self, destination):
4245
if self.extension == "fastq":
4346
run(["gzip", str(file_copy)])
4447

48+
def link(self, destination):
    """Create a symbolic link to the source file inside ``destination``.

    Only inputs whose extension is ``fastq.gz`` can be linked; the link is
    named after the file's working name.

    Args:
        destination: Directory in which to create the link. It is created,
            including missing parents, if it does not exist yet.

    Raises:
        LinkError: If the file's extension is not ``fastq.gz``.
    """
    if self.extension != "fastq.gz":
        message = "symbolic linking only supported for gzip-compressed files"
        raise LinkError(message)
    # Accept plain strings as well as Path objects, as check_and_copy does.
    destination = Path(destination)
    destination.mkdir(parents=True, exist_ok=True)
    sym_link = destination / self._working_name
    # A relative link target is interpreted relative to the directory holding
    # the link, not the current working directory, so anchor it first to
    # avoid creating a dangling link.
    sym_link.symlink_to(Path(self.source_path).absolute())
55+
4556
@property
4657
def name(self):
4758
return f"{self.stem}.fastq.gz"
@@ -58,3 +69,7 @@ def extension(self):
5869
@property
5970
def _working_name(self):
6071
return f"{self.stem}.{self.extension}"
72+
73+
74+
class LinkError(ValueError):
    """Signal that a file cannot be symbolically linked."""

ezfastq/tests/test_cli.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,19 @@ def test_copy_verbose(tmp_path):
4545
assert Path(log_data["Paths"]["destination"]) == tmp_path / "seq"
4646

4747

48+
def test_link(tmp_path):
    """Run the CLI with ``--link`` and check the outputs and the copy log."""
    seq_path = files("ezfastq") / "tests" / "data" / "flat"
    arglist = [seq_path, "test1", "test2", "--workdir", tmp_path, "--link"]
    cli.main(arglist)
    linked = list((tmp_path / "seq").glob("*_R?.fastq.gz"))
    assert len(linked) == 4
    # Counting files and reading the log does not show that linking took
    # place, so inspect the files themselves.
    assert all(fastq.is_symlink() for fastq in linked)
    copy_log = tmp_path / "seq" / "copy-log-1.toml"
    with open(copy_log, "rb") as fh:
        log_data = tomllib.load(fh)
    assert len(log_data["LinkedFiles"]) == 4
    assert "CopiedFiles" not in log_data
    assert "SkippedFiles" not in log_data
59+
60+
4861
def test_copy_subdir(tmp_path):
4962
seq_path = files("ezfastq") / "tests" / "data" / "flat"
5063
arglist = [seq_path, "test1", "test2", "--workdir", tmp_path, "--subdir", "seq/PROJa/RUNb"]

ezfastq/tests/test_copier.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,24 @@ def test_copier_copy(tmp_path):
5757
assert len(copier3.skipped_files) == 3
5858

5959

60+
def test_copier_link(tmp_path):
    """Link two samples into ``tmp_path`` and check the links and the log text."""
    sample_names = ["test1", "test2"]
    copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1, link=True)
    copier.copy_files(tmp_path)
    assert len(copier.copied_files) == 4
    # Look in the directory handed to copy_files, and require the glob to be
    # non-empty first: all() over an empty sequence is vacuously true.
    linked = sorted(tmp_path.glob("*.fastq.gz"))
    assert len(linked) == 4
    assert all(fq.is_symlink() for fq in linked)
    observed = str(copier)
    expected = """
[LinkedFiles]
"test1_S1_L001_R1_001.fastq.gz" = "test1_R1.fastq.gz"
"test1_S1_L001_R2_001.fastq.gz" = "test1_R2.fastq.gz"
"test2_R1.fq.gz" = "test2_R1.fastq.gz"
"test2_R2.fq.gz" = "test2_R2.fastq.gz"
"""
    assert observed.strip() == expected.strip()
76+
77+
6078
def test_copier_prefix(tmp_path):
6179
sample_names = ["test2", "test3"]
6280
copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1, prefix="abc_")

ezfastq/tests/test_fastq.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,13 +43,16 @@ def test_fastq_file_copy(tmp_path):
4343
assert file_copy.is_file()
4444

4545

46-
def test_fastq_file_check_and_copy(tmp_path):
46+
@pytest.mark.parametrize("link_mode", [True, False])
47+
def test_fastq_file_check_and_copy(tmp_path, link_mode):
4748
destination = tmp_path / "seq"
4849
inpath = files("ezfastq") / "tests" / "data" / "flat" / "test1_S1_L001_R2_001.fastq.gz"
4950
infile = FastqFile(inpath, "test1", 2)
50-
was_copied = infile.check_and_copy(destination)
51+
was_copied = infile.check_and_copy(destination, link=link_mode)
5152
assert was_copied
5253
file_copy = tmp_path / "seq" / "test1_R2.fastq.gz"
5354
assert file_copy.is_file()
54-
was_copied = infile.check_and_copy(destination)
55+
if link_mode is True:
56+
assert file_copy.is_symlink()
57+
was_copied = infile.check_and_copy(destination, link=link_mode)
5558
assert not was_copied

0 commit comments

Comments
 (0)