|
4 | 4 | import logging
|
5 | 5 | import os
|
6 | 6 | import re
|
| 7 | +import resource |
7 | 8 | import shutil
|
8 | 9 | import subprocess # nosec
|
9 | 10 | import sys
|
10 | 11 | import tempfile
|
| 12 | +import textwrap |
11 | 13 | import threading
|
12 | 14 | import time
|
13 | 15 | import uuid
|
@@ -529,6 +531,15 @@ def run(
|
529 | 531 | tmpdir_lock: Optional[threading.Lock] = None,
|
530 | 532 | ) -> None:
|
531 | 533 |
|
| 534 | + # attempt to set an "unlimited" (-1) heap size for this process |
| 535 | + # (& thus commandline size) on any system that supports it |
| 536 | + # TODO: Do containers inherit the process's limits? |
| 537 | + # Can they be configured from outside of the container? |
| 538 | + try: |
| 539 | + resource.setrlimit(resource.RLIMIT_DATA, (-1, -1)) |
| 540 | + except Exception: |
| 541 | + pass |
| 542 | + |
532 | 543 | if tmpdir_lock:
|
533 | 544 | with tmpdir_lock:
|
534 | 545 | if not os.path.exists(self.tmpdir):
|
@@ -589,6 +600,20 @@ def run(
|
589 | 600 |
|
590 | 601 | class ContainerCommandLineJob(JobBase, metaclass=ABCMeta):
|
591 | 602 | """Commandline job using containers."""
|
def __init__(
    self,
    builder: Builder,
    joborder: CWLObjectType,
    make_path_mapper: Callable[..., PathMapper],
    requirements: List[CWLObjectType],
    hints: List[CWLObjectType],
    name: str,
) -> None:
    """Initialize the container job and its bind-mount bookkeeping.

    All arguments are forwarded unchanged to the parent class
    initializer. Two attributes used later by ``add_volumes`` are then
    created with a ``None`` placeholder.
    """
    # Zero-argument super() resolves to the class directly after
    # ContainerCommandLineJob in the MRO (JobBase). The previous
    # ``super(JobBase, self)`` started the lookup *after* JobBase and
    # therefore skipped JobBase.__init__ entirely.
    super().__init__(
        builder, joborder, make_path_mapper, requirements, hints, name
    )
    # Directory that add_volumes bind-mounts at the same path inside the
    # container and in which it writes mapping.tsv and the hard-linking
    # script. NOTE(review): must be assigned a real path before
    # add_volumes runs; the code that does so is not visible here.
    self.universal_file_bindmount_dir = None
    # Iterated by add_volumes as (src, dst, writable) triples.
    # NOTE(review): presumably filled in while volumes are collected;
    # confirm against the code that populates it.
    self.bindings_map = None
592 | 617 |
|
593 | 618 | @abstractmethod
|
594 | 619 | def get_from_requirements(
|
@@ -710,6 +735,41 @@ def add_volumes(
|
710 | 735 | )
|
711 | 736 | pathmapper.update(key, new_path, vol.target, vol.type, vol.staged)
|
712 | 737 |
|
| 738 | + # Dir of individual file inputs for the job (all named as uuid4). |
| 739 | + # This creates the same dir inside of the container as exists outside of it. |
| 740 | + # Overlayfs must be supported/enabled (which should always be true for CWL). |
| 741 | + src = dst = self.universal_file_bindmount_dir |
| 742 | + runtime.append(f"--bind={src}:{dst}:rw") |
| 743 | + |
| 744 | + # Make a TSV of the file mappings. |
| 745 | + mapping_tsv = os.path.join(self.universal_file_bindmount_dir, 'mapping.tsv') |
| 746 | + with open(mapping_tsv, 'w') as f: |
| 747 | + # 1. Sort by the length of the destination path, so that the |
| 748 | + # shortest destination paths are written first. |
| 749 | + # 2. Then, when we go to hardlink the files, we |
| 750 | + # should then just be able to hardlink them in order. |
| 751 | + for (src, dst, writable) in sorted(self.bindings_map, key=lambda x: len(x[1])): |
| 752 | + f.write('\t'.join((src, dst, writable)) + '\n') |
| 753 | + |
| 754 | + # Make the script that uses the TSV file mappings to hardlink everything |
| 755 | + # inside of the container to where the job expects to find them. |
| 756 | + # This script needs to be the first thing run inside of the container. |
| 757 | + linking_script = os.path.join(self.universal_file_bindmount_dir, 'hard_linking_script.py') |
| 758 | + # TODO: Write in bash instead. All images might not have python. |
| 759 | + with open(linking_script, 'w') as f: |
| 760 | + f.write(textwrap.dedent(f""" |
| 761 | + import os |
| 762 | +
|
| 763 | + with open('{mapping_tsv}', 'r') as f: |
| 764 | + for line in f: |
| 765 | + src, dst, writable = line.split('\\t') |
| 766 | + os.makedirs(os.path.dirname(dst), exist_ok=True) |
| 767 | + os.link(src, dst) |
| 768 | + # TODO: set the permissions on the file here after linking |
| 769 | +
|
| 770 | + """[1:])) |
| 771 | + os.chmod(linking_script, 0o777) |
| 772 | + |
713 | 773 | def run(
|
714 | 774 | self,
|
715 | 775 | runtimeContext: RuntimeContext,
|
|
0 commit comments