diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 689d69e..55a4046 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ ## Helping Out -There are `TODOs` that better enhance the reproducability of datasets or analysis of algorithm outputs, as well as +There are `TODOs` that better enhance the reproducability and accuracy of datasets or analysis of algorithm outputs, as well as [open resolvable issues](https://github.com/Reed-CompBio/spras-benchmarking/). ## Adding a dataset diff --git a/cache/.gitignore b/cache/.gitignore new file mode 100644 index 0000000..de153db --- /dev/null +++ b/cache/.gitignore @@ -0,0 +1 @@ +artifacts diff --git a/cache/README.md b/cache/README.md new file mode 100644 index 0000000..d997004 --- /dev/null +++ b/cache/README.md @@ -0,0 +1,3 @@ +# cache + +Handles artifact fetching and cache. This folder has a `Snakefile` which only contains a single function used for producing fetching rules. diff --git a/cache/Snakefile b/cache/Snakefile new file mode 100644 index 0000000..2d22236 --- /dev/null +++ b/cache/Snakefile @@ -0,0 +1,34 @@ +from cache import link +from cache.util import uncompress +import urllib.parse +from dataclasses import dataclass +from typing import Union +from pathlib import Path + +@dataclass +class FetchConfig: + directive: list[str] + uncompress: bool = False + +def produce_fetch_rules(input_dict: dict[str, Union[FetchConfig, list[str]]]): + """ + Produces fetch rules based on a dictionary mapping + output files to their directory.py-based directive. + """ + # Map inputs to be wrapped with FetchConfig if list[str] + input_dict = {k: FetchConfig(v) if isinstance(v, list) else v for k, v in input_dict.items()} + + directives = [urllib.parse.quote_plus("/".join(directive.directive)) for directive in input_dict.values()] + assert len(directives) == len(set(directives)), "Directives aren't unique!" + + for output_file, config in input_dict.items(): + # Since placeholders are evaluated when the job is actually ran, + # we pass data using params and output. + rule: + name: f"fetch_{urllib.parse.quote_plus("/".join(config.directive))}_to_{urllib.parse.quote_plus(output_file)}" + output: file=output_file + params: + config=config + run: + Path(output.file).parent.mkdir(exist_ok=True) + link(Path(output.file), params.config.directive, uncompress=params.config.uncompress) diff --git a/cache/__init__.py b/cache/__init__.py index e69de29..2f15fe4 100644 --- a/cache/__init__.py +++ b/cache/__init__.py @@ -0,0 +1,82 @@ +""" +This is how spras-benchmarking handles artifact caching. `cache` should be used specifically inside `Snakefile` +""" + +from cache.util import uncompress as uncompress_file +from cache.directory import get_cache_item +from pathlib import Path +import os +from urllib.parse import quote_plus +import pickle + +__all__ = ["link"] + +dir_path = Path(os.path.dirname(os.path.realpath(__file__))) +artifacts_dir = dir_path / "artifacts" + +def get_artifact_name(directive: list[str]) -> str: + return quote_plus("/".join(directive)) + +def has_expired(directive: list[str]) -> bool: + """ + Check if the artifact metadata associated with a directive has expired. + Avoids re-downloading the artifact if nothing has changed. 
+ """ + artifact_name = get_artifact_name(directive) + cache_item = get_cache_item(directive) + + metadata_dir = artifacts_dir / 'metadata' + metadata_dir.mkdir(exist_ok=True) + metadata_file = (artifacts_dir / 'metadata' / artifact_name).with_suffix((artifacts_dir / artifact_name).suffix + '.metadata') + + # metadata never existed: we need to retrieve the new file + if not metadata_file.exists(): + with open(metadata_file, 'wb') as f: + pickle.dump(cache_item, f) + return True + + old_cache_item = None + with open(metadata_file, 'rb') as f: + old_cache_item = pickle.load(f) + + # metadata expired: re-retrieve the item + if old_cache_item != cache_item: + with open(metadata_file, 'wb') as f: + pickle.dump(cache_item, f) + return True + + # metadata hasn't changed and already existed: this hasn't expired + return False + +def link(output: str, directive: list[str], uncompress=False): + """ + Links output files from cache.directory directives. + For example, + + ```py + link("output/ensg-ensp.tsv", ["BioMart", "ensg-ensp.tsv"]) + ``` + + would download and check BioMart's cache for ENSG-ENSP mapping, then symlink the cached output + (lying somewhere in the cache folder) with the desired `output`. + """ + + artifacts_dir.mkdir(exist_ok=True) + + artifact_name = get_artifact_name(directive) + + Path(output).unlink(missing_ok=True) + + # Re-download if the directive has expired. + cache_item = get_cache_item(directive) + if has_expired(directive): + (artifacts_dir / artifact_name).unlink(missing_ok=True) + cache_item.download(artifacts_dir / artifact_name) + + if uncompress: + uncompressed_artifact_path = Path(str(artifacts_dir / artifact_name) + '.uncompressed') + uncompressed_artifact_path.unlink(missing_ok=True) + uncompress_file(artifacts_dir / artifact_name, uncompressed_artifact_path) + Path(output).symlink_to(uncompressed_artifact_path) + else: + Path(output).symlink_to(artifacts_dir / artifact_name) diff --git a/cache/biomart/README.md b/cache/biomart/README.md index fbc9dde..d5d85c5 100644 --- a/cache/biomart/README.md +++ b/cache/biomart/README.md @@ -1,3 +1,4 @@ # BioMart XML Queries Directory for storing XML queries generated from [the BioMart interface](https://www.ensembl.org/info/data/biomart/index.html). +See the martview: https://www.ensembl.org/biomart/martview. 
diff --git a/cache/directory.py b/cache/directory.py index c0adeec..5df6f1b 100644 --- a/cache/directory.py +++ b/cache/directory.py @@ -12,6 +12,7 @@ dir_path = Path(os.path.dirname(os.path.realpath(__file__))) + def fetch_biomart_url(xml: str) -> str: """ Access BioMart data through the BioMart REST API: @@ -20,16 +21,31 @@ def fetch_biomart_url(xml: str) -> str: ROOT = "http://www.ensembl.org/biomart/martservice?query=" return ROOT + urllib.parse.quote_plus(xml) + @dataclass class CacheItem: """Class for differentriating between offline and online items in a cache.""" + name: str + """The display name of the artifact, used for human-printing.""" cached: str online: str + @classmethod + def cache_only(cls, name: str, cached: str) -> "CacheItem": + """Wrapper method to explicitly declare a CacheItem as cached only.""" + return cls(name=name, online=cached, cached="") + def download(self, output: str | PathLike): + print(f"Fetching {self.name}...") print(f"Downloading {self.online}...") + if self.cached == "": + # From CacheItem.cached_only + # (gdown doesn't take in Paths for the output_file, so we must stringify it here) + gdown.download(self.online, str(output)) + return + urllib.request.urlretrieve(self.online, output) with NamedTemporaryFile() as cached_file: @@ -45,14 +61,16 @@ def download(self, output: str | PathLike): directory: CacheDirectory = { "STRING": { "9606": { - "links": CacheItem( + "9606.protein.links.txt.gz": CacheItem( + name="STRING 9606 protein links", cached="https://drive.google.com/uc?id=1fvjdIbgzbgJrdJxWRRRwwS1zuegf6DOj", online="http://stringdb-downloads.org/download/protein.links.v12.0/9606.protein.links.v12.0.txt.gz", ), - "aliases": CacheItem( + "9606.protein.aliases.txt.gz": CacheItem( + name="STRING 9606 protein aliases", cached="https://drive.google.com/uc?id=1IWrQeTVCcw1A-jDk-4YiReWLnwP0S9bY", online="https://stringdb-downloads.org/download/protein.aliases.v12.0/9606.protein.aliases.v12.0.txt.gz", - ) + ), } }, "UniProt": { @@ -60,67 +78,80 @@ def download(self, output: str | PathLike): "9606": { # We prefer manually curated genes. 
"SwissProt_9606.tsv": CacheItem( + name="UniProt 9606 SwissProt genes", cached="https://drive.google.com/uc?id=1h2Cl-60qcKse-djcsqlRXm_n60mVY7lk", - online="https://rest.uniprot.org/uniprotkb/stream?fields=accession%2Cid%2Cprotein_name%2Cgene_names&format=tsv&query=%28*%29+AND+%28reviewed%3Atrue%29+AND+%28model_organism%3A9606%29" + online="https://rest.uniprot.org/uniprotkb/stream?fields=accession%2Cid%2Cprotein_name%2Cgene_names&format=tsv&query=%28*%29+AND+%28reviewed%3Atrue%29+AND+%28model_organism%3A9606%29", ), "HUMAN_9606_idmapping_selected.tab.gz": CacheItem( + name="UniProt 9606 ID external database mapping", cached="https://drive.google.com/uc?id=1Oysa5COq31H771rVeyrs-6KFhE3VJqoX", - online="https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz" + online="https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping_selected.tab.gz", ), "HUMAN_9606_idmapping.dat.gz": CacheItem( + name="UniProt 9606 internal id mapping", cached="https://drive.google.com/uc?id=1lGxrx_kGyNdupwIOUXzfIZScc7rQKP-O", - online="https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz" - ) + online="https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/HUMAN_9606_idmapping.dat.gz", + ), } }, "DISEASES": { # Instead of going through https://unmtid-shinyapps.net/shiny/tiga/, we use their # archived files directory instead. "tiga_gene-trait_stats.tsv": CacheItem( + name="TIGA data", cached="https://drive.google.com/uc?id=114qyuNDy4qdmYDHHJAW-yBeTxcGTDUnK", online="https://unmtid-dbs.net/download/TIGA/20250916/tiga_gene-trait_stats.tsv", ), "HumanDO.tsv": CacheItem( + name="Disease ontology data", cached="https://drive.google.com/uc?id=1lfB1DGJgrXTxP_50L6gGu_Nq6OyDjiIi", online="https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/016a4ec33d1a1508d669650086cd92ccebe138e6/DOreports/HumanDO.tsv", ), "human_disease_textmining_filtered.tsv": CacheItem( + name="DISEASES textmining channel", cached="https://drive.google.com/uc?id=1vD8KbT9sk04VEJx9r3_LglCTGYJdhN0D", online="https://download.jensenlab.org/human_disease_textmining_filtered.tsv", ), "human_disease_knowledge_filtered.tsv": CacheItem( + name="DISEASES knowledge channel", cached="https://drive.google.com/uc?id=1qGUnjVwF9-8p5xvp8_6CfVsbMSM_wkld", online="https://download.jensenlab.org/human_disease_knowledge_filtered.tsv", ), }, "BioMart": { "ensg-ensp.tsv": CacheItem( + name="BioMart ENSG <-> ENSP mapping", cached="https://drive.google.com/uc?id=1-gPrDoluXIGydzWKjWEnW-nWhYu3YkHL", - online=fetch_biomart_url((dir_path / "biomart" / "ensg-ensp.xml").read_text()) + online=fetch_biomart_url((dir_path / "biomart" / "ensg-ensp.xml").read_text()), ) }, "DepMap": { "OmicsProfiles.csv": CacheItem( + name="DepMap omics metadata", cached="https://drive.google.com/uc?id=1i54aKfO0Ci2QKLTNJnuQ_jgGhH4c9rTL", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2F2025-05-01-master-mapping-table-28c2.12%2Fpublic_release_date.2025-05-01.master_mapping_table.csv&dl_name=OmicsProfiles.csv&bucket=depmap-external-downloads" + 
online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2F2025-05-01-master-mapping-table-28c2.12%2Fpublic_release_date.2025-05-01.master_mapping_table.csv&dl_name=OmicsProfiles.csv&bucket=depmap-external-downloads", ), "CRISPRGeneDependency.csv": CacheItem( + name="DepMap gene dependency probability estimates", cached="https://drive.google.com/uc?id=122rWNqT_u3M7B_11WYZMtOLiPbBykkaz", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2F25q2-public-557c.3%2FCRISPRGeneDependency.csv&dl_name=CRISPRGeneDependency.csv&bucket=depmap-external-downloads" + online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2F25q2-public-557c.3%2FCRISPRGeneDependency.csv&dl_name=CRISPRGeneDependency.csv&bucket=depmap-external-downloads", ), "OmicsSomaticMutationsMatrixDamaging.csv": CacheItem( + name="DepMap genotyped matrix", cached="https://drive.google.com/uc?id=1W7N2H0Qi7NwmTmNChcwa2ZZ4WxAuz-Xh", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.87%2FOmicsSomaticMutationsMatrixDamaging.csv&dl_name=OmicsSomaticMutationsMatrixDamaging.csv&bucket=depmap-external-downloads" + online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.87%2FOmicsSomaticMutationsMatrixDamaging.csv&dl_name=OmicsSomaticMutationsMatrixDamaging.csv&bucket=depmap-external-downloads", ), "OmicsExpressionProteinCodingGenesTPMLogp1.csv": CacheItem( + name="DepMap model-level TPMs", cached="https://drive.google.com/uc?id=1P0m88eXJ8GPdru8h9oOcHPeXKU7ljIrP", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.73%2FOmicsExpressionProteinCodingGenesTPMLogp1.csv&dl_name=OmicsExpressionProteinCodingGenesTPMLogp1.csv&bucket=depmap-external-downloads" + online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.73%2FOmicsExpressionProteinCodingGenesTPMLogp1.csv&dl_name=OmicsExpressionProteinCodingGenesTPMLogp1.csv&bucket=depmap-external-downloads", ), "OmicsCNGeneWGS.csv": CacheItem( + name="DepMap gene-level copy number data", cached="https://drive.google.com/uc?id=1TPp3cfK7OZUrftucr3fLO-krXSQAA6Ub", - online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.104%2FOmicsCNGeneWGS.csv&dl_name=OmicsCNGeneWGS.csv&bucket=depmap-external-downloads" - ) - } + online="https://depmap.org/portal/download/api/download?file_name=downloads-by-canonical-id%2Fpublic-25q2-c5ef.104%2FOmicsCNGeneWGS.csv&dl_name=OmicsCNGeneWGS.csv&bucket=depmap-external-downloads", + ), + }, } diff --git a/cache/index.py b/cache/index.py deleted file mode 100644 index 385a2ff..0000000 --- a/cache/index.py +++ /dev/null @@ -1 +0,0 @@ -# Artifact caching diff --git a/databases/util.py b/cache/util.py similarity index 99% rename from databases/util.py rename to cache/util.py index 8ffb500..da17d2b 100644 --- a/databases/util.py +++ b/cache/util.py @@ -2,6 +2,7 @@ import gzip import shutil + def uncompress(source: Path, target: Path): """Uncompresses a .gz file""" # Uncompressing a .gz file: https://stackoverflow.com/a/44712152/7589775 diff --git a/databases/README.md b/databases/README.md deleted file mode 100644 index a87fa4a..0000000 --- a/databases/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# databases - -A catalog of CLIs wrapping various common background PPI databases. 
diff --git a/databases/__init__.py b/databases/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/databases/stringdb.py b/databases/stringdb.py deleted file mode 100644 index 3c6717a..0000000 --- a/databases/stringdb.py +++ /dev/null @@ -1,49 +0,0 @@ -import argparse -import os -from pathlib import Path -from databases.util import uncompress - -from cache.directory import get_cache_item - -# https://stackoverflow.com/a/5137509/7589775 -dir_path = os.path.dirname(os.path.realpath(__file__)) - -string_path = Path(dir_path, "string") - - -def parse_args(): - parser = argparse.ArgumentParser( - prog="STRING DB Fetcher", description="Downloads specified STRING DB background interactomes from a specific organism." - ) - - parser.add_argument( - "-i", - "--id", - help=""" - The specified organism ID to use. - See https://string-db.org/cgi/download for more info. - For example, 9606 is the homo sapiens background interactome. - For an example usage, see datasets/diseases's Snakefile. - """, - type=int, - required=True, - ) - - return parser.parse_args() - -def main(): - args = parse_args() - string_path.mkdir(exist_ok=True) - - # We download the links file - links_file = string_path / f"{args.id}.protein.links.v12.0.txt.gz" - get_cache_item(["STRING", str(args.id), "links"]).download(links_file) - uncompress(links_file, links_file.with_suffix("")) # an extra call of with_suffix strips the `.gz` prefix - - # and its associated aliases - aliases_file = string_path / f"{args.id}.protein.aliases.v12.0.txt.gz" - get_cache_item(["STRING", str(args.id), "aliases"]).download(aliases_file) - uncompress(aliases_file, aliases_file.with_suffix("")) - -if __name__ == "__main__": - main() diff --git a/datasets/depmap/Snakefile b/datasets/depmap/Snakefile index 2182663..b98db0d 100644 --- a/datasets/depmap/Snakefile +++ b/datasets/depmap/Snakefile @@ -1,3 +1,5 @@ +include: "../../cache/Snakefile" + rule all: # We currently only care about the FADU cell line. 
input: @@ -5,18 +7,16 @@ rule all: "processed/FADU_cell_line_prizes.txt", "processed/FADU_gold_standard_thresh_0_5.txt" -rule fetch: - output: - "raw/CRISPRGeneDependency.csv", - "raw/OmicsProfiles.csv", - "raw/OmicsSomaticMutationsMatrixDamaging.csv", - "raw/OmicsExpressionProteinCodingGenesTPMLogp1.csv", - "raw/OmicsCNGeneWGS.csv", - "raw/HUMAN_9606_idmapping.tsv", - "raw/HUMAN_9606_idmapping_selected.tsv", - "raw/SwissProt_9606.tsv" - shell: - "uv run scripts/fetch.py" +produce_fetch_rules({ + "raw/CRISPRGeneDependency.csv": ["DepMap", "CRISPRGeneDependency.csv"], + "raw/OmicsProfiles.csv": ["DepMap", "OmicsProfiles.csv"], + "raw/OmicsSomaticMutationsMatrixDamaging.csv": ["DepMap", "OmicsSomaticMutationsMatrixDamaging.csv"], + "raw/OmicsExpressionProteinCodingGenesTPMLogp1.csv": ["DepMap", "OmicsExpressionProteinCodingGenesTPMLogp1.csv"], + "raw/OmicsCNGeneWGS.csv": ["DepMap", "OmicsCNGeneWGS.csv"], + "raw/HUMAN_9606_idmapping.tsv": FetchConfig(["UniProt", "9606", "HUMAN_9606_idmapping.dat.gz"], uncompress=True), + "raw/HUMAN_9606_idmapping_selected.tsv": FetchConfig(["UniProt", "9606", "HUMAN_9606_idmapping_selected.tab.gz"], uncompress=True), + "raw/SwissProt_9606.tsv": ["UniProt", "9606", "SwissProt_9606.tsv"], +}) rule mapping: input: diff --git a/datasets/depmap/scripts/cell_line_processing.py b/datasets/depmap/scripts/cell_line_processing.py index 0fad7d9..7ab5dbe 100644 --- a/datasets/depmap/scripts/cell_line_processing.py +++ b/datasets/depmap/scripts/cell_line_processing.py @@ -131,6 +131,7 @@ def process_single_cell_line( print(f"Processing for cell line '{cell_line_name}' completed successfully.") return True + def generate_gold_standard(cell_line_name, model_id, CRISPR_dependency, gene_to_uniprot, threshold: float): """Generate gold standard file for the cell line based on CRISPR dependency and gene to Uniprot mapping.""" # map Uniprot IDs to gene symbols in the CRISPR dependency data diff --git a/datasets/depmap/scripts/fetch.py b/datasets/depmap/scripts/fetch.py deleted file mode 100644 index b922312..0000000 --- a/datasets/depmap/scripts/fetch.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Fetches the latest DepMap data we need - -Download page: https://depmap.org/portal/data_page/?tab=allData -""" - -from pathlib import Path -import os -from cache.directory import get_cache_item -from databases.util import uncompress - -# https://stackoverflow.com/a/5137509/7589775 -dir_path = os.path.dirname(os.path.realpath(__file__)) - -raw_dir = Path(dir_path, "..", "raw") - - -def main(): - raw_dir.mkdir(exist_ok=True) - - print("Fetching DepMap omics metadata") - get_cache_item(["DepMap", "OmicsProfiles.csv"]).download(raw_dir / "OmicsProfiles.csv") - - print("Fetching DepMap gene dependency probability estimates...") - get_cache_item(["DepMap", "CRISPRGeneDependency.csv"]).download(raw_dir / "CRISPRGeneDependency.csv") - - print("Fetching DepMap genotyped matrix...") - get_cache_item(["DepMap", "OmicsSomaticMutationsMatrixDamaging.csv"]).download(raw_dir / "OmicsSomaticMutationsMatrixDamaging.csv") - - print("Fetching DepMap model-level TPMs...") - get_cache_item(["DepMap", "OmicsExpressionProteinCodingGenesTPMLogp1.csv"]).download(raw_dir / "OmicsExpressionProteinCodingGenesTPMLogp1.csv") - - print("Fetching DepMap gene-level copy number data...") - get_cache_item(["DepMap", "OmicsCNGeneWGS.csv"]).download(raw_dir / "OmicsCNGeneWGS.csv") - - print("Fetching UniProt internal id mapping...") - get_cache_item(["UniProt", "9606", "HUMAN_9606_idmapping.dat.gz"]).download(raw_dir / 
"HUMAN_9606_idmapping.dat.gz") - uncompress(raw_dir / "HUMAN_9606_idmapping.dat.gz", raw_dir / "HUMAN_9606_idmapping.tsv") - - print("Fetching UniProt id external database mapping...") - get_cache_item(["UniProt", "9606", "HUMAN_9606_idmapping_selected.tab.gz"]).download(raw_dir / "HUMAN_9606_idmapping_selected.tab.gz") - uncompress(raw_dir / "HUMAN_9606_idmapping_selected.tab.gz", raw_dir / "HUMAN_9606_idmapping_selected.tsv") - - print("Fetching UniProt SwissProt genes...") - get_cache_item(["UniProt", "9606", "SwissProt_9606.tsv"]).download(raw_dir / "SwissProt_9606.tsv") - -if __name__ == "__main__": - main() diff --git a/datasets/depmap/scripts/uniprot_mapping.py b/datasets/depmap/scripts/uniprot_mapping.py index 4198366..71fc1cb 100644 --- a/datasets/depmap/scripts/uniprot_mapping.py +++ b/datasets/depmap/scripts/uniprot_mapping.py @@ -15,10 +15,12 @@ def extract_gene_symbols(input_df: pd.DataFrame) -> pd.DataFrame: gene_columns = input_df.columns.tolist()[1:] gene_symbols = [ # We want to extract GENE_NAME from GENE_NAME (Unknown) - (col[:col.find("(") - 1], None) if "(Unknown)" in col else + (col[: col.find("(") - 1], None) + if "(Unknown)" in col # or GENE_ID from "GENE_NAME (GENE_ID)" - (col[:col.find("(") - 1], col[col.find("(") + 1:-1]) if "(" in col else - (col, None) + else (col[: col.find("(") - 1], col[col.find("(") + 1 : -1]) + if "(" in col + else (col, None) for col in gene_columns ] @@ -50,19 +52,22 @@ def main(): # while idmapping will be used for GeneSymbol -> UniProtKB-AC mapping. # We'll also take the idmapping data and trim for specifically Swiss-Prot (curated) genes. - curated_df = pd.read_csv(dir_path / ".." / "raw" / "SwissProt_9606.tsv", sep='\t', usecols=["Entry", "Entry Name", "Gene Names"]) + curated_df = pd.read_csv(dir_path / ".." / "raw" / "SwissProt_9606.tsv", sep="\t", usecols=["Entry", "Entry Name", "Gene Names"]) curated_df.columns = ["UniProtKB-AC", "Entry Name", "Gene Names"] idmapping_df = pd.read_csv( - dir_path / ".." / "raw" / "HUMAN_9606_idmapping.tsv", - header=None, names=["UniProtKB-AC", "ID_type", "Value"], sep='\t') + dir_path / ".." / "raw" / "HUMAN_9606_idmapping.tsv", header=None, names=["UniProtKB-AC", "ID_type", "Value"], sep="\t" + ) idmapping_df = idmapping_df[idmapping_df["ID_type"] == "Gene_Name"].drop(columns=["ID_type"]).rename(columns={"Value": "GeneSymbol"}) idmapping_df = idmapping_df.merge(curated_df, on="UniProtKB-AC", how="inner") gene_symbols_df_nid = gene_symbols_df_nid.merge(idmapping_df, on="GeneSymbol", how="inner").drop(columns=["GeneID"]) idmapping_selected_df = pd.read_csv( dir_path / ".." / "raw" / "HUMAN_9606_idmapping_selected.tsv", - header=None, usecols=[0, 1, 2], names=["UniProtKB-AC", "UniProtKB-ID", "GeneID"], sep='\t' + header=None, + usecols=[0, 1, 2], + names=["UniProtKB-AC", "UniProtKB-ID", "GeneID"], + sep="\t", ) idmapping_selected_df = idmapping_selected_df[~idmapping_selected_df["GeneID"].isna()] idmapping_selected_df = idmapping_selected_df.merge(curated_df, on="UniProtKB-AC", how="inner") @@ -72,7 +77,7 @@ def main(): gene_symbol_df = gene_symbols_df_id.merge(gene_symbols_df_nid, on=["GeneSymbol", "UniProtKB-AC", "Entry Name", "Gene Names"], how="outer") gene_symbol_df = gene_symbol_df.drop(columns=["Gene Names"]) gene_symbol_df = gene_symbol_df.rename(columns={"GeneSymbol": "From"}) - gene_symbol_df.to_csv(dir_path / ".." / "processed" / "DamagingMutations_idMapping.tsv", sep='\t', index=False) + gene_symbol_df.to_csv(dir_path / ".." 
/ "processed" / "DamagingMutations_idMapping.tsv", sep="\t", index=False) if __name__ == "__main__": diff --git a/datasets/diseases/Snakefile b/datasets/diseases/Snakefile index fcaa100..0455b57 100644 --- a/datasets/diseases/Snakefile +++ b/datasets/diseases/Snakefile @@ -1,3 +1,5 @@ +include: "../../cache/Snakefile" + rule all: input: "GS_files/Alopecia_areata_GS.txt", @@ -5,27 +7,21 @@ rule all: "prize_files/alopecia_areata_prizes.txt", "prize_files/diabetes_mellitus_prizes.txt" -rule of_db: - output: - "../../databases/string/9606.protein.links.v12.0.txt", - "../../databases/string/9606.protein.aliases.v12.0.txt" - shell: - "uv run ../../databases/stringdb.py --id 9606" - -rule fetch: - output: - "raw/human_disease_knowledge_filtered.tsv", - "raw/human_disease_textmining_filtered.tsv", - "raw/HumanDO.tsv", - "raw/tiga_gene-trait_stats.tsv" - shell: - "uv run scripts/fetch.py" +produce_fetch_rules({ + "raw/human_disease_textmining_filtered.tsv": ["DISEASES", "human_disease_textmining_filtered.tsv"], + "raw/human_disease_knowledge_filtered.tsv": ["DISEASES", "human_disease_knowledge_filtered.tsv"], + "raw/HumanDO.tsv": ["DISEASES", "HumanDO.tsv"], + "raw/tiga_gene-trait_stats.tsv": ["DISEASES", "tiga_gene-trait_stats.tsv"], + "raw/ensg-ensp.tsv": ["BioMart", "ensg-ensp.tsv"], + "raw/9606.protein.links.txt": FetchConfig(["STRING", "9606", "9606.protein.links.txt.gz"], uncompress=True), + "raw/9606.protein.aliases.txt": FetchConfig(["STRING", "9606", "9606.protein.aliases.txt.gz"], uncompress=True), +}) rule inputs: input: "raw/HumanDO.tsv", "raw/tiga_gene-trait_stats.tsv", - "../../databases/string/9606.protein.aliases.v12.0.txt" + "raw/9606.protein.aliases.txt" output: "data/inputs.csv" shell: @@ -35,7 +31,8 @@ rule gold_standard: input: "raw/human_disease_knowledge_filtered.tsv", "raw/human_disease_textmining_filtered.tsv", - "../../databases/string/9606.protein.aliases.v12.0.txt" + "raw/9606.protein.aliases.txt", + "raw/ensg-ensp.tsv" output: "data/gold_standard.csv" shell: @@ -45,7 +42,7 @@ rule files: input: "data/inputs.csv", "data/gold_standard.csv", - "../../databases/string/9606.protein.links.v12.0.txt" + "raw/9606.protein.links.txt" output: # These are the two we use for the SPRAS run for now "GS_files/Alopecia_areata_GS.txt", diff --git a/datasets/diseases/scripts/fetch.py b/datasets/diseases/scripts/fetch.py deleted file mode 100644 index 8daf58d..0000000 --- a/datasets/diseases/scripts/fetch.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Fetches the latest DISEASES database channels, TIGA data, and human disease ontology data that we need. 
- -Download pages: -- DISEASES: https://diseases.jensenlab.org/Downloads -- TIGA: https://unmtid-shinyapps.net/shiny/tiga/ -- Disease Ontology: https://disease-ontology.org/downloads/ -""" - -from pathlib import Path -import os -from cache.directory import get_cache_item - -# https://stackoverflow.com/a/5137509/7589775 -dir_path = os.path.dirname(os.path.realpath(__file__)) - -raw_dir = Path(dir_path, "..", "raw") - - -def main(): - # We only need the text mining and knowledge channels - # and avoid the integrated channel as it is the multiplied probabilities of all - # three channels (personal correspondence with Damian Szklarczyk) - - raw_dir.mkdir(exist_ok=True) - - print("Fetching DISEASES text channel...") - get_cache_item(["DISEASES", "human_disease_textmining_filtered.tsv"]).download(raw_dir / "human_disease_textmining_filtered.tsv") - - print("Fetching DISEASES knowledge channel...") - get_cache_item(["DISEASES", "human_disease_knowledge_filtered.tsv"]).download(raw_dir / "human_disease_knowledge_filtered.tsv") - - print("Fetching TIGA data...") - get_cache_item(["DISEASES", "tiga_gene-trait_stats.tsv"]).download(raw_dir / "tiga_gene-trait_stats.tsv") - - print("Fetching human disease ontology data...") - get_cache_item(["DISEASES", "HumanDO.tsv"]).download(raw_dir / "HumanDO.tsv") - - print("Fetching BioMart ENSG - ENSP mapping...") - get_cache_item(["BioMart", "ensg-ensp.tsv"]).download(raw_dir / "ensg-ensp.tsv") - - -if __name__ == "__main__": - main() diff --git a/datasets/diseases/scripts/files.py b/datasets/diseases/scripts/files.py index 2897c94..dc5a949 100644 --- a/datasets/diseases/scripts/files.py +++ b/datasets/diseases/scripts/files.py @@ -40,9 +40,9 @@ def main(): df = df[["str_id"]] df.to_csv(diseases_path / "GS_files" / f"{disease.replace(' ', '_')}_GS.txt", sep="\t", index=False, header=None) - # See /databases/stringdb.py for information on how this was grabbed. + # See /cache/directory.py for information on how this was grabbed. # 9606 is the organism code for homo sapiens and the required background interactome of DISEASES. - string = pd.read_csv(diseases_path / ".." / ".." / "databases" / "string" / "9606.protein.links.v12.0.txt", sep=" ", skiprows=[0], header=None) + string = pd.read_csv(diseases_path / "raw" / "9606.protein.links.txt", sep=" ", skiprows=[0], header=None) # Threshold anything above a confidence score of 900 to trim down the background interactome string = string[string.iloc[:, 2] > 900] diff --git a/datasets/diseases/scripts/gold_standard.py b/datasets/diseases/scripts/gold_standard.py index 1d3ee5f..a60b20c 100644 --- a/datasets/diseases/scripts/gold_standard.py +++ b/datasets/diseases/scripts/gold_standard.py @@ -65,9 +65,7 @@ def main(): # NOTE: the STRING API call to map genes to proteins # also does text search, which brings up more false positives than true positives: because # of this, we specifically only care about ENSG -> ENSP and nothing greater. - string_aliases = pd.read_csv( - diseases_path / ".." / ".." 
/ "databases" / "string" / "9606.protein.aliases.v12.0.txt", sep="\t", usecols=["#string_protein_id", "alias"] - ) + string_aliases = pd.read_csv(diseases_path / "raw" / "9606.protein.aliases.txt", sep="\t", usecols=["#string_protein_id", "alias"]) string_aliases.columns = ["str_id", "ENSP"] string_aliases = string_aliases.drop_duplicates() diff --git a/datasets/diseases/scripts/inputs.py b/datasets/diseases/scripts/inputs.py index 18608f2..ba35396 100644 --- a/datasets/diseases/scripts/inputs.py +++ b/datasets/diseases/scripts/inputs.py @@ -24,9 +24,7 @@ def main(): # Mapping ENSG IDs to STRING IDs through the STRING aliases file # given our ENSG and ENSP (non one-to-one!) mapping `string_aliases`, - string_aliases = pd.read_csv( - diseases_path / ".." / ".." / "databases" / "string" / "9606.protein.aliases.v12.0.txt", sep="\t", usecols=["#string_protein_id", "alias"] - ) + string_aliases = pd.read_csv(diseases_path / "raw" / "9606.protein.aliases.txt", sep="\t", usecols=["#string_protein_id", "alias"]) string_aliases.columns = ["str_id", "ENSP"] string_aliases = string_aliases.drop_duplicates() diff --git a/datasets/hiv/Scripts/Data_Prep.py b/datasets/hiv/Scripts/Data_Prep.py index 1aca432..43460cf 100644 --- a/datasets/hiv/Scripts/Data_Prep.py +++ b/datasets/hiv/Scripts/Data_Prep.py @@ -2,30 +2,24 @@ import pickle import os -prize_05 = pandas.read_csv('raw/prize_05.csv', sep='\t', lineterminator='\n') -prize_060 = pandas.read_csv('raw/prize_060.csv', sep='\t', lineterminator='\n') +prize_05 = pandas.read_csv("raw/prize_05.csv", sep="\t", lineterminator="\n") +prize_060 = pandas.read_csv("raw/prize_060.csv", sep="\t", lineterminator="\n") -prize_05['Uniprot'] = prize_05['Uniprot'].str.split('-', expand=False).str[0] -prize_05 = prize_05.sort_values('Prize', - ascending=False).drop_duplicates('Uniprot').sort_index() +prize_05["Uniprot"] = prize_05["Uniprot"].str.split("-", expand=False).str[0] +prize_05 = prize_05.sort_values("Prize", ascending=False).drop_duplicates("Uniprot").sort_index() -prize_060['Uniprot'] = prize_060['Uniprot'].str.split('-', expand=False).str[0] -prize_060 = prize_060.sort_values('Prize', - ascending=False).drop_duplicates('Uniprot').sort_index() +prize_060["Uniprot"] = prize_060["Uniprot"].str.split("-", expand=False).str[0] +prize_060 = prize_060.sort_values("Prize", ascending=False).drop_duplicates("Uniprot").sort_index() -prize_060_nodes = prize_060['Uniprot'].tolist() -prize_05_nodes = prize_05['Uniprot'].tolist() +prize_060_nodes = prize_060["Uniprot"].tolist() +prize_05_nodes = prize_05["Uniprot"].tolist() -nodeset = list(set(prize_05_nodes+prize_060_nodes)) +nodeset = list(set(prize_05_nodes + prize_060_nodes)) -df = { - "NodeIDs": nodeset, - "prize_05": prize_05, - "prize_060": prize_060 -} +df = {"NodeIDs": nodeset, "prize_05": prize_05, "prize_060": prize_060} -if not os.path.exists('./Pickles'): - os.makedirs('./Pickles') +if not os.path.exists("./Pickles"): + os.makedirs("./Pickles") -with open("Pickles/NodeIDs.pkl","wb") as file: - pickle.dump(df,file) +with open("Pickles/NodeIDs.pkl", "wb") as file: + pickle.dump(df, file) diff --git a/datasets/hiv/Scripts/Kegg_Orthology.py b/datasets/hiv/Scripts/Kegg_Orthology.py index 301e340..919e442 100644 --- a/datasets/hiv/Scripts/Kegg_Orthology.py +++ b/datasets/hiv/Scripts/Kegg_Orthology.py @@ -3,62 +3,59 @@ import pandas as pd from more_itertools import chunked -pathway = read(open("Raw_Data/ko03250.xml", 'r')) +pathway = read(open("Raw_Data/ko03250.xml", "r")) -#Read in Kegg pathway data and keep only 
orthologs +# Read in Kegg pathway data and keep only orthologs entries_data = [] for entry in pathway.entries.values(): - if entry.type == 'ortholog': - entries_data.append({ - 'name': entry.name - }) + if entry.type == "ortholog": + entries_data.append({"name": entry.name}) entries_df = pd.DataFrame(entries_data) -#Some orthologs have multiple ko codes in the same row -#The following two lines move all ko codes to individual rows -orthology_ids = entries_df['name'].str.split(' ').explode() -orthology_ids = orthology_ids.apply(lambda x: x.split(':')[1]).tolist() +# Some orthologs have multiple ko codes in the same row +# The following two lines move all ko codes to individual rows +orthology_ids = entries_df["name"].str.split(" ").explode() +orthology_ids = orthology_ids.apply(lambda x: x.split(":")[1]).tolist() -#Using bioservices KEGG class to map ortholog(ko) codes to human(hsa) codes +# Using bioservices KEGG class to map ortholog(ko) codes to human(hsa) codes k = KEGG() -ko_hsa_map = k.link('hsa', '+'.join(orthology_ids)) -ko_hsa_dict = {x.split('\t')[0].split(':')[1]: x.split('\t')[1] for x in ko_hsa_map.split('\n')[:-1]} -ko_hsa_df = pd.DataFrame(ko_hsa_dict.items(),columns= ['KEGG_Orthology','HSA']) - -#Kegg .get is limited to 10 entries per call -#The following code chunks the hsa list into sets of 10 -#then calls the .get function on each which returns kegg api data in string format -hsa_chunked = list(chunked(ko_hsa_df['HSA'].tolist(),10)) +ko_hsa_map = k.link("hsa", "+".join(orthology_ids)) +ko_hsa_dict = {x.split("\t")[0].split(":")[1]: x.split("\t")[1] for x in ko_hsa_map.split("\n")[:-1]} +ko_hsa_df = pd.DataFrame(ko_hsa_dict.items(), columns=["KEGG_Orthology", "HSA"]) + +# Kegg .get is limited to 10 entries per call +# The following code chunks the hsa list into sets of 10 +# then calls the .get function on each which returns kegg api data in string format +hsa_chunked = list(chunked(ko_hsa_df["HSA"].tolist(), 10)) raw_uniprot = [] for entry in hsa_chunked: - raw_uniprot.append(k.get('+'.join(entry)).split('\n///\n\n')) + raw_uniprot.append(k.get("+".join(entry)).split("\n///\n\n")) -#Raw Kegg api data is filtered to obtain hsa and uniprot codes for each protein -#Note: Although bioservices .link and .conv return cleaner outputs, they do not support -#one to many relationships at this time. -#Note: bioservices also supplies a parser method for the kegg api but it is also broken at this time. +# Raw Kegg api data is filtered to obtain hsa and uniprot codes for each protein +# Note: Although bioservices .link and .conv return cleaner outputs, they do not support +# one to many relationships at this time. +# Note: bioservices also supplies a parser method for the kegg api but it is also broken at this time. 
processed_uniprot = [] for chunk in raw_uniprot: for item in chunk: - item = item.split('\n') - processed_uniprot.append([(x.strip().split(' ')[1:],'hsa:'+(item[0].split(' '*7)[1])) - for x in item if 'UniProt' in x][0]) + item = item.split("\n") + processed_uniprot.append([(x.strip().split(" ")[1:], "hsa:" + (item[0].split(" " * 7)[1])) for x in item if "UniProt" in x][0]) -#Creates a dictionary where uniprot ids are keys and hsa ids are values +# Creates a dictionary where uniprot ids are keys and hsa ids are values hsa_uniprot_dict = {} -for item in processed_uniprot : - for entry in item[0]: - hsa_uniprot_dict.update({'up:'+entry:item[1]}) +for item in processed_uniprot: + for entry in item[0]: + hsa_uniprot_dict.update({"up:" + entry: item[1]}) -#Creates a dataframe with uniprot and hsa values then merges with ko-hsa dataframe by hsa +# Creates a dataframe with uniprot and hsa values then merges with ko-hsa dataframe by hsa hsa_uniprot_map = pd.DataFrame.from_dict(hsa_uniprot_dict.items()) -hsa_uniprot_map.columns = ['Uniprot','HSA'] -final_df = ko_hsa_df.merge(hsa_uniprot_map,on = 'HSA') -uniprotIDs = final_df['Uniprot'].apply(lambda x: x.split(':')[1]).tolist() +hsa_uniprot_map.columns = ["Uniprot", "HSA"] +final_df = ko_hsa_df.merge(hsa_uniprot_map, on="HSA") +uniprotIDs = final_df["Uniprot"].apply(lambda x: x.split(":")[1]).tolist() -#Filters the combined dataframe to include only rows where the uniprot id is in swissprot +# Filters the combined dataframe to include only rows where the uniprot id is in swissprot u = UniProt() -tst = u.mapping(fr='UniProtKB', to='UniProtKB-Swiss-Prot',query = ','.join(uniprotIDs)) -failed_uniprot = pd.Series(list(set(tst['failedIds']))).apply(lambda x: 'up:'+x) +tst = u.mapping(fr="UniProtKB", to="UniProtKB-Swiss-Prot", query=",".join(uniprotIDs)) +failed_uniprot = pd.Series(list(set(tst["failedIds"]))).apply(lambda x: "up:" + x) -final_df = final_df[~final_df['Uniprot'].isin(failed_uniprot)] +final_df = final_df[~final_df["Uniprot"].isin(failed_uniprot)] diff --git a/datasets/hiv/Scripts/Name_Mapping.py b/datasets/hiv/Scripts/Name_Mapping.py index 9cace88..6fb6e6a 100644 --- a/datasets/hiv/Scripts/Name_Mapping.py +++ b/datasets/hiv/Scripts/Name_Mapping.py @@ -17,36 +17,28 @@ def main(): - - with open('Pickles/NodeIDs.pkl', 'rb') as file: + with open("Pickles/NodeIDs.pkl", "rb") as file: NodeIDs = pickle.load(file)["NodeIDs"] - job_id = submit_id_mapping( - from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids= NodeIDs - ) + job_id = submit_id_mapping(from_db="UniProtKB_AC-ID", to_db="UniProtKB", ids=NodeIDs) if check_id_mapping_results_ready(job_id): link = get_id_mapping_results_link(job_id) uniprot_results = get_id_mapping_results_search(link) uniprot_IDs = [] uniprot_map = {} - for i in uniprot_results.get('results'): + for i in uniprot_results.get("results"): uniprot_IDs.append((i.get("to").get("uniProtkbId"))) - uniprot_map.update({i.get("from"):i.get("to").get("uniProtkbId")}) + uniprot_map.update({i.get("from"): i.get("to").get("uniProtkbId")}) - df ={ - "UniprotIDs": uniprot_IDs, - "UniprotMap": uniprot_map - } + df = {"UniprotIDs": uniprot_IDs, "UniprotMap": uniprot_map} - with open("Pickles/UniprotIDs.pkl","wb") as file: - pickle.dump(df,file) + with open("Pickles/UniprotIDs.pkl", "wb") as file: + pickle.dump(df, file) return - - def check_response(response): try: response.raise_for_status() @@ -169,9 +161,7 @@ def get_id_mapping_results_search(url): else: size = 500 query["size"] = size - compressed = ( - query["compressed"][0].lower() == 
"true" if "compressed" in query else False - ) + compressed = query["compressed"][0].lower() == "true" if "compressed" in query else False parsed = parsed._replace(query=urlencode(query, doseq=True)) url = parsed.geturl() request = session.get(url) @@ -195,11 +185,9 @@ def get_id_mapping_results_stream(url): parsed = urlparse(url) query = parse_qs(parsed.query) file_format = query["format"][0] if "format" in query else "json" - compressed = ( - query["compressed"][0].lower() == "true" if "compressed" in query else False - ) + compressed = query["compressed"][0].lower() == "true" if "compressed" in query else False return decode_results(request, file_format, compressed) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/datasets/hiv/Scripts/SPRAS_Formatting.py b/datasets/hiv/Scripts/SPRAS_Formatting.py index 837f52b..eb8fd99 100644 --- a/datasets/hiv/Scripts/SPRAS_Formatting.py +++ b/datasets/hiv/Scripts/SPRAS_Formatting.py @@ -3,26 +3,26 @@ import os current_directory = Path(os.path.dirname(os.path.realpath(__file__))) -PROCESSED_DIR = current_directory.parent / 'processed' +PROCESSED_DIR = current_directory.parent / "processed" -with open('Pickles/UniprotIDs.pkl', 'rb') as file: - UniprotIDs = pickle.load(file) +with open("Pickles/UniprotIDs.pkl", "rb") as file: + UniprotIDs = pickle.load(file) UIDs = UniprotIDs["UniprotIDs"] -UMap= UniprotIDs["UniprotMap"] +UMap = UniprotIDs["UniprotMap"] -with open('Pickles/NodeIDs.pkl','rb') as file2: - prizes = pickle.load(file2) +with open("Pickles/NodeIDs.pkl", "rb") as file2: + prizes = pickle.load(file2) prize_05 = prizes["prize_05"] prize_060 = prizes["prize_060"] -prize_05['Uniprot'] = prize_05['Uniprot'].apply(lambda x: UMap.get(x)) -prize_060['Uniprot'] = prize_060['Uniprot'].apply(lambda x: UMap.get(x)) +prize_05["Uniprot"] = prize_05["Uniprot"].apply(lambda x: UMap.get(x)) +prize_060["Uniprot"] = prize_060["Uniprot"].apply(lambda x: UMap.get(x)) -prize_05.columns = ['NODEID','prize'] -prize_060.columns = ['NODEID','prize'] +prize_05.columns = ["NODEID", "prize"] +prize_060.columns = ["NODEID", "prize"] -prize_05.to_csv(PROCESSED_DIR / 'processed_prize_05.txt', sep='\t', header=True, index=False) -prize_060.to_csv(PROCESSED_DIR / 'processed_prize_060.txt', sep='\t', header=True, index=False) +prize_05.to_csv(PROCESSED_DIR / "processed_prize_05.txt", sep="\t", header=True, index=False) +prize_060.to_csv(PROCESSED_DIR / "processed_prize_060.txt", sep="\t", header=True, index=False) diff --git a/datasets/rn-muscle-skeletal/process.py b/datasets/rn-muscle-skeletal/process.py index ac56cc2..0119b84 100644 --- a/datasets/rn-muscle-skeletal/process.py +++ b/datasets/rn-muscle-skeletal/process.py @@ -3,18 +3,21 @@ import os current_directory = Path(os.path.dirname(os.path.realpath(__file__))) -PROCESSED_DIR = current_directory / 'processed' +PROCESSED_DIR = current_directory / "processed" + def process(): # TODO: what are the actual last two headers called? 
- data = pandas.read_csv(current_directory / 'raw' / 'Muscle_Skeletal-Dec2018.tsv', - delimiter='\t', header=None, - names=["Interactome1", "Interactome2", "Type1", - "Type2", "InteractionType", "Weight", - "Const1", "Const2"]) + data = pandas.read_csv( + current_directory / "raw" / "Muscle_Skeletal-Dec2018.tsv", + delimiter="\t", + header=None, + names=["Interactome1", "Interactome2", "Type1", "Type2", "InteractionType", "Weight", "Const1", "Const2"], + ) data = data.drop(columns=["Type1", "Type2", "InteractionType", "Const1", "Const2"]) data.insert(3, "Direction", "U") - data.to_csv(PROCESSED_DIR / 'interactome.tsv', sep='\t', header=False, index=False) + data.to_csv(PROCESSED_DIR / "interactome.tsv", sep="\t", header=False, index=False) + -if __name__ == '__main__': +if __name__ == "__main__": process() diff --git a/datasets/yeast-osmotic-stress/process_prizes.py b/datasets/yeast-osmotic-stress/process_prizes.py index 81682a6..708c1df 100644 --- a/datasets/yeast-osmotic-stress/process_prizes.py +++ b/datasets/yeast-osmotic-stress/process_prizes.py @@ -5,18 +5,15 @@ current_directory = Path(os.path.dirname(os.path.realpath(__file__))) -if __name__ == '__main__': +if __name__ == "__main__": # Get the raw prizes DF - prizes = current_directory / 'raw' / 'prizes.txt' - prizes_df = pd.read_csv(prizes, sep='\t', header=None, names=["NODEID", "prize"]) + prizes = current_directory / "raw" / "prizes.txt" + prizes_df = pd.read_csv(prizes, sep="\t", header=None, names=["NODEID", "prize"]) # Use the manually curated prize info # TODO: where did this come from? - prizes_df2 = pd.DataFrame(data={"NODEID": ['YGR014W','YDR420W','YER118C'], - "prize": 10.051863}, index=[1596,1597,1598]) + prizes_df2 = pd.DataFrame(data={"NODEID": ["YGR014W", "YDR420W", "YER118C"], "prize": 10.051863}, index=[1596, 1597, 1598]) - new_prizes_path = current_directory / 'processed' / 'prizes1_dummies.txt' + new_prizes_path = current_directory / "processed" / "prizes1_dummies.txt" new_prizes = pd.concat([prizes_df, prizes_df2]) - new_prizes.to_csv(new_prizes_path, sep='\t', index=False, - columns=['NODEID','prize'], - header=['NODEID','prize']) + new_prizes.to_csv(new_prizes_path, sep="\t", index=False, columns=["NODEID", "prize"], header=["NODEID", "prize"])