
Commit 74b9c91

pre-commit format
1 parent 780328f commit 74b9c91

5 files changed: +18 −16 lines changed

README.md

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ Before running any training scripts, ensure the environment is correctly configu
 export PYTHONPATH=path/to/python-chebai
 ```
 or vice versa.
-
+
 * If you're working within both repositories simultaneously or facing module not found errors, we **recommend configuring both directories**:
 
 ```bash

chebai_proteins/preprocessing/datasets/deepGO/go_uniprot.py

Lines changed: 7 additions & 7 deletions
@@ -181,7 +181,7 @@ def _download_gene_ontology_data(self) -> str:
 
         if not os.path.isfile(go_path):
             print("Missing Gene Ontology raw data")
-            print(f"Downloading Gene Ontology data....")
+            print("Downloading Gene Ontology data....")
             r = requests.get(self._GO_DATA_URL, allow_redirects=True)
             r.raise_for_status()  # Check if the request was successful
             open(go_path, "wb").write(r.content)
@@ -207,7 +207,7 @@ def _download_swiss_uni_prot_data(self) -> Optional[str]:
         os.makedirs(os.path.dirname(uni_prot_file_path), exist_ok=True)
 
         if not os.path.isfile(uni_prot_file_path):
-            print(f"Downloading Swiss UniProt data....")
+            print("Downloading Swiss UniProt data....")
 
             # Create a temporary file
             with NamedTemporaryFile(delete=False) as tf:
@@ -223,7 +223,7 @@ def _download_swiss_uni_prot_data(self) -> Optional[str]:
 
             # Unpack the gzipped file
             try:
-                print(f"Unzipping the file....")
+                print("Unzipping the file....")
                 with gzip.open(temp_filename, "rb") as f_in:
                     output_file_path = uni_prot_file_path
                     with open(output_file_path, "wb") as f_out:
@@ -375,7 +375,7 @@ def _graph_to_raw_dataset(self, g: nx.DiGraph) -> pd.DataFrame:
         Returns:
             pd.DataFrame: The raw dataset created from the graph.
         """
-        print(f"Processing graph")
+        print("Processing graph")
 
         data_df = self._get_swiss_to_go_mapping()
         # add ancestors to go ids
@@ -559,8 +559,8 @@ def _get_data_splits(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
             )
         except FileNotFoundError:
             raise FileNotFoundError(
-                f"File data.pt doesn't exists. "
-                f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
+                "File data.pt doesn't exists. "
+                "Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
             )
 
         df_go_data = pd.DataFrame(data_go)
@@ -586,7 +586,7 @@ def base_dir(self) -> str:
         Returns:
             str: The path to the base directory, which is "data/GO_UniProt".
         """
-        return os.path.join("data", f"GO_UniProt")
+        return os.path.join("data", "GO_UniProt")
 
     @property
     def raw_file_names_dict(self) -> dict:
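The recurring fix in these hunks, and in the two files below, removes the `f` prefix from string literals that contain no placeholders. Linters such as flake8 or ruff report this pattern as F541; whether this repository's pre-commit hooks use one of those tools is an assumption here. A minimal before/after illustration:

```python
# Before: an f-string with no placeholders (the pattern flake8/ruff flag as F541).
print(f"Downloading Gene Ontology data....")

# After: a plain string literal; there is nothing to interpolate, so the f prefix is dropped.
print("Downloading Gene Ontology data....")
```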

chebai_proteins/preprocessing/datasets/deepGO/protein_pretraining.py

Lines changed: 2 additions & 2 deletions
@@ -223,8 +223,8 @@ def _get_data_splits(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
             )
         except FileNotFoundError:
             raise FileNotFoundError(
-                f"File data.pt doesn't exists. "
-                f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
+                "File data.pt doesn't exists. "
+                "Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
             )
 
         df_go_data = pd.DataFrame(data_go)

chebai_proteins/preprocessing/datasets/scope/scope.py

Lines changed: 5 additions & 5 deletions
@@ -130,7 +130,7 @@ def _download_pdb_sequence_data(self) -> None:
         os.makedirs(os.path.dirname(pdb_seq_file_path), exist_ok=True)
 
         if not os.path.isfile(pdb_seq_file_path):
-            print(f"Missing PDB raw data, Downloading PDB sequence data....")
+            print("Missing PDB raw data, Downloading PDB sequence data....")
 
             # Create a temporary file
             with NamedTemporaryFile(delete=False) as tf:
@@ -146,7 +146,7 @@ def _download_pdb_sequence_data(self) -> None:
 
             # Unpack the gzipped file
             try:
-                print(f"Unzipping the file....")
+                print("Unzipping the file....")
                 with gzip.open(temp_filename, "rb") as f_in:
                     output_file_path = pdb_seq_file_path
                     with open(output_file_path, "wb") as f_out:
@@ -422,7 +422,7 @@ def _graph_to_raw_dataset(self, graph: nx.DiGraph) -> pd.DataFrame:
         Raises:
             RuntimeError: If no sunids are selected.
         """
-        print(f"Process graph")
+        print("Process graph")
 
         selected_sun_ids_per_lvl = self.select_classes(graph)
 
@@ -665,8 +665,8 @@ def _get_data_splits(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
             )
         except FileNotFoundError:
             raise FileNotFoundError(
-                f"File data.pt doesn't exists. "
-                f"Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
+                "File data.pt doesn't exists. "
+                "Please call 'prepare_data' and/or 'setup' methods to generate the dataset files"
            )
 
         df_scope_version = pd.DataFrame(data_scope_version)
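The first two hunks above (and their counterparts in go_uniprot.py) sit inside a download-and-unpack routine: fetch an archive to a temporary file, then gunzip it into the raw-data location. Below is a minimal, self-contained sketch of that general pattern; the function name, chunked streaming, and cleanup are illustrative assumptions, not the repository's actual `_download_pdb_sequence_data` body.

```python
import gzip
import os
import shutil
from tempfile import NamedTemporaryFile

import requests


def download_and_gunzip(url: str, destination: str) -> None:
    """Download a gzipped file to a temporary location, then unpack it to `destination`."""
    if os.path.isfile(destination):
        return  # raw file already present, nothing to do

    os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)

    # Stream the download into a temporary file so a partial download never
    # overwrites the destination.
    with NamedTemporaryFile(delete=False) as tf:
        temp_filename = tf.name
        with requests.get(url, stream=True, allow_redirects=True) as r:
            r.raise_for_status()  # check if the request was successful
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                tf.write(chunk)

    # Unpack the gzipped temporary file into the destination path, then clean up.
    try:
        with gzip.open(temp_filename, "rb") as f_in:
            with open(destination, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
    finally:
        os.remove(temp_filename)
```

A call such as `download_and_gunzip(pdb_url, pdb_seq_file_path)` would mirror the structure visible in the context lines.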

tutorials/data_exploration_scope.ipynb

Lines changed: 3 additions & 1 deletion
@@ -1049,13 +1049,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "id": "6dc3fd6c-7cf6-47ef-812f-54319a0cdeb9",
    "metadata": {},
    "outputs": [],
    "source": [
     "# You can specify a literal path for the `splits_file_path`, or if another `scope_class` instance is already defined,\n",
     "# you can use its existing `splits_file_path` attribute for consistency.\n",
+    "from chebai_proteins.preprocessing.datasets.scope.scope import SCOPeOver2000\n",
+    "\n",
     "scope_class_with_splits = SCOPeOver2000(\n",
     "    scope_version=\"2.08\",\n",
     "    # splits_file_path=\"data/chebi_v231/ChEBI50/processed/splits.csv\",  # Literal path option\n",
