Skip to content

Commit 49f5d40

Browse files
Merge pull request #12 from Multiomics-Analytics-Group/fmt
Formatting and linting
2 parents 21b0242 + ba1568b commit 49f5d40

19 files changed

+570
-518
lines changed

docs/source/conf.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,23 +6,22 @@
66
# -- Project information -----------------------------------------------------
77
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
88

9-
project = 'InstaNexus'
10-
copyright = '2025, Marco Reverenna'
11-
author = 'Marco Reverenna'
12-
release = '0.2.0'
9+
project = "InstaNexus"
10+
copyright = "2025, Marco Reverenna"
11+
author = "Marco Reverenna"
12+
release = "0.2.0"
1313

1414
# -- General configuration ---------------------------------------------------
1515
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
1616

1717
extensions = []
1818

19-
templates_path = ['_templates']
19+
templates_path = ["_templates"]
2020
exclude_patterns = []
2121

2222

23-
2423
# -- Options for HTML output -------------------------------------------------
2524
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
2625

27-
html_theme = 'alabaster'
28-
html_static_path = ['_static']
26+
html_theme = "alabaster"
27+
html_static_path = ["_static"]

docs/source/tutorials/case_studies/prot_optimization_dbg.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@
519519
" sequence_type=\"contigs\",\n",
520520
" output_folder=RESULTS_DIR,\n",
521521
" reference=protein_norm,\n",
522-
" **params\n",
522+
" **params,\n",
523523
" )\n",
524524
"\n",
525525
" coverage_contigs = stat_contigs.get(\"coverage\")\n",
@@ -553,7 +553,7 @@
553553
" sequence_type=\"scaffolds\",\n",
554554
" output_folder=RESULTS_DIR,\n",
555555
" reference=protein_norm,\n",
556-
" **params\n",
556+
" **params,\n",
557557
" )\n",
558558
"\n",
559559
" coverage_scaffolds = stat_scaffolds.get(\"coverage\")\n",
@@ -814,7 +814,7 @@
814814
" sequence_type=\"contigs\",\n",
815815
" output_folder=RESULTS_DIR,\n",
816816
" reference=protein_norm,\n",
817-
" **params\n",
817+
" **params,\n",
818818
" )\n",
819819
" coverage_contigs = stat_contigs.get(\"coverage\")\n",
820820
"\n",
@@ -847,7 +847,7 @@
847847
" sequence_type=\"scaffolds\",\n",
848848
" output_folder=RESULTS_DIR,\n",
849849
" reference=protein_norm,\n",
850-
" **params\n",
850+
" **params,\n",
851851
" )\n",
852852
"\n",
853853
" coverage_scaffolds = stat_scaffolds.get(\"coverage\")\n",

docs/source/tutorials/case_studies/prot_optimization_greedy.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@
508508
" sequence_type=\"contigs\",\n",
509509
" output_folder=\".\",\n",
510510
" reference=protein_norm,\n",
511-
" **params\n",
511+
" **params,\n",
512512
" )\n",
513513
"\n",
514514
" coverage_contigs = stat_contigs.get(\"coverage\")\n",
@@ -556,7 +556,7 @@
556556
" sequence_type=\"scaffolds\",\n",
557557
" output_folder=\".\",\n",
558558
" reference=protein_norm,\n",
559-
" **params\n",
559+
" **params,\n",
560560
" )\n",
561561
" coverage_scaffolds = stat_scaffolds.get(\"coverage\")\n",
562562
"\n",
@@ -740,7 +740,7 @@
740740
" sequence_type=\"contigs\",\n",
741741
" output_folder=\".\",\n",
742742
" reference=protein_norm,\n",
743-
" **params\n",
743+
" **params,\n",
744744
" )\n",
745745
" coverage_contigs = stat_contigs.get(\"coverage\")\n",
746746
"\n",
@@ -784,7 +784,7 @@
784784
" sequence_type=\"scaffolds\",\n",
785785
" output_folder=\".\",\n",
786786
" reference=protein_norm,\n",
787-
" **params\n",
787+
" **params,\n",
788788
" )\n",
789789
" coverage_scaffolds = stat_scaffolds.get(\"coverage\")\n",
790790
"\n",

docs/source/tutorials/examples/dbg_variants_workflow.ipynb

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
"outputs": [],
4949
"source": [
5050
"# read a pre cleaned data file\n",
51-
"#data = pd.read_csv(\"../outputs/bsa/comb_dbg_c0.9_ks7_ts12_mo3/cleaned/cleaned_data.csv\")"
51+
"# data = pd.read_csv(\"../outputs/bsa/comb_dbg_c0.9_ks7_ts12_mo3/cleaned/cleaned_data.csv\")"
5252
]
5353
},
5454
{
@@ -62,9 +62,9 @@
6262
"\n",
6363
"import re\n",
6464
"\n",
65-
"file_name = 'bsa'\n",
65+
"file_name = \"bsa\"\n",
6666
"\n",
67-
"data = pd.read_csv(f'../inputs/{file_name}.csv'.format(file_name=file_name))\n",
67+
"data = pd.read_csv(f\"../inputs/{file_name}.csv\".format(file_name=file_name))\n",
6868
"\n",
6969
"data[\"log_probs\"] = data[\"log_probs\"].replace(-1, -10)\n",
7070
"\n",
@@ -99,13 +99,12 @@
9999
"metadata": {},
100100
"outputs": [],
101101
"source": [
102-
"from pathlib import Path\n",
103102
"\n",
104103
"repo_folder = Path(\"../\")\n",
105104
"\n",
106105
"filtered_psms = instanexus.preprocessing.filter_contaminants(\n",
107-
" cleaned_psms, run, repo_folder / \"fasta/contaminants.fasta\"\n",
108-
" )\n",
106+
" cleaned_psms, run, repo_folder / \"fasta/contaminants.fasta\"\n",
107+
")\n",
109108
"\n",
110109
"data = data[data[\"preds\"].isin(filtered_psms)]"
111110
]
@@ -158,10 +157,10 @@
158157
"source": [
159158
"assembler = Assembler(\n",
160159
" mode=\"dbg_weighted\",\n",
161-
" kmer_size=7, \n",
162-
" size_threshold=0, \n",
163-
" min_weight=2, # filter low-weight edges\n",
164-
" refine_rounds=3, # optional iterative refinement\n",
160+
" kmer_size=7,\n",
161+
" size_threshold=0,\n",
162+
" min_weight=2, # filter low-weight edges\n",
163+
" refine_rounds=3, # optional iterative refinement\n",
165164
")"
166165
]
167166
},
@@ -172,7 +171,9 @@
172171
"metadata": {},
173172
"outputs": [],
174173
"source": [
175-
"scaffolds_dbg_w = assembler.run(sequences, output_folder=output_folder, protein_norm=None)"
174+
"scaffolds_dbg_w = assembler.run(\n",
175+
" sequences, output_folder=output_folder, protein_norm=None\n",
176+
")"
176177
]
177178
},
178179
{
@@ -242,7 +243,9 @@
242243
"metadata": {},
243244
"outputs": [],
244245
"source": [
245-
"mapped_contigs = map.process_protein_contigs_scaffold(scaffolds_dbg_w, protein_norm, max_mismatches = 10, min_identity = 0.8)"
246+
"mapped_contigs = map.process_protein_contigs_scaffold(\n",
247+
" scaffolds_dbg_w, protein_norm, max_mismatches=10, min_identity=0.8\n",
248+
")"
246249
]
247250
},
248251
{
@@ -338,8 +341,8 @@
338341
"assembler_dbgx = Assembler(\n",
339342
" mode=\"dbgX\",\n",
340343
" kmer_size=7,\n",
341-
" size_threshold=10, \n",
342-
" min_weight=2, \n",
344+
" size_threshold=10,\n",
345+
" min_weight=2,\n",
343346
")"
344347
]
345348
},
@@ -351,9 +354,7 @@
351354
"outputs": [],
352355
"source": [
353356
"scaffolds_dbgx = assembler_dbgx.run(\n",
354-
" sequences=sequences,\n",
355-
" output_folder=output_folder,\n",
356-
" protein_norm=None\n",
357+
" sequences=sequences, output_folder=output_folder, protein_norm=None\n",
357358
")"
358359
]
359360
},
@@ -364,7 +365,9 @@
364365
"metadata": {},
365366
"outputs": [],
366367
"source": [
367-
"mapped_scaffolds_dbgx = map.process_protein_contigs_scaffold(scaffolds_dbgx, protein_norm, max_mismatches = 10, min_identity = 0.8)"
368+
"mapped_scaffolds_dbgx = map.process_protein_contigs_scaffold(\n",
369+
" scaffolds_dbgx, protein_norm, max_mismatches=10, min_identity=0.8\n",
370+
")"
368371
]
369372
},
370373
{
@@ -427,7 +430,7 @@
427430
" mode=\"fusion\",\n",
428431
" kmer_size=7,\n",
429432
" size_threshold=10,\n",
430-
" min_overlap=3, \n",
433+
" min_overlap=3,\n",
431434
" min_weight=2,\n",
432435
")"
433436
]
@@ -450,9 +453,7 @@
450453
"outputs": [],
451454
"source": [
452455
"scaffolds_fusion = assembler_fusion.run(\n",
453-
" sequences=sequences,\n",
454-
" output_folder=output_folder_fusion,\n",
455-
" protein_norm=None\n",
456+
" sequences=sequences, output_folder=output_folder_fusion, protein_norm=None\n",
456457
")"
457458
]
458459
},
@@ -463,7 +464,9 @@
463464
"metadata": {},
464465
"outputs": [],
465466
"source": [
466-
"mapped_scaffolds_fusion = map.process_protein_contigs_scaffold(scaffolds_fusion, protein_norm, max_mismatches=10, min_identity=0.8)\n",
467+
"mapped_scaffolds_fusion = map.process_protein_contigs_scaffold(\n",
468+
" scaffolds_fusion, protein_norm, max_mismatches=10, min_identity=0.8\n",
469+
")\n",
467470
"\n",
468471
"# top 20\n",
469472
"mapped_scaffolds_fusion = mapped_scaffolds_fusion[:20]"

docs/source/tutorials/examples/hybrid_workflow_with_figures.ipynb

Lines changed: 27 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,17 @@
3939
"import alignment as align\n",
4040
"import clustering as clus\n",
4141
"import preprocessing as prep\n",
42-
"import compute_statistics as comp_stat\n",
43-
"#import model_peptide_selector as selector\n",
42+
"\n",
43+
"# import model_peptide_selector as selector\n",
4444
"\n",
4545
"# import libraries\n",
4646
"from pathlib import Path\n",
4747
"from Bio import SeqIO\n",
4848
"\n",
49-
"#import joblib\n",
49+
"# import joblib\n",
5050
"import json\n",
5151
"import Bio\n",
52-
"import pandas as pd\n",
53-
"import matplotlib.pyplot as plt\n",
54-
"import seaborn as sns"
52+
"import pandas as pd"
5553
]
5654
},
5755
{
@@ -131,16 +129,10 @@
131129
"metadata": {},
132130
"outputs": [],
133131
"source": [
134-
"def get_combination_name(\n",
135-
" ass_method,\n",
136-
" conf,\n",
137-
" kmer_size,\n",
138-
" size_threshold,\n",
139-
" min_overlap\n",
140-
"):\n",
132+
"def get_combination_name(ass_method, conf, kmer_size, size_threshold, min_overlap):\n",
141133
" if ass_method in (\"dbg\", \"hybrid\"):\n",
142134
" return f\"comb_{ass_method}_c{conf}_ks{kmer_size}_ts{size_threshold}_mo{min_overlap}\"\n",
143-
" \n",
135+
"\n",
144136
" elif ass_method == \"greedy\":\n",
145137
" return f\"comb_{ass_method}_c{conf}_ts{size_threshold}_mo{min_overlap}\""
146138
]
@@ -186,12 +178,7 @@
186178
"metadata": {},
187179
"outputs": [],
188180
"source": [
189-
"comb = get_combination_name(\n",
190-
" ass_method,\n",
191-
" conf,\n",
192-
" kmer_size,\n",
193-
" size_threshold,\n",
194-
" min_overlap)\n",
181+
"comb = get_combination_name(ass_method, conf, kmer_size, size_threshold, min_overlap)\n",
195182
"\n",
196183
"print(comb)"
197184
]
@@ -207,7 +194,7 @@
207194
" \"ass_method\": ass_method,\n",
208195
" \"conf\": conf,\n",
209196
" \"size_threshold\": size_threshold,\n",
210-
" \"min_overlap\": min_overlap\n",
197+
" \"min_overlap\": min_overlap,\n",
211198
"}"
212199
]
213200
},
@@ -346,7 +333,9 @@
346333
" filtered_df = df[mask].copy()\n",
347334
" removed_count = (~mask).sum()\n",
348335
"\n",
349-
" print(f\"Removed {removed_count} contaminant sequences, {len(filtered_df)} remaining.\")\n",
336+
" print(\n",
337+
" f\"Removed {removed_count} contaminant sequences, {len(filtered_df)} remaining.\"\n",
338+
" )\n",
350339
" return filtered_df"
351340
]
352341
},
@@ -492,9 +481,9 @@
492481
"metadata": {},
493482
"outputs": [],
494483
"source": [
495-
"greedy_scaffolds = greedy.scaffold_iterative_greedy(assembled_contigs,\n",
496-
" min_overlap,\n",
497-
" size_threshold)"
484+
"greedy_scaffolds = greedy.scaffold_iterative_greedy(\n",
485+
" assembled_contigs, min_overlap, size_threshold\n",
486+
")"
498487
]
499488
},
500489
{
@@ -655,10 +644,12 @@
655644
"outputs": [],
656645
"source": [
657646
"mapped_scaffolds = map.process_protein_contigs_scaffold(\n",
658-
" all_scaffolds, protein_norm, max_mismatches = 0, min_identity = 0.90\n",
647+
" all_scaffolds, protein_norm, max_mismatches=0, min_identity=0.90\n",
659648
")\n",
660649
"\n",
661-
"map.mapping_substitutions(mapped_scaffolds, protein_norm, title= \"scaffolds mapped in RF-selected peptides\")"
650+
"map.mapping_substitutions(\n",
651+
" mapped_scaffolds, protein_norm, title=\"scaffolds mapped in RF-selected peptides\"\n",
652+
")"
662653
]
663654
},
664655
{
@@ -754,14 +745,14 @@
754745
"source": [
755746
"clus.cluster_fasta_files(input_folder=str(scaffolds_folder_out))\n",
756747
"\n",
757-
"fasta_input = scaffolds_folder_out / f\"scaffolds.fasta\"\n",
748+
"fasta_input = scaffolds_folder_out / \"scaffolds.fasta\"\n",
758749
"\n",
759750
"cluster_tsv_folder = clustering_out / run_id\n",
760-
" \n",
751+
"\n",
761752
"clus.process_fasta_and_clusters(\n",
762-
" fasta_file=str(fasta_input),\n",
763-
" input_folder=str(scaffolds_folder_out),\n",
764-
" )"
753+
" fasta_file=str(fasta_input),\n",
754+
" input_folder=str(scaffolds_folder_out),\n",
755+
")"
765756
]
766757
},
767758
{
@@ -798,10 +789,10 @@
798789
"outputs": [],
799790
"source": [
800791
"cons.process_alignment_files(\n",
801-
" align_folder=str(alignment_out),\n",
802-
" output_folder=str(consensus_out),\n",
803-
" run_id=run_id,\n",
804-
" )"
792+
" align_folder=str(alignment_out),\n",
793+
" output_folder=str(consensus_out),\n",
794+
" run_id=run_id,\n",
795+
")"
805796
]
806797
}
807798
],

0 commit comments

Comments
 (0)