Skip to content

Commit 327a1ac

Browse files
Merge pull request #140 from daisybio/development
New version v1.2.2
2 parents a0a5140 + 59eaa69 commit 327a1ac

File tree

12 files changed

+364
-497
lines changed

12 files changed

+364
-497
lines changed

.github/workflows/run_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ jobs:
6666
print("::set-output name=result::{}".format(result))
6767
6868
- name: Restore pre-commit cache
69-
uses: actions/cache@v4.2.0
69+
uses: actions/cache@v4.2.1
7070
if: matrix.session == 'pre-commit'
7171
with:
7272
path: ~/.cache/pre-commit

docs/conf.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@
5656
# the built documents.
5757
#
5858
# The short X.Y version.
59-
version = "1.2.1"
59+
version = "1.2.2"
6060
# The full version, including alpha/beta/rc tags.
61-
release = "1.2.1"
61+
release = "1.2.2"
6262

6363
# The language for content autogenerated by Sphinx. Refer to documentation
6464
# for a list of supported languages.

drevalpy/models/MOLIR/molir.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,11 @@ def load_cell_line_features(self, data_path: str, dataset_name: str) -> FeatureD
165165
feature_dataset = get_multiomics_feature_dataset(
166166
data_path=data_path,
167167
dataset_name=dataset_name,
168-
gene_list=None,
168+
gene_lists=None,
169169
omics=self.cell_line_views,
170170
)
171-
# log transformation
172-
feature_dataset.apply(function=np.log, view="gene_expression")
171+
# log transformation replaced with arcsinh transformation since log(0) is undefined
172+
feature_dataset.apply(function=np.arcsinh, view="gene_expression")
173173
return feature_dataset
174174

175175
def load_drug_features(self, data_path: str, dataset_name: str) -> FeatureDataset | None:

drevalpy/models/SimpleNeuralNetwork/multiomics_neural_network.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,14 @@ def load_cell_line_features(self, data_path: str, dataset_name: str) -> FeatureD
178178
:return: FeatureDataset containing the cell line omics features, filtered through the
179179
drug target genes
180180
"""
181-
return get_multiomics_feature_dataset(data_path=data_path, dataset_name=dataset_name)
181+
gene_lists = {
182+
"gene_expression": "drug_target_genes_all_drugs",
183+
"methylation": None,
184+
"mutations": "drug_target_genes_all_drugs",
185+
"copy_number_variation_gistic": "drug_target_genes_all_drugs",
186+
"proteomics": "drug_target_genes_all_drugs_proteomics",
187+
}
188+
return get_multiomics_feature_dataset(data_path=data_path, gene_lists=gene_lists, dataset_name=dataset_name)
182189

183190
def load_drug_features(self, data_path: str, dataset_name: str) -> FeatureDataset:
184191
"""

drevalpy/models/SuperFELTR/superfeltr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,10 +250,10 @@ def load_cell_line_features(self, data_path: str, dataset_name: str) -> FeatureD
250250
:returns: FeatureDataset containing the cell line gene expression features, mutations, and copy number variation
251251
"""
252252
feature_dataset = get_multiomics_feature_dataset(
253-
data_path=data_path, dataset_name=dataset_name, gene_list=None, omics=self.cell_line_views
253+
data_path=data_path, dataset_name=dataset_name, gene_lists=None, omics=self.cell_line_views
254254
)
255255
# arcsinh transformation (used instead of log, since log(0) is undefined)
256-
feature_dataset.apply(function=np.log, view="gene_expression")
256+
feature_dataset.apply(function=np.arcsinh, view="gene_expression")
257257
return feature_dataset
258258

259259
def load_drug_features(self, data_path: str, dataset_name: str) -> FeatureDataset | None:

drevalpy/models/baselines/multi_omics_random_forest.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,14 @@ def load_cell_line_features(self, data_path: str, dataset_name: str) -> FeatureD
5555
:returns: FeatureDataset containing the cell line omics features, filtered through the
5656
drug target genes
5757
"""
58-
return get_multiomics_feature_dataset(data_path=data_path, dataset_name=dataset_name)
58+
gene_lists = {
59+
"gene_expression": "drug_target_genes_all_drugs",
60+
"methylation": None,
61+
"mutations": "drug_target_genes_all_drugs",
62+
"copy_number_variation_gistic": "drug_target_genes_all_drugs",
63+
"proteomics": "drug_target_genes_all_drugs_proteomics",
64+
}
65+
return get_multiomics_feature_dataset(data_path=data_path, gene_lists=gene_lists, dataset_name=dataset_name)
5966

6067
def train(
6168
self,

drevalpy/models/utils.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,35 +139,43 @@ def load_drug_fingerprint_features(data_path: str, dataset_name: str, default_ra
139139
def get_multiomics_feature_dataset(
140140
data_path: str,
141141
dataset_name: str,
142-
gene_list: str | None = "drug_target_genes_all_drugs",
142+
gene_lists: dict | None = None,
143143
omics: list[str] | None = None,
144144
) -> FeatureDataset:
145145
"""
146146
Get multiomics feature dataset for the given list of OMICs.
147147
148148
:param data_path: path to the data, e.g., data/
149149
:param dataset_name: name of the dataset, e.g., GDSC2
150-
:param gene_list: list of genes to include, e.g., landmark_genes
150+
:param gene_lists: dictionary of names of lists of genes to include, for each omics type,
151+
e.g., {"gene_expression": "landmark_genes"}; if None, no gene-list reduction is applied to any omics type
151152
:param omics: list of omics to include, e.g., ["gene_expression", "methylation"]
152153
:returns: FeatureDataset with the multiomics features
153154
:raises ValueError: if no omics features are found
154155
"""
155156
if omics is None:
156157
omics = ["gene_expression", "methylation", "mutations", "copy_number_variation_gistic", "proteomics"]
158+
159+
if gene_lists is None:
160+
gene_lists = {o: None for o in omics}
161+
162+
if not np.all([k in omics for k in gene_lists.keys()]):
163+
raise ValueError("Gene lists must be provided for all omics types.")
164+
157165
feature_dataset = None
158166
for omic in omics:
159167
if feature_dataset is None:
160168
feature_dataset = load_and_reduce_gene_features(
161169
feature_type=omic,
162-
gene_list=None if omic == "methylation" else gene_list,
170+
gene_list=gene_lists[omic],
163171
data_path=data_path,
164172
dataset_name=dataset_name,
165173
)
166174
else:
167175
feature_dataset.add_features(
168176
load_and_reduce_gene_features(
169177
feature_type=omic,
170-
gene_list=None if omic == "methylation" else gene_list,
178+
gene_list=gene_lists[omic],
171179
data_path=data_path,
172180
dataset_name=dataset_name,
173181
)

0 commit comments

Comments
 (0)