@@ -23,7 +23,7 @@ def load_gdsc1(
2323
2424 :param path_data: Path to the dataset.
2525 :param file_name: File name of the dataset.
26- :param measure: The name of the column containing the measure to predict, default = "LN_IC50 "
26+ :param measure: The name of the column containing the measure to predict, default = "LN_IC50_curvecurator "
2727
2828 :param dataset_name: Name of the dataset.
2929 :return: DrugResponseDataset containing response, cell line IDs, and drug IDs.
@@ -49,7 +49,7 @@ def load_gdsc2(path_data: str = "data", measure: str = "LN_IC50_curvecurator", f
4949
5050 :param path_data: Path to the dataset.
5151 :param file_name: File name of the dataset.
52- :param measure: The name of the column containing the measure to predict, default = "LN_IC50 "
52+ :param measure: The name of the column containing the measure to predict, default = "LN_IC50_curvecurator "
5353
5454 :return: DrugResponseDataset containing response, cell line IDs, and drug IDs.
5555 """
@@ -64,7 +64,7 @@ def load_ccle(
6464
6565 :param path_data: Path to the dataset.
6666 :param file_name: File name of the dataset.
67- :param measure: The name of the column containing the measure to predict, default = "LN_IC50 "
67+ :param measure: The name of the column containing the measure to predict, default = "LN_IC50_curvecurator "
6868
6969 :return: DrugResponseDataset containing response, cell line IDs, and drug IDs.
7070 """
@@ -84,17 +84,19 @@ def load_ccle(
8484 )
8585
8686
87- def load_toy (path_data : str = "data" , measure : str = "LN_IC50_curvecurator" ) -> DrugResponseDataset :
87+ def _load_toy (
88+ path_data : str = "data" , measure : str = "LN_IC50_curvecurator" , dataset_name = "TOYv1"
89+ ) -> DrugResponseDataset :
8890 """
89- Loads small Toy dataset, subsampled from GDSC1 .
91+ Loads small Toy dataset, subsampled from CTRPv2 or GDSC2 .
9092
9193 :param path_data: Path to the dataset.
92- :param measure: The name of the column containing the measure to predict, default = "response"
94+ :param measure: The name of the column containing the measure to predict, default = "LN_IC50_curvecurator"
95+ :param dataset_name: Name of the dataset. Either "TOYv1" or "TOYv2".
9396
9497 :return: DrugResponseDataset containing response, cell line IDs, and drug IDs.
9598 """
96- dataset_name = "Toy_Data"
97- path = os .path .join (path_data , dataset_name , "toy_data.csv" )
99+ path = os .path .join (path_data , dataset_name , f"{ dataset_name } .csv" )
98100 if not os .path .exists (path ):
99101 download_dataset (dataset_name , path_data , redownload = True )
100102 response_data = pd .read_csv (path , dtype = {"pubchem_id" : str })
@@ -107,13 +109,37 @@ def load_toy(path_data: str = "data", measure: str = "LN_IC50_curvecurator") ->
107109 )
108110
109111
def load_toyv1(path_data: str = "data", measure: str = "LN_IC50_curvecurator") -> DrugResponseDataset:
    """
    Load the first toy dataset ("TOYv1"), a small subsample of CTRPv2.

    Thin convenience wrapper that delegates to ``_load_toy`` with the dataset name fixed to "TOYv1".

    :param path_data: Path to the dataset.
    :param measure: The name of the column containing the measure to predict, default = "LN_IC50_curvecurator"

    :return: DrugResponseDataset containing response, cell line IDs, and drug IDs.
    """
    # Keyword arguments make the delegation explicit and independent of parameter order.
    return _load_toy(path_data=path_data, measure=measure, dataset_name="TOYv1")
122+
123+
def load_toyv2(path_data: str = "data", measure: str = "LN_IC50_curvecurator") -> DrugResponseDataset:
    """
    Load the second toy dataset ("TOYv2"), a small subsample of GDSC2.

    Can be used to test cross study prediction. Thin convenience wrapper that delegates to ``_load_toy``
    with the dataset name fixed to "TOYv2".

    :param path_data: Path to the dataset.
    :param measure: The name of the column containing the measure to predict, default = "LN_IC50_curvecurator"

    :return: DrugResponseDataset containing response, cell line IDs, and drug IDs.
    """
    # Keyword arguments make the delegation explicit and independent of parameter order.
    return _load_toy(path_data=path_data, measure=measure, dataset_name="TOYv2")
134+
135+
110136def _load_ctrpv (version : str , path_data : str = "data" , measure : str = "LN_IC50_curvecurator" ) -> DrugResponseDataset :
111137 """
112138 Load the CTRP dataset of the given version (e.g. CTRPv1 or CTRPv2).
113139
114140 :param version: The version of the CTRP dataset to load.
115141 :param path_data: Path to the location of the CTRP dataset
116- :param measure: The name of the column containing the measure to predict, default = "response "
142+ :param measure: The name of the column containing the measure to predict, default = "LN_IC50_curvecurator "
117143
118144 :return: DrugResponseDataset containing response, cell line IDs, and drug IDs
119145 """
@@ -171,7 +197,8 @@ def load_custom(path_data: str | Path, measure: str = "response") -> DrugRespons
171197 "GDSC1" : load_gdsc1 ,
172198 "GDSC2" : load_gdsc2 ,
173199 "CCLE" : load_ccle ,
174- "Toy_Data" : load_toy ,
200+ "TOYv1" : load_toyv1 ,
201+ "TOYv2" : load_toyv2 ,
175202 "CTRPv1" : load_ctrpv1 ,
176203 "CTRPv2" : load_ctrpv2 ,
177204}
@@ -184,7 +211,7 @@ def load_dataset(
184211 """
185212 Load a dataset based on the dataset name.
186213
187- :param dataset_name: The name of the dataset to load. Can be one of ('GDSC1', 'GDSC2', 'CCLE', or 'Toy_Data ')
214+ :param dataset_name: The name of the dataset to load. Can be one of ('GDSC1', 'GDSC2', 'CCLE', 'TOYv1', or 'TOYv2 ')
188215 to download provided datasets, or any other name to allow for custom datasets.
189216 :param path_data: The parent path in which custom or downloaded datasets should be located, or in which raw
190217 viability data is to be found for fitting with CurveCurator (see param curve_curator for details).
0 commit comments