@@ -44,21 +44,21 @@ def test_load_sc_datasets(tmp_path, test_directory_feat_ids):
4444 tokenizer = MagicMock ()
4545 sc_memmap_dataset_path0 = tmp_path / "test_data_0"
4646 ds_0 = SingleCellMemMapDataset (
47- sc_memmap_dataset_path0 , h5ad_path = test_directory_feat_ids / "adata_sample0.h5ad"
47+ str ( sc_memmap_dataset_path0 ) , h5ad_path = str ( test_directory_feat_ids / "adata_sample0.h5ad" )
4848 ) # create the memmap dataset format from h5ad for testing purposes
49- dataset0 = SingleCellDataset (sc_memmap_dataset_path0 , tokenizer )
49+ dataset0 = SingleCellDataset (str ( sc_memmap_dataset_path0 ) , tokenizer )
5050 assert len (dataset0 ) == len (ds_0 ) == 8
5151 sc_memmap_dataset_path1 = tmp_path / "test_data_1"
5252 ds_1 = SingleCellMemMapDataset (
53- sc_memmap_dataset_path1 , h5ad_path = test_directory_feat_ids / "adata_sample1.h5ad"
53+ str ( sc_memmap_dataset_path1 ) , h5ad_path = str ( test_directory_feat_ids / "adata_sample1.h5ad" )
5454 ) # create the memmap dataset format from h5ad for testing purposes
55- dataset1 = SingleCellDataset (sc_memmap_dataset_path1 , tokenizer )
55+ dataset1 = SingleCellDataset (str ( sc_memmap_dataset_path1 ) , tokenizer )
5656 assert len (dataset1 ) == len (ds_1 ) == 6
5757 sc_memmap_dataset_path2 = tmp_path / "test_data_2"
5858 ds_2 = SingleCellMemMapDataset (
59- sc_memmap_dataset_path2 , h5ad_path = test_directory_feat_ids / "adata_sample2.h5ad"
59+ str ( sc_memmap_dataset_path2 ) , h5ad_path = str ( test_directory_feat_ids / "adata_sample2.h5ad" )
6060 ) # create the memmap dataset format from h5ad for testing purposes
61- dataset2 = SingleCellDataset (sc_memmap_dataset_path2 , tokenizer )
61+ dataset2 = SingleCellDataset (str ( sc_memmap_dataset_path2 ) , tokenizer )
6262 assert len (dataset2 ) == len (ds_2 ) == 100
6363
6464
@@ -82,12 +82,12 @@ def test_gene_not_in_tok_vocab(tmp_path, test_directory_feat_ids):
8282 adata .var ["feature_id" ] = synthetic_ids
8383 adata .write (sc_h5ad_dataset_path0 )
8484 SingleCellMemMapDataset (
85- sc_memmap_dataset_path0 , h5ad_path = sc_h5ad_dataset_path0
85+ str ( sc_memmap_dataset_path0 ) , h5ad_path = str ( sc_h5ad_dataset_path0 )
8686 ) # create the memmap dataset format from h5ad for testing purposes
8787 preprocessor = GeneformerPreprocess (
88- download_directory = sc_memmap_dataset_path0 ,
89- medians_file_path = sc_memmap_dataset_path0 / "medians.json" ,
90- tokenizer_vocab_path = sc_memmap_dataset_path0 / "geneformer.vocab" ,
88+ download_directory = str ( sc_memmap_dataset_path0 ) ,
89+ medians_file_path = str ( sc_memmap_dataset_path0 / "medians.json" ) ,
90+ tokenizer_vocab_path = str ( sc_memmap_dataset_path0 / "geneformer.vocab" ) ,
9191 )
9292 match preprocessor .preprocess ():
9393 case {"tokenizer" : tokenizer , "median_dict" : median_dict }:
@@ -96,14 +96,14 @@ def test_gene_not_in_tok_vocab(tmp_path, test_directory_feat_ids):
9696 logging .error ("Preprocessing failed." )
9797
9898 dataset0 = SingleCellDataset (
99- sc_memmap_dataset_path0 , tokenizer , median_dict = median_dict , include_unrecognized_vocab_in_dataset = True
99+ str ( sc_memmap_dataset_path0 ) , tokenizer , median_dict = median_dict , include_unrecognized_vocab_in_dataset = True
100100 ) # type: ignore
101101 index = EpochIndex (epoch = 0 , idx = 3 )
102102 with pytest .raises (ValueError ) as error_info :
103103 dataset0 .__getitem__ (index )
104104 assert "not in the tokenizer vocab." in str (error_info .value )
105105 dataset0 = SingleCellDataset (
106- sc_memmap_dataset_path0 ,
106+ str ( sc_memmap_dataset_path0 ) ,
107107 tokenizer ,
108108 median_dict = median_dict ,
109109 ) # type: ignore
@@ -115,12 +115,12 @@ def test_gene_not_in_tok_vocab(tmp_path, test_directory_feat_ids):
115115def test_empty_gene_data_input (tmp_path , test_directory_feat_ids ):
116116 sc_memmap_dataset_path0 = tmp_path / "test_data_0"
117117 SingleCellMemMapDataset (
118- sc_memmap_dataset_path0 , h5ad_path = test_directory_feat_ids / "adata_sample0.h5ad"
118+ str ( sc_memmap_dataset_path0 ) , h5ad_path = str ( test_directory_feat_ids / "adata_sample0.h5ad" )
119119 ) # create the memmap dataset format from h5ad for testing purposes
120120 preprocessor = GeneformerPreprocess (
121- download_directory = sc_memmap_dataset_path0 ,
122- medians_file_path = sc_memmap_dataset_path0 / "medians.json" ,
123- tokenizer_vocab_path = sc_memmap_dataset_path0 / "geneformer.vocab" ,
121+ download_directory = str ( sc_memmap_dataset_path0 ) ,
122+ medians_file_path = str ( sc_memmap_dataset_path0 / "medians.json" ) ,
123+ tokenizer_vocab_path = str ( sc_memmap_dataset_path0 / "geneformer.vocab" ) ,
124124 )
125125 match preprocessor .preprocess ():
126126 case {"tokenizer" : tokenizer , "median_dict" : median_dict }:
@@ -139,7 +139,7 @@ def test_empty_gene_data_input(tmp_path, test_directory_feat_ids):
139139
140140def test_lookup_row (tmp_path , cellx_small_directory ):
141141 tokenizer = MagicMock ()
142- dataset = SingleCellDataset (tmp_path / cellx_small_directory / "val" , tokenizer )
142+ dataset = SingleCellDataset (str ( tmp_path / cellx_small_directory / "val" ) , tokenizer )
143143 values , feature_ids = dataset .scdl .get_row (0 , return_features = True , feature_vars = ["feature_id" ])
144144 gene_data , col_idxs = values [0 ], values [1 ]
145145 assert len (gene_data ) == 440
@@ -169,7 +169,7 @@ def test_get_item_synthetic(tmp_path, test_directory_feat_ids):
169169 case _:
170170 logging .error ("Preprocessing failed." )
171171 dataset0 = SingleCellDataset (
172- sc_memmap_dataset_path0 ,
172+ str ( sc_memmap_dataset_path0 ) ,
173173 tokenizer ,
174174 median_dict = median_dict ,
175175 mask_token_prob = 0 ,
@@ -188,17 +188,17 @@ def test_get_item_synthetic(tmp_path, test_directory_feat_ids):
188188
189189def test_GeneformerDataset_changes_with_epoch (tmp_path , cellx_small_directory ):
190190 preprocessor = GeneformerPreprocess (
191- download_directory = tmp_path / cellx_small_directory / "val" ,
192- medians_file_path = tmp_path / cellx_small_directory / "val" / "medians.json" ,
193- tokenizer_vocab_path = tmp_path / cellx_small_directory / "val" / "geneformer.vocab" ,
191+ download_directory = str ( tmp_path / cellx_small_directory / "val" ) ,
192+ medians_file_path = str ( tmp_path / cellx_small_directory / "val" / "medians.json" ) ,
193+ tokenizer_vocab_path = str ( tmp_path / cellx_small_directory / "val" / "geneformer.vocab" ) ,
194194 )
195195 match preprocessor .preprocess ():
196196 case {"tokenizer" : tokenizer , "median_dict" : median_dict }:
197197 logging .info ("*************** Preprocessing Finished ************" )
198198 case _:
199199 logging .error ("Preprocessing failed." )
200200 genformer_ds = SingleCellDataset (
201- tmp_path / cellx_small_directory / "val" ,
201+ str ( tmp_path / cellx_small_directory / "val" ) ,
202202 tokenizer , # type: ignore
203203 median_dict = median_dict , # type: ignore
204204 ) # type: ignore
@@ -212,17 +212,17 @@ def test_GeneformerDataset_changes_with_epoch(tmp_path, cellx_small_directory):
212212
213213def test_get_item_cellx (tmp_path , cellx_small_directory ):
214214 preprocessor = GeneformerPreprocess (
215- download_directory = tmp_path / cellx_small_directory / "val" ,
216- medians_file_path = tmp_path / cellx_small_directory / "val" / "medians.json" ,
217- tokenizer_vocab_path = tmp_path / cellx_small_directory / "val" / "geneformer.vocab" ,
215+ download_directory = str ( tmp_path / cellx_small_directory / "val" ) ,
216+ medians_file_path = str ( tmp_path / cellx_small_directory / "val" / "medians.json" ) ,
217+ tokenizer_vocab_path = str ( tmp_path / cellx_small_directory / "val" / "geneformer.vocab" ) ,
218218 )
219219 match preprocessor .preprocess ():
220220 case {"tokenizer" : tokenizer , "median_dict" : median_dict }:
221221 logging .info ("*************** Preprocessing Finished ************" )
222222 case _:
223223 logging .error ("Preprocessing failed." )
224224 ds = SingleCellDataset (
225- tmp_path / cellx_small_directory / "val" ,
225+ str ( tmp_path / cellx_small_directory / "val" ) ,
226226 tokenizer , # type: ignore
227227 median_dict = median_dict , # type: ignore
228228 mask_prob = 0 ,
0 commit comments