@@ -149,39 +149,22 @@ def getitem(self, idx):

         return frame

-    def prepare(self):
-        """Prepare the dataset. The human crowd dataset has a lot of huge 4k images that drastically slow down
-        the training. To be more efficient, this method will go through all images from the dataset and will
-        save a new version of the dataset under `{self.dataset_dir_prepared}`. Once the dataset is prepared,
-        the path to the dir in /.aloception/alodataset_config.json will be replaced by the new prepared one.
-
-        Notes
-        -----
-        If the dataset is already prepared, this method will simply check that all files
-        are prepared and stored in the prepared folder. Otherwise, if the original directory is no longer
-        on the disk, the method will simply use the prepared dir as it is and the prepare step will be skipped.
-        """
+    def _prepare(self, img_folder, ann_file, dataset_dir, idx):
         from alodataset import transforms as T

-        if self.sample is not None:  # Nothing to do. Samples are ready
-            return
-
-        if self.dataset_dir.endswith("_prepared") and not os.path.exists(self.dataset_dir.replace("_prepared", "")):
-            return
-
-        dataset_dir_name = os.path.basename(os.path.normpath(self.dataset_dir))
+        dataset_dir_name = os.path.basename(os.path.normpath(dataset_dir))
         if "_prepared" not in dataset_dir_name:
             wip_dir = f".wip_{dataset_dir_name}_prepared"
             prepared_dir = f"{dataset_dir_name}_prepared"
-            img_folder = self.img_folder
-            ann_file = self.ann_file
+            img_folder = img_folder
+            ann_file = ann_file
         else:
             wip_dir = f".wip_{dataset_dir_name}"
             prepared_dir = dataset_dir_name
-            img_folder = os.path.join(self.dataset_dir.replace("_prepared", ""), self._img_folder, "Images")
-            ann_file = os.path.join(self.dataset_dir.replace("_prepared", ""), self._ann_file)
+            img_folder = os.path.join(dataset_dir.replace("_prepared", ""), img_folder, "Images")
+            ann_file = os.path.join(dataset_dir.replace("_prepared", ""), ann_file)

-        base_datadir = Path(os.path.normpath(self.dataset_dir)).parent
+        base_datadir = Path(os.path.normpath(dataset_dir)).parent

         # Setup a new directory to work with to prepare the dataset
         n_wip_dir = os.path.join(base_datadir, wip_dir)
@@ -194,7 +177,7 @@ def prepare(self):
         if not os.path.exists(n_wip_dir):
             os.makedirs(n_wip_dir)

-        p = Path(self.dataset_dir)
+        p = Path(dataset_dir)
         p_parts = list(p.parts)
         p_parts[p_parts.index(dataset_dir_name)] = wip_dir

@@ -236,7 +219,7 @@ def prepare(self):

         if not os.path.exists(tgt_ann_file) and not os.path.exists(final_tgt_ann_file):
             # Write back the file with all boxes in relative position instead of absolute.
-            content = self.load_json_lines(ann_file)
+            content = self.load_json_lines(ann_file, idx)
             nb_line = len(content)
             for c in range(len(content)):
                 line = content[c]
@@ -270,16 +253,48 @@ def prepare(self):

         print("Preparing dataset: Moving the whole structure into the final prepared directory (if needed)")
         fs.move_and_replace(n_wip_dir, prepared_dir)
-
         self.set_dataset_dir(prepared_dir)
-        self.ann_file = final_tgt_ann_file
-        self.img_folder = final_tgt_image_dir
-        self.items = self.load_json_lines(self.ann_file)
+
+        return final_tgt_image_dir, final_tgt_ann_file
+
+    def prepare(self):
261+ """Prepare the dataset. The human crowd dataset has a lot of huge 4k images that drasticly slow down
262+ the training. To be more effective, this method will go through all images from the dataset and will
263+ save a new version of the dataset under `{self.dataset_dir_prepared}`. Once the dataset is prepared,
264+ the path to the dir in /.aloception/alodataset_config.json will be replace by the new prepared one.
265+
266+ Notes
267+ -----
268+ If the dataset is already prepared, this method will simply check that all file
269+ are prepared and stored into the prepared folder. Otherwise, if the original directory is no longer
270+ on the disk, the method will simply use the prepared dir as it is and the prepare step will be skiped.
271+ """
+        if self.sample is not None and self.sample is not False:  # Nothing to do. Samples are ready
+            return
+
+        if self.dataset_dir.endswith("_prepared") and not os.path.exists(self.dataset_dir.replace("_prepared", "")):
+            return
+
+        dataset_dir = self.dataset_dir
+        dataset_dir_name = os.path.basename(os.path.normpath(self.dataset_dir))
+        for idx, (img_folder, ann_file) in enumerate(zip(self.img_folder, self.ann_file)):
+            if "_prepared" not in dataset_dir_name:
+                n_img_folder, n_ann_file = self._prepare(img_folder, ann_file, dataset_dir, idx)
+            else:
+                n_img_folder, n_ann_file = self._prepare(self._img_folder[idx], self._ann_file[idx], dataset_dir, idx)
+            self.img_folder[idx] = n_img_folder
+            self.ann_file[idx] = n_ann_file
+
+        # Set back the items with the annotation files
+        self.items = []
+        for a, ann_file in enumerate(self.ann_file):
+            line = self.load_json_lines(ann_file, a)
+            self.items += line


 def main():
     """Main"""
-    crowd_human_dataset = CrowdHumanDataset(sample=True)
+    crowd_human_dataset = CrowdHumanDataset(img_folder="CrowdHuman_train", ann_file="annotation_train.odgt")

     crowd_human_dataset.prepare()
     for i, frames in enumerate(crowd_human_dataset.train_loader(batch_size=2, sampler=None, num_workers=0)):
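
After this refactor, `prepare()` loops over paired image folders and annotation files and delegates the per-split work to `_prepare()`, which returns the prepared image directory and annotation file for that split. Below is a minimal usage sketch, not a confirmed API: it assumes the `CrowdHumanDataset` constructor accepts lists for `img_folder` and `ann_file` (consistent with the `zip(self.img_folder, self.ann_file)` loop above, but not shown in this diff), that the class is importable as `alodataset.CrowdHumanDataset`, and that the standard CrowdHuman split names are used.

# Sketch only: list-valued img_folder/ann_file, the import path and the split
# names ("CrowdHuman_val", "annotation_val.odgt") are assumptions inferred from
# the new zip(...) loop in prepare(), not confirmed by this diff.
from alodataset import CrowdHumanDataset

dataset = CrowdHumanDataset(
    img_folder=["CrowdHuman_train", "CrowdHuman_val"],
    ann_file=["annotation_train.odgt", "annotation_val.odgt"],
)

# Each (img_folder, ann_file) pair is prepared by _prepare(); the prepared paths
# are written back into dataset.img_folder / dataset.ann_file, and dataset.items
# is rebuilt from the prepared annotation files.
dataset.prepare()

# Iterate a couple of batches to check that the prepared data loads correctly.
for i, frames in enumerate(dataset.train_loader(batch_size=2, sampler=None, num_workers=0)):
    print(f"batch {i}: {len(frames)} frames")
    if i == 1:
        break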