@@ -47,6 +47,16 @@ def dataloader_by_name(readerclass,
4747
4848 files .sort ()
4949
50+ # for local cluster: discard some files if files cannot be divided equally between GPUs
51+ if (context ["device" ] == "GPU" ):
52+ selected_gpu_nums = int (os .getenv ("PADDLEREC_GPU_NUMS" ))
53+ discard_file_nums = len (files ) % selected_gpu_nums
54+ if (discard_file_nums != 0 ):
55+ print (
56+ "Warning: beacause files cannot be divided equally between GPUs,discard these files:{}" .
57+ format (files [- discard_file_nums :]))
58+ files = files [:len (files ) - discard_file_nums ]
59+
5060 need_split_files = False
5161 if context ["engine" ] == EngineMode .LOCAL_CLUSTER :
5262 # for local cluster: split files for multi process
@@ -109,6 +119,16 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
109119
110120 files .sort ()
111121
122+ # for local cluster: discard some files if files cannot be divided equally between GPUs
123+ if (context ["device" ] == "GPU" ):
124+ selected_gpu_nums = int (os .getenv ("PADDLEREC_GPU_NUMS" ))
125+ discard_file_nums = len (files ) % selected_gpu_nums
126+ if (discard_file_nums != 0 ):
127+ print (
128+ "Warning: beacause files cannot be divided equally between GPUs, discard these files:{}" .
129+ format (files [- discard_file_nums :]))
130+ files = files [:len (files ) - discard_file_nums ]
131+
112132 need_split_files = False
113133 if context ["engine" ] == EngineMode .LOCAL_CLUSTER :
114134 # for local cluster: split files for multi process
@@ -179,6 +199,16 @@ def slotdataloader(readerclass, train, yaml_file, context):
179199
180200 files .sort ()
181201
202+ # for local cluster: discard some files if files cannot be divided equally between GPUs
203+ if (context ["device" ] == "GPU" ):
204+ selected_gpu_nums = int (os .getenv ("PADDLEREC_GPU_NUMS" ))
205+ discard_file_nums = len (files ) % selected_gpu_nums
206+ if (discard_file_nums != 0 ):
207+ print (
208+ "Warning: beacause files cannot be divided equally between GPUs,discard these files:{}" .
209+ format (files [- discard_file_nums :]))
210+ files = files [:len (files ) - discard_file_nums ]
211+
182212 need_split_files = False
183213 if context ["engine" ] == EngineMode .LOCAL_CLUSTER :
184214 # for local cluster: split files for multi process
0 commit comments