Skip to content

Commit a397d11

Browse files
make data generation buffer_size configurable.
1 parent 3f28662 commit a397d11

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

dlio_benchmark/data_generator/indexed_binary_generator.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -55,9 +55,9 @@ def generate(self):
 55  55    sample_size = dim1 * dim2
 56  56    total_size = sample_size * self.num_samples
 57  57    write_size = total_size
 58      - MEMORY_SIZE = 2*GB
 59      - if total_size > MEMORY_SIZE:
 60      -     write_size = MEMORY_SIZE - (MEMORY_SIZE % sample_size)
     58  + memory_size = self._args.generation_buffer_size
     59  + if total_size > memory_size:
     60  +     write_size = memory_size - (memory_size % sample_size)
 61  61    out_path_spec = self.storage.get_uri(self._file_list[i])
 62  62    out_path_spec_off_idx = self.index_file_path_off(out_path_spec)
 63  63    out_path_spec_sz_idx = self.index_file_path_size(out_path_spec)

dlio_benchmark/utils/config.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,7 @@ class ConfigArguments:
 96  96    do_eval: bool = False
 97  97    batch_size_eval: int = 1
 98  98    num_files_eval: int = 0
     99  + generation_buffer_size: int = 2 * 1073741824 # 2 GB
 99 100    eval_time: float = 0.0
100 101    eval_time_stdev: float = 0.0
101 102    eval_after_epoch: int = 1
@@ -348,6 +349,8 @@ def LoadConfig(args, config):
348 349    args.num_files_train = config['dataset']['num_files_train']
349 350    if 'num_files_eval' in config['dataset']:
350 351        args.num_files_eval = config['dataset']['num_files_eval']
    352  + if 'generation_buffer_size' in config['dataset']:
    353  +     args.generation_buffer_size = config['dataset']['generation_buffer_size']
351 354    if 'num_samples_per_file' in config['dataset']:
352 355        args.num_samples_per_file = config['dataset']['num_samples_per_file']
353 356    if 'data_folder' in config['dataset']:

0 commit comments

Comments
 (0)