Skip to content

Commit 0720984

Browse files
authored
Reopen dlio.log in non-fork reader_threads child processes (#130)
When reader_threads child processes are started with multiprocessing_context set to either spawn or forkserver, the new child processes do not have the dlio.log file open any longer. Change worker_init() to reopen the dlio.log file in those cases. When the context is set to fork, the dlio.log file remains open in the child process and does not have to be reopened.
1 parent 5acbf60 commit 0720984

File tree

3 files changed

+32
-21
lines changed

3 files changed

+32
-21
lines changed

dlio_benchmark/data_loader/torch_data_loader.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ def __init__(self, format_type, dataset_type, epoch, num_samples, num_workers, b
5454
@dlp.log
5555
def worker_init(self, worker_id):
5656
pickle.loads(self.serial_args)
57+
_args = ConfigArguments.get_instance()
58+
_args.configure_dlio_logging(True)
5759
logging.debug(f"{utcnow()} worker initialized {worker_id} with format {self.format_type}")
5860
self.reader = ReaderFactory.get_reader(type=self.format_type,
5961
dataset_type=self.dataset_type,

dlio_benchmark/main.py

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,8 @@ def __init__(self, cfg):
7373
self.storage = StorageFactory().get_storage(self.args.storage_type, self.args.storage_root,
7474
self.args.framework)
7575

76-
if self.args.output_folder is None:
77-
try:
78-
hydra_cfg = hydra.core.hydra_config.HydraConfig.get()
79-
self.args.output_folder = hydra_cfg['runtime']['output_dir']
80-
except:
81-
self.args.output_folder = 'output/'
8276
self.output_folder = self.args.output_folder
8377
os.makedirs(self.args.output_folder, mode=0o755, exist_ok=True)
84-
self.logfile = os.path.join(self.args.output_folder, self.args.log_file)
8578
dlp_trace = get_trace_name(self.args.output_folder)
8679
self.comm = DLIOMPI.get_instance().comm()
8780
self.my_rank = self.args.my_rank = DLIOMPI.get_instance().rank()
@@ -99,21 +92,11 @@ def __init__(self, cfg):
9992

10093
# Delete previous logfile
10194
if self.my_rank == 0:
102-
if os.path.isfile(self.logfile):
103-
os.remove(self.logfile)
95+
if os.path.isfile(self.args.logfile_path):
96+
os.remove(self.args.logfile_path)
10497
self.comm.barrier()
10598
# Configure the logging library
106-
log_level = logging.DEBUG if self.args.debug else logging.INFO
107-
logging.basicConfig(
108-
level=log_level,
109-
force=True,
110-
handlers=[
111-
logging.FileHandler(self.logfile, mode="a", encoding='utf-8'),
112-
logging.StreamHandler()
113-
],
114-
format='[%(levelname)s] %(message)s [%(pathname)s:%(lineno)d]'
115-
# logging's max timestamp resolution is msecs, we will pass in usecs in the message
116-
)
99+
self.args.configure_dlio_logging(False)
117100
if self.args.my_rank == 0:
118101
logging.info(f"{utcnow()} Profiling DLIO {dlp_trace}")
119102
logging.info(f"{utcnow()} Running DLIO with {self.args.comm_size} process(es)")

dlio_benchmark/utils/config.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"""
1717
import importlib
1818
import inspect
19+
import hydra
1920

2021
import logging
2122
from time import time
@@ -150,6 +151,23 @@ def get_instance():
150151
ConfigArguments()
151152
return ConfigArguments.__instance
152153

154+
def configure_dlio_logging(self, is_child=False):
155+
# with "multiprocessing_context=fork" the log file remains open in the child process
156+
if is_child and self.multiprocessing_context == "fork":
157+
return
158+
# Configure the logging library
159+
log_level = logging.DEBUG if self.debug else logging.INFO
160+
logging.basicConfig(
161+
level=log_level,
162+
force=True,
163+
handlers=[
164+
logging.FileHandler(self.logfile_path, mode="a", encoding='utf-8'),
165+
logging.StreamHandler()
166+
],
167+
format='[%(levelname)s] %(message)s [%(pathname)s:%(lineno)d]'
168+
# logging's max timestamp resolution is msecs, we will pass in usecs in the message
169+
)
170+
153171
@dlp.log
154172
def validate(self):
155173
""" validate whether the parameters are set correctly"""
@@ -438,7 +456,15 @@ def LoadConfig(args, config):
438456
args.output_folder = config['output']['folder']
439457
if 'log_file' in config['output']:
440458
args.log_file = config['output']['log_file']
441-
459+
460+
if args.output_folder is None:
461+
try:
462+
hydra_cfg = hydra.core.hydra_config.HydraConfig.get()
463+
args.output_folder = hydra_cfg['runtime']['output_dir']
464+
except:
465+
args.output_folder = 'output/'
466+
args.logfile_path = os.path.join(args.output_folder, args.log_file)
467+
442468
if 'workflow' in config:
443469
if 'generate_data' in config['workflow']:
444470
args.generate_data = config['workflow']['generate_data']

0 commit comments

Comments
 (0)