Skip to content

Commit 92fac09

Browse files
authored
Merge pull request #219 from henryleberre/master
2 parents c312cd7 + e2abac2 commit 92fac09

File tree

5 files changed

+30
-22
lines changed

5 files changed

+30
-22
lines changed

toolchain/mfc/args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def add_common_arguments(p, mask = None):
6666
p.add_argument(f"--no-{target.name}", action="store_true", help=f"Do not build the {target.name} dependency. Use the system's instead.")
6767

6868
if "g" not in mask:
69-
p.add_argument("-g", "--gpus", nargs="+", type=int, default=[0], help="(GPU) List of GPU #s to use.")
69+
p.add_argument("-g", "--gpus", nargs="+", type=int, default=None, help="(Optional GPU override) List of GPU #s to use (environment default if unspecified).")
7070

7171
# === BUILD ===
7272
add_common_arguments(build, "g")

toolchain/mfc/run/engines.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -147,14 +147,16 @@ def run(self, targets: typing.List[MFCTarget]) -> None:
147147

148148
if not ARG("dry_run"):
149149
start_time = time.monotonic()
150+
151+
env = os.environ.copy()
152+
if ARG('gpus') is not None:
153+
env['CUDA_VISIBLE_DEVICES'] = ','.join([str(_) for _ in ARG('gpus')])
154+
150155
system(
151156
self.get_exec_cmd(target), cwd=self.input.case_dirpath,
152-
env={
153-
**os.environ.copy(),
154-
'CUDA_VISIBLE_DEVICES': ','.join([str(_) for _ in ARG('gpus')])
155-
}
157+
env=env
156158
)
157-
end_time = time.monotonic()
159+
end_time = time.monotonic()
158160
cons.print(no_indent=True)
159161

160162
cons.print(f"[bold green]Done[/bold green] (in {datetime.timedelta(seconds=end_time - start_time)})")
@@ -307,11 +309,11 @@ def __create_batch_file(self, system: queues.QueueSystem, targets: typing.List[M
307309
cons.print("> Writing batch file...")
308310
file_write(filepath, content)
309311

310-
def __execute_batch_file(self, system: queues.QueueSystem):
312+
def __execute_batch_file(self, queue: queues.QueueSystem):
311313
# We CD to the case directory before executing the batch file so that
312314
# any files the queue system generates (like .err and .out) are created
313315
# in the correct directory.
314-
cmd = system.gen_submit_cmd(self.__get_batch_filename())
316+
cmd = queue.gen_submit_cmd(self.__get_batch_filename())
315317

316318
if system(cmd, cwd=self.__get_batch_dirpath()) != 0:
317319
raise MFCException(f"Submitting batch file for {system.name} failed. It can be found here: {self.__get_batch_filepath()}. Please check the file for errors.")

toolchain/mfc/sched.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,11 @@ class Task:
3535
load: float
3636

3737

38-
def sched(tasks: typing.List[Task], nThreads: int, devices: typing.Set[int]) -> None:
38+
def sched(tasks: typing.List[Task], nThreads: int, devices: typing.Set[int] = None) -> None:
3939
nAvailable: int = nThreads
4040
threads: typing.List[WorkerThreadHolder] = []
4141

42-
sched.LOAD = { id: 0.0 for id in devices }
42+
sched.LOAD = { id: 0.0 for id in devices or [] }
4343

4444
def join_first_dead_thread(progress, complete_tracker) -> None:
4545
nonlocal threads, nAvailable
@@ -50,7 +50,7 @@ def join_first_dead_thread(progress, complete_tracker) -> None:
5050
raise threadHolder.thread.exc
5151

5252
nAvailable += threadHolder.ppn
53-
for device in threadHolder.devices:
53+
for device in threadHolder.devices or set():
5454
sched.LOAD[device] -= threadHolder.load / threadHolder.ppn
5555

5656
progress.advance(complete_tracker)
@@ -82,18 +82,21 @@ def join_first_dead_thread(progress, complete_tracker) -> None:
8282
# Launch Thread
8383
progress.advance(queue_tracker)
8484

85+
use_devices = None
8586
# Use the least loaded devices
86-
devices = set()
87-
for _ in range(task.ppn):
88-
device = min(sched.LOAD.items(), key=lambda x: x[1])[0]
89-
sched.LOAD[device] += task.load / task.ppn
87+
if devices is not None:
88+
use_devices = set()
89+
for _ in range(task.ppn):
90+
device = min(sched.LOAD.items(), key=lambda x: x[1])[0]
91+
sched.LOAD[device] += task.load / task.ppn
92+
use_devices.add(device)
9093

9194
nAvailable -= task.ppn
9295

93-
thread = WorkerThread(target=task.func, args=tuple(task.args) + (devices,))
96+
thread = WorkerThread(target=task.func, args=tuple(task.args) + (use_devices,))
9497
thread.start()
9598

96-
threads.append(WorkerThreadHolder(thread, task.ppn, task.load, devices))
99+
threads.append(WorkerThreadHolder(thread, task.ppn, task.load, use_devices))
97100

98101

99102
# Wait for the last tests to complete

toolchain/mfc/test/case.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,11 @@ def __init__(self, trace: str, mods: dict, ppn: int = None) -> None:
100100
super().__init__({**BASE_CFG.copy(), **mods})
101101

102102
def run(self, targets: typing.List[str], gpus: typing.Set[int]) -> subprocess.CompletedProcess:
103-
gpu_select = f"CUDA_VISIBLE_DEVICES={','.join([str(_) for _ in gpus])}"
103+
if gpus is not None and len(gpus) != 0:
104+
gpus_select = f"--gpus {' '.join([str(_) for _ in gpus])}"
105+
else:
106+
gpus_select = ""
107+
104108
filepath = f'"{self.get_dirpath()}/case.py"'
105109
tasks = f"-n {self.ppn}"
106110
jobs = f"-j {ARG('jobs')}" if ARG("case_optimization") else ""
@@ -110,8 +114,9 @@ def run(self, targets: typing.List[str], gpus: typing.Set[int]) -> subprocess.Co
110114
mfc_script = ".\mfc.bat" if os.name == 'nt' else "./mfc.sh"
111115

112116
command: str = f'''\
113-
{gpu_select} {mfc_script} run {filepath} {tasks} {binary_option} \
114-
{case_optimization} {jobs} -t {' '.join(targets)} 2>&1\
117+
{mfc_script} run {filepath} {tasks} {binary_option} \
118+
{case_optimization} {jobs} -t {' '.join(targets)} \
119+
{gpus_select} 2>&1\
115120
'''
116121

117122
return subprocess.run(command, stdout=subprocess.PIPE,

toolchain/mfc/test/test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,6 @@ def test():
105105
cons.print(f" tests/[bold magenta]UUID[/bold magenta] Summary")
106106
cons.print()
107107

108-
_handle_case.GPU_LOAD = { id: 0 for id in ARG("gpus") }
109-
110108
# Select the correct number of threads to use to launch test CASES
111109
# We can't use ARG("jobs") when the --case-optimization option is set
112110
# because running a test case may cause it to rebuild, and thus

0 commit comments

Comments
 (0)