Skip to content

Commit f31aa3a

Browse files
committed
move mlogger.shutdown into global shutdown()
1 parent 0db9ed6 commit f31aa3a

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

apps/grpo/main.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -536,10 +536,6 @@ async def continuous_training():
536536

537537
training_task.cancel()
538538

539-
# give mlogger time to shutdown backends, otherwise they can stay running.
540-
# TODO (felipemello) find more elegant solution
541-
await mlogger.shutdown.call_one()
542-
await asyncio.sleep(2)
543539
await shutdown()
544540

545541

src/forge/controller/provisioner.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,25 @@ async def stop_proc_mesh(proc_mesh: ProcMesh):
498498
return await provisioner.stop_proc_mesh(proc_mesh=proc_mesh)
499499

500500

501+
async def shutdown_metric_logger():
502+
"""Shutdown the global metric logger and all its backends."""
503+
from forge.observability.metric_actors import get_or_create_metric_logger
504+
505+
logger.info("Shutting down metric logger...")
506+
try:
507+
mlogger = await get_or_create_metric_logger()
508+
await mlogger.shutdown.call_one()
509+
except Exception as e:
510+
logger.warning(f"Failed to shutdown metric logger: {e}")
511+
512+
501513
async def shutdown():
502514
logger.info("Shutting down provisioner..")
515+
516+
await shutdown_metric_logger()
517+
503518
provisioner = await _get_provisioner()
504-
return await provisioner.shutdown()
519+
result = await provisioner.shutdown()
520+
521+
logger.info("Shutdown completed successfully")
522+
return result

0 commit comments

Comments
 (0)