Skip to content

Commit c6325c5

Browse files
committed
improve distributed logging
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent abaca04 commit c6325c5

File tree

3 files changed

+17
-26
lines changed

3 files changed

+17
-26
lines changed

src/llmcompressor/logger.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
99
- `LLM_COMPRESSOR_LOG_DISABLED`: Disable logging.
1010
Default: `False`.
11-
- `LLM_COMPRESSOR_CLEAR_LOGGERS`: Clear existing loggers from loguru.
12-
Default: `True`.
1311
- `LLM_COMPRESSOR_LOG_LEVEL`: Log level for console logging.
1412
Default: `None`. Options: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`.
1513
- `LLM_COMPRESSOR_LOG_FILE`: Path to the log file for file logging.
@@ -25,7 +23,6 @@
2523
configure_logger(
2624
logger_config=LoggerConfig(
2725
disabled=False,
28-
clear_loggers=True,
2926
console_log_level="DEBUG",
3027
log_file=None,
3128
log_file_level=None,
@@ -65,9 +62,7 @@ class LoggerConfig:
6562
LOGGER_CONFIG = LoggerConfig()
6663

6764

68-
def configure_logger(
69-
logger_config: LoggerConfig = LOGGER_CONFIG, clear_loggers: bool = False
70-
):
65+
def configure_logger(logger_config: LoggerConfig = LOGGER_CONFIG):
7166
"""
7267
Configure the logger for LLM Compressor.
7368
@@ -97,8 +92,12 @@ def configure_logger(
9792
logger.enable("llmcompressor")
9893

9994
# reset logger configuration
100-
if clear_loggers:
101-
logger.remove()
95+
logger.remove()
96+
97+
# initialize metric logger on loguru
98+
logger_levels = logger._core.levels.keys()
99+
if not logger_config.metrics_disabled and "METRIC" not in logger_levels:
100+
logger.level("METRIC", no=38, color="<yellow>", icon="📈")
102101

103102
# set format (optionally adding rank)
104103
format = "{time:YYYY-MM-DDTHH:mm:ss.SSSS} | {function} | {level} - {message}"
@@ -107,7 +106,6 @@ def configure_logger(
107106
format = "[Rank {extra[rank]}] " + format
108107

109108
if logger_config.console_log_level:
110-
# log as a human readable string with the time, function, level, and message
111109
logger.add(
112110
sys.stdout,
113111
level=logger_config.console_log_level.upper(),
@@ -123,16 +121,9 @@ def configure_logger(
123121
log_file,
124122
level=log_file_level.upper(),
125123
serialize=True,
126-
format=format,
127124
filter=support_log_once,
128125
)
129126

130-
if logger_config.metrics_disabled or "METRIC" in logger._core.levels.keys():
131-
return
132-
133-
# initialize metric logger on loguru
134-
logger.level("METRIC", no=38, color="<yellow>", icon="📈")
135-
136127
# set global value for later calls
137128
global LOGGER_CONFIG
138129
LOGGER_CONFIG = logger_config
@@ -164,7 +155,7 @@ def support_log_once(record: Dict[str, Any]) -> bool:
164155

165156
def configure_distributed_logger(logger_config: LoggerConfig = LOGGER_CONFIG):
166157
logger_config.rank = dist.get_rank()
167-
configure_logger(logger_config, clear_loggers=True)
158+
configure_logger(logger_config)
168159

169160

170161
# invoke logger setup on import with default values enabling console logging with INFO

src/llmcompressor/utils/metric_logging.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,14 @@ def __exit__(self, _exc_type, _exc_val, _exc_tb):
5454
patch.log("METRIC", f"error {self._loss:.2f}")
5555

5656
for device_id in _get_visible_devices():
57-
used_memory = torch.cuda.max_memory_allocated(device_id)
58-
max_memory = torch.cuda.get_device_properties(device_id).total_memory
57+
used_memory = torch.cuda.max_memory_allocated(device_id) / 1e9
58+
max_memory = torch.cuda.get_device_properties(device_id).total_memory / 1e9
5959
perc_used = 100 * used_memory / max_memory
6060
patch.log(
6161
"METRIC",
6262
(
6363
f"GPU {device_id} | usage: {perc_used:.2f}%"
64-
f" | total memory: {max_memory:.1f} GB"
64+
f" | total memory: {max_memory:.1f} Gb"
6565
),
6666
)
6767

tests/unit/test_logger.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def test_default_logger_settings(capsys):
2626
def test_configure_logger_console_settings(capsys):
2727
# Test configuring the logger to change console log level
2828
config = LoggerConfig(console_log_level="DEBUG")
29-
configure_logger(config=config)
29+
configure_logger(config)
3030
logger.info("Info message")
3131
logger.debug("Debug message")
3232

@@ -39,7 +39,7 @@ def test_configure_logger_file_settings(tmp_path):
3939
# Test configuring the logger to log to a file
4040
log_file = tmp_path / "test.log"
4141
config = LoggerConfig(log_file=str(log_file), log_file_level="DEBUG")
42-
configure_logger(config=config)
42+
configure_logger(config)
4343
logger.info("Info message")
4444
logger.debug("Debug message")
4545

@@ -55,7 +55,7 @@ def test_configure_logger_console_and_file(capsys, tmp_path):
5555
config = LoggerConfig(
5656
console_log_level="ERROR", log_file=str(log_file), log_file_level="INFO"
5757
)
58-
configure_logger(config=config)
58+
configure_logger(config)
5959
logger.info("Info message")
6060
logger.error("Error message")
6161

@@ -75,7 +75,7 @@ def test_environment_variable_override(monkeypatch, capsys, tmp_path):
7575
monkeypatch.setenv("LLM_COMPRESSOR_LOG_FILE", str(tmp_path / "env_test.log"))
7676
monkeypatch.setenv("LLM_COMPRESSOR_LOG_FILE_LEVEL", "DEBUG")
7777

78-
configure_logger(config=LoggerConfig())
78+
configure_logger(LoggerConfig())
7979
logger.info("Info message")
8080
logger.error("Error message")
8181
logger.debug("Debug message")
@@ -96,7 +96,7 @@ def test_environment_variable_disable_logging(monkeypatch, capsys):
9696
# Test environment variable to disable logging
9797
monkeypatch.setenv("LLM_COMPRESSOR_LOG_DISABLED", "true")
9898

99-
configure_logger(config=LoggerConfig())
99+
configure_logger(LoggerConfig())
100100
logger.info("Info message")
101101
logger.error("Error message")
102102

@@ -109,7 +109,7 @@ def test_environment_variable_enable_logging(monkeypatch, capsys):
109109
# Test environment variable to enable logging
110110
monkeypatch.setenv("LLM_COMPRESSOR_LOG_DISABLED", "false")
111111

112-
configure_logger(config=LoggerConfig())
112+
configure_logger(LoggerConfig())
113113
logger.info("Info message")
114114
logger.error("Error message")
115115

0 commit comments

Comments
 (0)