Skip to content

Commit e859a31

Browse files
authored
Refactor/logging (#98)
* allow setting random seed for a pipeline from python api * remove CLI * fix typing & default value of OptimizationConfig * upd tests (remove outdated `force_multilabel` argument) * remove CLI tutorials * refactor configs to pydantic * fix typing * finally configure post initialization * upd docstrings * remove CLI from docs * remove hydra from poetry * remove omegaconf * add logging config, setup and formatter * fix custom log level issue * add logging to file on demand * fix typing * remove unnecessary formatter * add docs for `setup_logging` * add tutorial on logging * minor bugfix * minor bugfix * another attempt * fix codestyle
1 parent 0956f13 commit e859a31

File tree

8 files changed

+220
-45
lines changed

8 files changed

+220
-45
lines changed

autointent/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""This is AutoIntent API reference."""
22

3+
from ._logging import setup_logging
34
from ._ranker import Ranker
45
from ._embedder import Embedder
56
from ._vector_index import VectorIndex
@@ -8,5 +9,4 @@
89
from .context import Context
910
from ._pipeline import Pipeline
1011

11-
12-
__all__ = ["Context", "Dataset", "Embedder", "Hasher", "Pipeline", "Ranker", "VectorIndex"]
12+
__all__ = ["Context", "Dataset", "Embedder", "Hasher", "Pipeline", "Ranker", "VectorIndex", "setup_logging"]

autointent/_logging/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .setup import setup_logging
2+
3+
__all__ = ["setup_logging"]

autointent/_logging/config.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
version: 1
2+
disable_existing_loggers: false
3+
formatters:
4+
simple:
5+
format: '%(levelname)s: %(message)s'
6+
datefmt: '%Y-%m-%dT%H:%M:%S%z'
7+
json:
8+
(): autointent._logging.formatter.JSONFormatter
9+
fmt_keys:
10+
level: levelname
11+
message: message
12+
timestamp: timestamp
13+
logger: name
14+
module: module
15+
function: funcName
16+
line: lineno
17+
thread_name: threadName
18+
handlers:
19+
stdout:
20+
class: logging.StreamHandler
21+
formatter: simple
22+
stream: ext://sys.stdout
23+
loggers:
24+
root:
25+
level: DEBUG
26+
handlers:
27+
- stdout

autointent/_logging/formatter.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import datetime as dt
2+
import json
3+
import logging
4+
from typing import Any
5+
6+
# Attribute names that the standard library itself stores on a ``logging.LogRecord``.
# Anything found in ``record.__dict__`` that is NOT listed here is treated as a
# user-supplied ``extra=...`` field by the JSON formatter.
LOG_RECORD_BUILTIN_ATTRS = {
    # message and its arguments
    "msg",
    "args",
    "message",
    # logger and severity
    "name",
    "levelname",
    "levelno",
    # source location
    "pathname",
    "filename",
    "module",
    "funcName",
    "lineno",
    # timing
    "created",
    "msecs",
    "relativeCreated",
    "asctime",
    # exception and stack details
    "exc_info",
    "exc_text",
    "stack_info",
    # execution context
    "process",
    "processName",
    "thread",
    "threadName",
    "taskName",
}
31+
32+
33+
class JSONFormatter(logging.Formatter):
34+
"""This is a custom formatter for saving logging records as a json."""
35+
36+
def __init__(
37+
self,
38+
*,
39+
fmt_keys: dict[str, str] | None = None,
40+
) -> None:
41+
super().__init__()
42+
self.fmt_keys = fmt_keys if fmt_keys is not None else {}
43+
44+
def format(self, record: logging.LogRecord) -> str:
45+
message = self._prepare_log_dict(record)
46+
return json.dumps(message, default=str)
47+
48+
def _prepare_log_dict(self, record: logging.LogRecord) -> dict[str, Any]:
49+
always_fields = {
50+
"message": record.getMessage(),
51+
"timestamp": dt.datetime.fromtimestamp(record.created, tz=dt.timezone.utc).isoformat(),
52+
}
53+
if record.exc_info is not None:
54+
always_fields["exc_info"] = self.formatException(record.exc_info)
55+
56+
if record.stack_info is not None:
57+
always_fields["stack_info"] = self.formatStack(record.stack_info)
58+
59+
message = {
60+
key: msg_val if (msg_val := always_fields.pop(val, None)) is not None else getattr(record, val)
61+
for key, val in self.fmt_keys.items()
62+
}
63+
message.update(always_fields)
64+
65+
extra_fields = {key: val for key, val in record.__dict__.items() if key not in LOG_RECORD_BUILTIN_ATTRS}
66+
message.update(extra_fields)
67+
68+
return message

autointent/_logging/setup.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import importlib.resources as ires
2+
import logging.config
3+
import logging.handlers
4+
from pathlib import Path
5+
6+
import yaml
7+
8+
from autointent.custom_types import LogLevel
9+
10+
11+
def setup_logging(level: LogLevel | str, log_filename: Path | str | None = None) -> None:
    """
    Set stdout and file handlers for logging autointent internal actions.

    The first parameter affects the logs to the standard output stream. The second parameter is optional.
    If it is specified, then the "DEBUG" messages are logged to the file,
    regardless of what is specified by the first parameter.

    :param level: one of "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
    :param log_filename: specify location of logfile, omit extension as suffix ``.log.jsonl`` will be appended.
    """
    # The base configuration ships inside the package; read it with an explicit
    # encoding so the result does not depend on the platform's locale.
    config_file = ires.files("autointent._logging").joinpath("config.yaml")
    with config_file.open(encoding="utf-8") as f_in:
        config = yaml.safe_load(f_in)

    # Normalize plain strings to ``LogLevel`` before using the value.
    level = LogLevel(level)
    config["handlers"]["stdout"]["level"] = level.value

    if log_filename is not None:
        config["loggers"]["root"]["handlers"].append("file")

        filename = str(log_filename) + ".log.jsonl"
        config["handlers"]["file"] = {
            "class": "logging.FileHandler",
            "level": "DEBUG",
            "formatter": "json",
            "filename": filename,
        }
        # The file handler is created inside ``dictConfig`` below, so the target
        # directory has to exist before that call.
        Path(filename).parent.mkdir(parents=True, exist_ok=True)

    logging.config.dictConfig(config)

autointent/context/optimization_info/_logger.py

Lines changed: 0 additions & 40 deletions
This file was deleted.

autointent/context/optimization_info/_optimization_info.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
trials, and modules during the pipeline's execution.
55
"""
66

7+
import logging
78
from dataclasses import dataclass, field
89
from typing import TYPE_CHECKING, Any, Literal
910

@@ -14,7 +15,6 @@
1415
from autointent.custom_types import NodeType
1516

1617
from ._data_models import Artifact, Artifacts, RetrieverArtifact, ScorerArtifact, Trial, Trials, TrialsIds
17-
from ._logger import get_logger
1818

1919
if TYPE_CHECKING:
2020
from autointent.modules.abc import Module
@@ -59,7 +59,7 @@ class OptimizationInfo:
5959

6060
def __init__(self) -> None:
6161
"""Initialize optimization info."""
62-
self._logger = get_logger()
62+
self._logger = logging.getLogger(__name__)
6363

6464
self.artifacts = Artifacts()
6565
self.trials = Trials()
@@ -98,7 +98,7 @@ def log_module_optimization(
9898
module_dump_dir=module_dump_dir,
9999
)
100100
self.trials.add_trial(node_type, trial)
101-
self._logger.info(trial.model_dump())
101+
self._logger.debug("module %s fitted and saved to optimization info", module_name, extra=trial.model_dump())
102102

103103
if module:
104104
self.modules.add_module(node_type, module)

user_guides/advanced/05_logging.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# %% [markdown]
2+
"""
3+
# Logging to stdout and file
4+
5+
This guide will teach you how to configure logging in AutoIntent. By default, it is fully disabled.
6+
7+
We will demonstrate it on a toy search space example:
8+
"""
9+
10+
# %%
11+
from pathlib import Path
12+
13+
from autointent import Dataset, Pipeline
14+
from autointent.configs import LoggingConfig
15+
16+
search_space = [
17+
{
18+
"node_type": "scoring",
19+
"metric": "scoring_roc_auc",
20+
"search_space": [
21+
{
22+
"module_name": "knn",
23+
"k": [1],
24+
"weights": ["uniform"],
25+
"embedder_name": ["avsolatorio/GIST-small-Embedding-v0"],
26+
},
27+
],
28+
},
29+
{
30+
"node_type": "decision",
31+
"metric": "decision_accuracy",
32+
"search_space": [
33+
{"module_name": "threshold", "thresh": [0.5]},
34+
{"module_name": "argmax"},
35+
],
36+
},
37+
]
38+
39+
log_config = LoggingConfig(dirpath=Path("logging_tutorial"))
40+
pipeline_optimizer = Pipeline.from_search_space(search_space)
41+
pipeline_optimizer.set_config(log_config)
42+
43+
dataset = Dataset.from_hub("AutoIntent/clinc150_subset")
44+
45+
# %% [markdown]
46+
"""
47+
## Fully Custom Logging
48+
49+
You can fully customize logging via Python's standard [`logging`](https://docs.python.org/3/library/logging.html) module. All you need to do is configure it before running AutoIntent:
50+
"""
51+
# %%
52+
import logging
53+
54+
logging.basicConfig(level="INFO")
55+
pipeline_optimizer.fit(dataset)
56+
57+
# %% [markdown]
58+
"""
59+
See external tutorials and guides on the `logging` module for more details.
60+
"""
61+
62+
# %% [markdown]
63+
"""
64+
## Export from AutoIntent
65+
66+
If you don't need to customize logging, you can use the configuration shipped with AutoIntent. All you need to do is set it up before running AutoIntent:
67+
"""
68+
69+
# %%
70+
from autointent import setup_logging
71+
72+
setup_logging("INFO", log_filename="tests/logs/my_exp")
73+
# %% [markdown]
74+
"""
75+
The first parameter affects the logs to the standard output stream. The second parameter is optional. If it is specified, then the "DEBUG" messages are logged to the file, regardless of what is specified by the first parameter.
76+
"""

0 commit comments

Comments
 (0)