Skip to content

Commit 78b77e3

Browse files
authored
feat: use pydantic-settings for MLflow config and update dependencies (#1962)
* feat: use pydantic-settings for MLflow config and update dependencies * docs
1 parent 38f02d2 commit 78b77e3

File tree

3 files changed

+51
-2
lines changed

3 files changed

+51
-2
lines changed

pyproject.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ dependencies = [
2727
"pyyaml",
2828
"numpy",
2929
"pandas>=0.24",
30+
# I encountered an error where set_uri does not work when downloading artifacts in mlflow 3.1.1;
31+
# But earlier versions of mlflow do not have this problem.
32+
# But when I switch to a 2.*.* version, another error occurs, which is even stranger...
3033
"mlflow",
3134
"filelock>=3.16.0",
3235
"redis",
@@ -45,6 +48,7 @@ dependencies = [
4548
"jupyter",
4649
"nbconvert",
4750
"pyarrow",
51+
"pydantic-settings",
4852
]
4953

5054
[project.optional-dependencies]
@@ -90,6 +94,7 @@ test = [
9094
]
9195
analysis = [
9296
"plotly",
97+
"statsmodels",
9398
]
9499

95100
[tool.setuptools]

qlib/config.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,37 @@
2727
if TYPE_CHECKING:
2828
from qlib.utils.time import Freq
2929

30+
from pydantic_settings import BaseSettings, SettingsConfigDict
31+
32+
33+
class MLflowSettings(BaseSettings):
34+
uri: str = "file:" + str(Path(os.getcwd()).resolve() / "mlruns")
35+
default_exp_name: str = "Experiment"
36+
37+
38+
class QSettings(BaseSettings):
39+
"""
40+
Qlib's settings.
41+
It tries to provide default settings for most of Qlib's components.
42+
But it would be a long journey to provide comprehensive settings for all of Qlib's components.
43+
44+
Here are some design guidelines:
45+
- The priority of settings is
46+
- Actively passed-in settings, like `qlib.init(provider_uri=...)`
47+
- The default settings
48+
- QSettings tries to provide default settings for most of Qlib's components.
49+
"""
50+
51+
mlflow: MLflowSettings = MLflowSettings()
52+
53+
model_config = SettingsConfigDict(
54+
env_prefix="QLIB_",
55+
env_nested_delimiter="_",
56+
)
57+
58+
59+
QSETTINGS = QSettings()
60+
3061

3162
class Config:
3263
def __init__(self, default_conf):
@@ -187,8 +218,8 @@ def register_from_C(config, skip_register=True):
187218
"class": "MLflowExpManager",
188219
"module_path": "qlib.workflow.expm",
189220
"kwargs": {
190-
"uri": "file:" + str(Path(os.getcwd()).resolve() / "mlruns"),
191-
"default_exp_name": "Experiment",
221+
"uri": QSETTINGS.mlflow.uri,
222+
"default_exp_name": QSETTINGS.mlflow.default_exp_name,
192223
},
193224
},
194225
"pit_record_type": {

qlib/workflow/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT License.
3+
"""
4+
Motivation of this design (instead of using mlflow directly):
5+
- Better design than mlflow native design
6+
- We have a record object with a lot of methods (more intuitive), instead of using a run_id every time as in mlflow
7+
- So the recorder's interfaces, like log and start, will be more intuitive.
8+
- Provide richer and more tailored features than native mlflow
9+
- Logging code diff at the start of run.
10+
- log_object and load_object work with Python objects directly, instead of log_artifact and download_artifact
11+
- (weak) Allow diverse backend support
12+
13+
To be honest, design always adds burdens. For example,
14+
- You need to create an experiment before you can get a recorder. (In MLflow, experiments are more like tags, and you often just use a run_id in many interfaces without first defining an experiment.)
15+
"""
316

417
from contextlib import contextmanager
518
from typing import Text, Optional, Any, Dict

0 commit comments

Comments
 (0)