microsoft · SunsetWolf · Dec 17, 2024 · Dec 11, 2024 · Dec 11, 2024 · Dec 11, 2024
diff --git a/.github/workflows/test_qlib_from_source.yml b/.github/workflows/test_qlib_from_source.yml
@@ -21,7 +21,7 @@ jobs:
         # so we limit the macos version to macos-13.
         os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13]
         # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
     - name: Test qlib from source
@@ -63,7 +63,6 @@ jobs:
 
     - name: Set up Python tools
       run: |
-        python -m pip install pytest-timeout
         make dev
 
     - name: Lint with Black
@@ -108,7 +107,7 @@ jobs:
     # We use sys.setrecursionlimit(2000) to make the recursion depth larger to ensure that pylint works properly (the default recursion depth is 1000).
     - name: Check Qlib with pylint
       run: |
-        pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}$' qlib --init-hook "import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
+        pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W4904,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}$' qlib --init-hook "import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
         pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,E1123,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0246,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}$' scripts --init-hook "import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
 
     # The following flake8 error codes were ignored:
@@ -179,4 +178,4 @@ jobs:
         max_attempts: 3
         command: |
           cd tests
-          python -m pytest . -m "not slow" --durations=0 --timeout=600
+          python -m pytest . -m "not slow" --durations=0
diff --git a/.github/workflows/test_qlib_from_source_slow.yml b/.github/workflows/test_qlib_from_source_slow.yml
@@ -21,7 +21,7 @@ jobs:
         # so we limit the macos version to macos-13.
         os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13]
         # not supporting 3.6 due to annotations is not supported https://stackoverflow.com/a/52890129
-        python-version: [3.7, 3.8]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
     - name: Test qlib from source slow
@@ -44,7 +44,6 @@ jobs:
 
     - name: Set up Python tools
       run: |
-        python -m pip install --upgrade pip
         make dev
 
     - name: Downloads dependencies data

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -3,3 +3,4 @@ include qlib/*
 include qlib/*/*
 include qlib/*/*/*
 include qlib/*/*/*/*
+include qlib/*/*/*/*/*
diff --git a/Makefile b/Makefile
@@ -19,8 +19,10 @@ SO_FILES := $(wildcard $(SO_DIR)/*.so)
 clean:
 	-rm -rf \
 		$(PUBLIC_DIR) \
-		qlib/data/_libs \
+		qlib/data/_libs/*.cpp \
+		qlib/data/_libs/*.so \
 		mlruns \
+		public \
 		build \
 		.coverage \
 		.mypy_cache \
@@ -43,12 +45,17 @@ deepclean: clean
 	if command -v pipenv >/dev/null 2>&1 && pipenv --venv >/dev/null 2>&1; then pipenv --rm; fi
 
 # Prerequisite section
+# Compile the two Cython modules, rolling and expanding, using setuptools and Cython,
+# and build them in place as binary extension modules that can be imported directly into Python.
+# Since pyproject.toml can't do that, we compile them here.
 prerequisite:
 	@if [ -n "$(SO_FILES)" ]; then \
 		echo "Shared library files exist, skipping build."; \
 	else \
 		echo "No shared library files found, building..."; \
-		python -m pip install cython numpy; \
+		pip install --upgrade setuptools wheel; \
+		python -m pip install cython; \
+		python -m pip install "numpy<2.0.0"; \
 		python -c "from setuptools import setup, Extension; from Cython.Build import cythonize; import numpy; extensions = [Extension('qlib.data._libs.rolling', ['qlib/data/_libs/rolling.pyx'], language='c++', include_dirs=[numpy.get_include()]), Extension('qlib.data._libs.expanding', ['qlib/data/_libs/expanding.pyx'], language='c++', include_dirs=[numpy.get_include()])]; setup(ext_modules=cythonize(extensions, language_level='3'), script_args=['build_ext', '--inplace'])"; \
 	fi
 
@@ -74,8 +81,14 @@ docs:
 package:
 	python -m pip install -e .[package]
 
+test:
+	python -m pip install -e .[test]
+
+analysis:
+	python -m pip install -e .[analysis]
+
 all:
-	python -m pip install -e .[rl,dev,lint,docs,package]
+	python -m pip install -e .[rl,dev,lint,docs,package,test,analysis]
 
 install: prerequisite dependencies
 
@@ -91,7 +104,7 @@ black:
 
 # Check code folder with pylint.
 pylint:
-	pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1730,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}' qlib --init-hook="import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
+	pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,W4904,E0401,E1121,C0103,C0209,R0402,R1705,R1710,R1725,R1730,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}' qlib --init-hook="import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
 	pylint --disable=C0104,C0114,C0115,C0116,C0301,C0302,C0411,C0413,C1802,R0401,R0801,R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,R1720,W0105,W0123,W0201,W0511,W0613,W1113,W1514,E0401,E1121,E1123,C0103,C0209,R0402,R1705,R1710,R1725,R1735,W0102,W0212,W0221,W0223,W0231,W0237,W0246,W0612,W0621,W0622,W0703,W1309,E1102,E1136 --const-rgx='[a-z_][a-z0-9_]{2,30}' scripts --init-hook="import astroid; astroid.context.InferenceContext.max_inferred = 500; import sys; sys.setrecursionlimit(2000)"
 
 # Check code with flake8.

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools", "cython", "numpy"]
+requires = ["setuptools", "cython", "numpy<2.0.0"]
 build-backend = "setuptools.build_meta"
 
 [project]
@@ -11,27 +11,27 @@ classifiers = [
   "Development Status :: 3 - Alpha",
   "Programming Language :: Python",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.7",
   "Programming Language :: Python :: 3.8",
   "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
 ]
 name = "pyqlib"
 dynamic = ["version"]
 description = "A Quantitative-research Platform"
-requires-python = ">=3.7.0"
+requires-python = ">=3.8.0"
 
 dependencies = [
   "pyyaml",
   "numpy",
   "pandas",
-  "mlflow",
+  "mlflow>=2.0.0",
   "filelock",
   "redis",
   "dill",
   "fire",
-  "ruamel.yaml",
+  "ruamel.yaml>=0.17.38",
   "python-redis-lock",
   "tqdm",
   "pymongo",
@@ -41,15 +41,13 @@ dependencies = [
   "cvxpy",
   "joblib",
   "matplotlib",
+  "jupyter",
+  "nbconvert",
 ]
 
 [project.optional-dependencies]
 dev = [
   "pytest",
-  "baostock",
-  "tianshou",
-  "yahooquery",
-  "plotly",
   "statsmodels",
 ]
 rl = [
@@ -62,8 +60,6 @@ lint = [
   "mypy<1.5.0",
   "flake8",
   "nbqa",
-  "jupyter",
-  "nbconvert",
 ]
 docs = [
   "sphinx",
@@ -74,6 +70,14 @@ package = [
   "twine",
   "build",
 ]
+# Packages that test_pit depends on
+test = [
+  "yahooquery",
+  "baostock",
+]
+analysis = [
+  "plotly",
+]
 
 [tool.setuptools]
 packages = [

diff --git a/qlib/contrib/model/pytorch_general_nn.py b/qlib/contrib/model/pytorch_general_nn.py
@@ -233,7 +233,15 @@ def fit(
         evals_result=dict(),
         save_path=None,
         reweighter=None,
+        batch_size=None,
+        n_jobs=None,
     ):
+        if batch_size is None:
+            batch_size = self.batch_size
+
+        if n_jobs is None:
+            n_jobs = self.n_jobs
+
         ists = isinstance(dataset, TSDatasetH)  # is this time series dataset
 
         dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
@@ -261,16 +269,16 @@ def fit(
 
         train_loader = DataLoader(
             ConcatDataset(dl_train, wl_train),
-            batch_size=self.batch_size,
+            batch_size=batch_size,
             shuffle=True,
-            num_workers=self.n_jobs,
+            num_workers=n_jobs,
             drop_last=True,
         )
         valid_loader = DataLoader(
             ConcatDataset(dl_valid, wl_valid),
-            batch_size=self.batch_size,
+            batch_size=batch_size,
             shuffle=False,
-            num_workers=self.n_jobs,
+            num_workers=n_jobs,
             drop_last=True,
         )
         del dl_train, dl_valid, wl_train, wl_valid
@@ -319,7 +327,18 @@ def fit(
         if self.use_gpu:
             torch.cuda.empty_cache()
 
-    def predict(self, dataset: Union[DatasetH, TSDatasetH]):
+    def predict(
+        self,
+        dataset: Union[DatasetH, TSDatasetH],
+        batch_size=None,
+        n_jobs=None,
+    ):
+        if batch_size is None:
+            batch_size = self.batch_size
+
+        if n_jobs is None:
+            n_jobs = self.n_jobs
+
         if not self.fitted:
             raise ValueError("model is not fitted yet!")
 
@@ -333,7 +352,7 @@ def predict(self, dataset: Union[DatasetH, TSDatasetH]):
             index = dl_test.index
             dl_test = dl_test.values
 
-        test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs)
+        test_loader = DataLoader(dl_test, batch_size=batch_size, num_workers=n_jobs)
         self.dnn_model.eval()
         preds = []
 

diff --git a/qlib/workflow/expm.py b/qlib/workflow/expm.py
@@ -8,6 +8,7 @@
 from mlflow.entities import ViewType
 import os
 from typing import Optional, Text
+from pathlib import Path
 
 from .exp import MLflowExperiment, Experiment
 from ..config import C
@@ -233,7 +234,7 @@ def _get_or_create_exp(self, experiment_id=None, experiment_name=None) -> (objec
             # So we supported it in the interface wrapper
             pr = urlparse(self.uri)
             if pr.scheme == "file":
-                with FileLock(os.path.join(pr.netloc, pr.path, "filelock")):  # pylint: disable=E0110
+                with FileLock(Path(os.path.join(pr.netloc, pr.path.lstrip("/"), "filelock"))):  # pylint: disable=E0110
                     return self.create_exp(experiment_name), True
             # NOTE: for other schemes like http, we double check to avoid create exp conflicts
             try:
@@ -421,7 +422,11 @@ def delete_exp(self, experiment_id=None, experiment_name=None):
 
     def list_experiments(self):
         # retrieve all the existing experiments
-        exps = self.client.search_experiments(view_type=ViewType.ACTIVE_ONLY)
+        mlflow_version = int(mlflow.__version__.split(".", maxsplit=1)[0])
+        if mlflow_version >= 2:
+            exps = self.client.search_experiments(view_type=ViewType.ACTIVE_ONLY)
+        else:
+            exps = self.client.list_experiments(view_type=ViewType.ACTIVE_ONLY)  # pylint: disable=E1101
         experiments = dict()
         for exp in exps:
             experiment = MLflowExperiment(exp.experiment_id, exp.name, self.uri)

diff --git a/qlib/workflow/recorder.py b/qlib/workflow/recorder.py
@@ -9,6 +9,7 @@
 import pickle
 import tempfile
 import subprocess
+import platform
 from pathlib import Path
 from datetime import datetime
 
@@ -316,7 +317,10 @@ def get_local_dir(self):
         This function will return the directory path of this recorder.
         """
         if self.artifact_uri is not None:
-            local_dir_path = Path(self.artifact_uri.lstrip("file:")) / ".."
+            if platform.system() == "Windows":
+                local_dir_path = Path(self.artifact_uri.lstrip("file:").lstrip("/")).parent
+            else:
+                local_dir_path = Path(self.artifact_uri.lstrip("file:")).parent
             local_dir_path = str(local_dir_path.resolve())
             if os.path.isdir(local_dir_path):
                 return local_dir_path

diff --git a/tests/data_mid_layer_tests/test_dataloader.py b/tests/data_mid_layer_tests/test_dataloader.py
@@ -11,12 +11,15 @@
 from qlib.data.dataset.handler import DataHandlerLP
 from qlib.contrib.data.loader import Alpha158DL, Alpha360DL
 from qlib.data import D
+import logging
+
+logging.basicConfig(level=logging.INFO)
 
 
 class TestDataLoader(unittest.TestCase):
 
     def test_nested_data_loader(self):
-        qlib.init()
+        qlib.init(kernels=1)
         nd = NestedDataLoader(
             dataloader_l=[
                 {
@@ -30,7 +33,7 @@ def test_nested_data_loader(self):
         )
         # Of course you can use StaticDataLoader
 
-        dataset = nd.load()
+        dataset = nd.load(start_time="2020-01-01", end_time="2020-01-31")
 
         assert dataset is not None
 

diff --git a/tests/dependency_tests/test_mlflow.py b/tests/dependency_tests/test_mlflow.py
@@ -1,6 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 import unittest
+import platform
 import mlflow
 import time
 from pathlib import Path
@@ -26,7 +27,10 @@ def test_creating_client(self):
             _ = mlflow.tracking.MlflowClient(tracking_uri=str(self.TMP_PATH))
         end = time.time()
         elapsed = end - start
-        self.assertLess(elapsed, 1e-2)  # it can be done in less than 10ms
+        if platform.system() == "Linux":
+            self.assertLess(elapsed, 1e-2)  # it can be done in less than 10ms
+        else:
+            self.assertLess(elapsed, 2e-2)
         print(elapsed)
 
 

diff --git a/tests/model/test_general_nn.py b/tests/model/test_general_nn.py
@@ -68,8 +68,8 @@ def test_both_dataset(self):
         ]
 
         for ds, model in list(zip((tsds, tbds), model_l)):
-            model.fit(ds)  # It works
-            model.predict(ds)  # It works
+            model.fit(ds, batch_size=32, n_jobs=0)  # It works
+            model.predict(ds, batch_size=32, n_jobs=0)  # It works
 
 
 if __name__ == "__main__":

diff --git a/tests/test_pit.py b/tests/test_pit.py
@@ -8,7 +8,6 @@
 import unittest
 import pytest
 import pandas as pd
-import baostock as bs
 from pathlib import Path
 
 from qlib.data import D

diff --git a/tests/test_workflow.py b/tests/test_workflow.py
@@ -20,11 +20,11 @@ def tearDown(self) -> None:
     def test_get_local_dir(self):
         """ """
         self.TMP_PATH.mkdir(parents=True, exist_ok=True)
-
-        with R.start(uri=str(self.TMP_PATH)):
+        uri = str(self.TMP_PATH.resolve().as_uri()).replace("\\", "/")
+        with R.start(uri=uri):
             pass
 
-        with R.uri_context(uri=str(self.TMP_PATH)):
+        with R.uri_context(uri=uri):
             resume_recorder = R.get_recorder()
             resume_recorder.get_local_dir()