fix fillna bug (#1914)

SunsetWolf · web-flow · commit 320bd65e1999 · 2025-04-25T11:18:09.000+08:00
* fix fillna bug

* fix flake8 error

* fix pylint error

* update ubuntu version for action

* fix pytest error

* fix pylint error

* fix black error

* fix pylint error

* add Fillna test

* fix black error

* add instruments

* remove code
diff --git a/.github/workflows/test_qlib_from_pip.yml b/.github/workflows/test_qlib_from_pip.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
+        os: [windows-latest, ubuntu-24.04, ubuntu-22.04, macos-13, macos-14, macos-15]
         # In github action, using python 3.7, pip install will not match the latest version of the package.
         # Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
         # All things considered, we have removed python 3.7.
diff --git a/.github/workflows/test_qlib_from_source.yml b/.github/workflows/test_qlib_from_source.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
+        os: [windows-latest, ubuntu-24.04, ubuntu-22.04, macos-13, macos-14, macos-15]
         # In github action, using python 3.7, pip install will not match the latest version of the package.
         # Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
         # All things considered, we have removed python 3.7.
@@ -39,7 +39,7 @@ jobs:
         python -m pip install torch torchvision torchaudio
 
     - name: Installing pytorch for ubuntu
-      if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'ubuntu-22.04' }}
+      if: ${{ matrix.os == 'ubuntu-24.04' || matrix.os == 'ubuntu-22.04' }}
       run: |
         python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
 
diff --git a/.github/workflows/test_qlib_from_source_slow.yml b/.github/workflows/test_qlib_from_source_slow.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
+        os: [windows-latest, ubuntu-24.04, ubuntu-22.04, macos-13, macos-14, macos-15]
         # In github action, using python 3.7, pip install will not match the latest version of the package.
         # Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
         # All things considered, we have removed python 3.7.
diff --git a/pyproject.toml b/pyproject.toml
@@ -23,9 +23,6 @@ description = "A Quantitative-research Platform"
 requires-python = ">=3.8.0"
 readme = {file = "README.md", content-type = "text/markdown"}
 
-# On 2025-04-02 osqp released version 1.0.2, osqp is used as a dependency for cvxpy. 
-# It would lead to errors installing qlib, so we limited the version of osqp.
-# refs: https://github.com/osqp/osqp/issues/728
 dependencies = [
   "pyyaml",
   "numpy",
@@ -42,7 +39,6 @@ dependencies = [
   "loguru",
   "lightgbm",
   "gym",
-  "osqp<1.0.2",
   "cvxpy",
   "joblib",
   "matplotlib",
diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py
@@ -146,19 +146,34 @@ def __init__(
             raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
 
         if scheduler == "default":
-            # Reduce learning rate when loss has stopped decrease
-            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
-                self.train_optimizer,
-                mode="min",
-                factor=0.5,
-                patience=10,
-                verbose=True,
-                threshold=0.0001,
-                threshold_mode="rel",
-                cooldown=0,
-                min_lr=0.00001,
-                eps=1e-08,
-            )
+            # torch 2.7.0 removed the `verbose` parameter of ReduceLROnPlateau. Reference link:
+            # https://github.com/pytorch/pytorch/pull/147301/files#diff-036a7470d5307f13c9a6a51c3a65dd014f00ca02f476c545488cd856bea9bcf2L1313
+            if str(torch.__version__).split("+", maxsplit=1)[0] <= "2.6.0":
+                # Reduce the learning rate when the loss has stopped decreasing
+                self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(  # pylint: disable=E1123
+                    self.train_optimizer,
+                    mode="min",
+                    factor=0.5,
+                    patience=10,
+                    verbose=True,
+                    threshold=0.0001,
+                    threshold_mode="rel",
+                    cooldown=0,
+                    min_lr=0.00001,
+                    eps=1e-08,
+                )
+            else:
+                self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+                    self.train_optimizer,
+                    mode="min",
+                    factor=0.5,
+                    patience=10,
+                    threshold=0.0001,
+                    threshold_mode="rel",
+                    cooldown=0,
+                    min_lr=0.00001,
+                    eps=1e-08,
+                )
         elif scheduler is None:
             self.scheduler = None
         else:
diff --git a/qlib/data/dataset/processor.py b/qlib/data/dataset/processor.py
@@ -187,14 +187,9 @@ def __call__(self, df):
         if self.fields_group is None:
             df.fillna(self.fill_value, inplace=True)
         else:
-            cols = get_group_columns(df, self.fields_group)
             # this implementation is extremely slow
             # df.fillna({col: self.fill_value for col in cols}, inplace=True)
-
-            # So we use numpy to accelerate filling values
-            nan_select = np.isnan(df.values)
-            nan_select[:, ~df.columns.isin(cols)] = False
-            df.values[nan_select] = self.fill_value
+            df[self.fields_group] = df[self.fields_group].fillna(self.fill_value)
         return df
 
 
diff --git a/tests/data_mid_layer_tests/test_dataloader.py b/tests/data_mid_layer_tests/test_dataloader.py
@@ -10,6 +10,7 @@
 from qlib.data.dataset.loader import NestedDataLoader, QlibDataLoader
 from qlib.data.dataset.handler import DataHandlerLP
 from qlib.contrib.data.loader import Alpha158DL, Alpha360DL
+from qlib.data.dataset.processor import Fillna
 from qlib.data import D
 
 
@@ -30,7 +31,7 @@ def test_nested_data_loader(self):
         )
         # Of course you can use StaticDataLoader
 
-        dataset = nd.load(start_time="2020-01-01", end_time="2020-01-31")
+        dataset = nd.load(instruments="csi300", start_time="2020-01-01", end_time="2020-01-31")
 
         assert dataset is not None
 
@@ -45,6 +46,13 @@ def test_nested_data_loader(self):
 
         assert "LABEL0" in columns_list
 
+        assert dataset.isna().any().any()
+
+        fn = Fillna(fields_group="feature", fill_value=0)
+        fn_dataset = fn.__call__(dataset)
+
+        assert not fn_dataset.isna().any().any()
+
         # Then you can use it wth DataHandler;
         # NOTE: please note that the data processors are missing!!!  You should add based on your requirements