Skip to content

Commit 320bd65

Browse files
authored
fix fillna bug (#1914)
* fix fillna bug
* fix flake8 error
* fix pylint error
* update ubuntu version for action
* fix pytest error
* fix pylint error
* fix black error
* fix pylint error
* add Fillna test
* fix black error
* add instruments
* remove code
1 parent e7a1b5e commit 320bd65

File tree

7 files changed

+42
-28
lines changed

7 files changed

+42
-28
lines changed

.github/workflows/test_qlib_from_pip.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
runs-on: ${{ matrix.os }}
1414
strategy:
1515
matrix:
16-
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
16+
os: [windows-latest, ubuntu-24.04, ubuntu-22.04, macos-13, macos-14, macos-15]
1717
# In github action, using python 3.7, pip install will not match the latest version of the package.
1818
# Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
1919
# All things considered, we have removed python 3.7.

.github/workflows/test_qlib_from_source.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
runs-on: ${{ matrix.os }}
1515
strategy:
1616
matrix:
17-
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
17+
os: [windows-latest, ubuntu-24.04, ubuntu-22.04, macos-13, macos-14, macos-15]
1818
# In github action, using python 3.7, pip install will not match the latest version of the package.
1919
# Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
2020
# All things considered, we have removed python 3.7.
@@ -39,7 +39,7 @@ jobs:
3939
python -m pip install torch torchvision torchaudio
4040
4141
- name: Installing pytorch for ubuntu
42-
if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'ubuntu-22.04' }}
42+
if: ${{ matrix.os == 'ubuntu-24.04' || matrix.os == 'ubuntu-22.04' }}
4343
run: |
4444
python -m pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
4545

.github/workflows/test_qlib_from_source_slow.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
runs-on: ${{ matrix.os }}
1515
strategy:
1616
matrix:
17-
os: [windows-latest, ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, macos-15]
17+
os: [windows-latest, ubuntu-24.04, ubuntu-22.04, macos-13, macos-14, macos-15]
1818
# In github action, using python 3.7, pip install will not match the latest version of the package.
1919
# Also, python 3.7 is no longer supported from macos-14, and will be phased out from macos-13 in the near future.
2020
# All things considered, we have removed python 3.7.

pyproject.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,6 @@ description = "A Quantitative-research Platform"
2323
requires-python = ">=3.8.0"
2424
readme = {file = "README.md", content-type = "text/markdown"}
2525

26-
# On 2025-04-02 osqp released version 1.0.2, osqp is used as a dependency for cvxpy.
27-
# It would lead to errors installing qlib, so we limited the version of osqp.
28-
# refs: https://github.com/osqp/osqp/issues/728
2926
dependencies = [
3027
"pyyaml",
3128
"numpy",
@@ -42,7 +39,6 @@ dependencies = [
4239
"loguru",
4340
"lightgbm",
4441
"gym",
45-
"osqp<1.0.2",
4642
"cvxpy",
4743
"joblib",
4844
"matplotlib",

qlib/contrib/model/pytorch_nn.py

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -146,19 +146,34 @@ def __init__(
146146
raise NotImplementedError("optimizer {} is not supported!".format(optimizer))
147147

148148
if scheduler == "default":
149-
# Reduce learning rate when loss has stopped decrease
150-
self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
151-
self.train_optimizer,
152-
mode="min",
153-
factor=0.5,
154-
patience=10,
155-
verbose=True,
156-
threshold=0.0001,
157-
threshold_mode="rel",
158-
cooldown=0,
159-
min_lr=0.00001,
160-
eps=1e-08,
161-
)
149+
# In torch version 2.7.0, the verbose parameter has been removed. Reference Link:
150+
# https://github.com/pytorch/pytorch/pull/147301/files#diff-036a7470d5307f13c9a6a51c3a65dd014f00ca02f476c545488cd856bea9bcf2L1313
151+
if str(torch.__version__).split("+", maxsplit=1)[0] <= "2.6.0":
152+
# Reduce the learning rate when the loss has stopped decreasing
153+
self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( # pylint: disable=E1123
154+
self.train_optimizer,
155+
mode="min",
156+
factor=0.5,
157+
patience=10,
158+
verbose=True,
159+
threshold=0.0001,
160+
threshold_mode="rel",
161+
cooldown=0,
162+
min_lr=0.00001,
163+
eps=1e-08,
164+
)
165+
else:
166+
self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
167+
self.train_optimizer,
168+
mode="min",
169+
factor=0.5,
170+
patience=10,
171+
threshold=0.0001,
172+
threshold_mode="rel",
173+
cooldown=0,
174+
min_lr=0.00001,
175+
eps=1e-08,
176+
)
162177
elif scheduler is None:
163178
self.scheduler = None
164179
else:

qlib/data/dataset/processor.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -187,14 +187,9 @@ def __call__(self, df):
187187
if self.fields_group is None:
188188
df.fillna(self.fill_value, inplace=True)
189189
else:
190-
cols = get_group_columns(df, self.fields_group)
191190
# this implementation is extremely slow
192191
# df.fillna({col: self.fill_value for col in cols}, inplace=True)
193-
194-
# So we use numpy to accelerate filling values
195-
nan_select = np.isnan(df.values)
196-
nan_select[:, ~df.columns.isin(cols)] = False
197-
df.values[nan_select] = self.fill_value
192+
df[self.fields_group] = df[self.fields_group].fillna(self.fill_value)
198193
return df
199194

200195

tests/data_mid_layer_tests/test_dataloader.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from qlib.data.dataset.loader import NestedDataLoader, QlibDataLoader
1111
from qlib.data.dataset.handler import DataHandlerLP
1212
from qlib.contrib.data.loader import Alpha158DL, Alpha360DL
13+
from qlib.data.dataset.processor import Fillna
1314
from qlib.data import D
1415

1516

@@ -30,7 +31,7 @@ def test_nested_data_loader(self):
3031
)
3132
# Of course you can use StaticDataLoader
3233

33-
dataset = nd.load(start_time="2020-01-01", end_time="2020-01-31")
34+
dataset = nd.load(instruments="csi300", start_time="2020-01-01", end_time="2020-01-31")
3435

3536
assert dataset is not None
3637

@@ -45,6 +46,13 @@ def test_nested_data_loader(self):
4546

4647
assert "LABEL0" in columns_list
4748

49+
assert dataset.isna().any().any()
50+
51+
fn = Fillna(fields_group="feature", fill_value=0)
52+
fn_dataset = fn.__call__(dataset)
53+
54+
assert not fn_dataset.isna().any().any()
55+
4856
# Then you can use it with DataHandler;
4957
# NOTE: please note that the data processors are missing!!! You should add them based on your requirements
5058

0 commit comments

Comments
 (0)