Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
139 commits
Select commit Hold shift + click to select a range
32fe269
add finiteness_checker pybind11 bindings
icfaust Oct 23, 2024
cdbf1b5
added finiteness checker
icfaust Oct 23, 2024
62674a2
Update finiteness_checker.cpp
icfaust Oct 23, 2024
c75c23b
Update finiteness_checker.cpp
icfaust Oct 23, 2024
6a20938
Update finiteness_checker.cpp
icfaust Oct 23, 2024
382d7a1
Update finiteness_checker.cpp
icfaust Oct 23, 2024
c8ffd9c
Update finiteness_checker.cpp
icfaust Oct 23, 2024
9aa13d5
Update finiteness_checker.cpp
icfaust Oct 23, 2024
84e15d5
Rename finiteness_checker.cpp to finiteness_checker.cpp
icfaust Oct 23, 2024
63073c6
Update finiteness_checker.cpp
icfaust Oct 24, 2024
d915da5
Merge branch 'intel:main' into dev/new_assert_all_fininte
icfaust Oct 28, 2024
3dddf2d
add next step
icfaust Oct 31, 2024
1e1213e
follow conventions
icfaust Oct 31, 2024
0531713
make xtable explicit
icfaust Oct 31, 2024
e831167
remove comment
icfaust Oct 31, 2024
d6eb1d0
Update validation.py
icfaust Oct 31, 2024
fb30d6e
Update __init__.py
icfaust Nov 1, 2024
63a18c2
Update validation.py
icfaust Nov 1, 2024
76c0856
Update __init__.py
icfaust Nov 1, 2024
7deb2bb
Update __init__.py
icfaust Nov 1, 2024
ed46b29
Update validation.py
icfaust Nov 1, 2024
67d6273
Update _data_conversion.py
icfaust Nov 1, 2024
054f0a1
Merge branch 'main' into dev/new_assert_all_fininte
icfaust Nov 1, 2024
8abead9
Update _data_conversion.py
icfaust Nov 1, 2024
47d0f8b
Update policy_common.cpp
icfaust Nov 1, 2024
e48c2bd
Update policy_common.cpp
icfaust Nov 1, 2024
c6751c4
Update _policy.py
icfaust Nov 1, 2024
f3e4a3a
Update policy_common.cpp
icfaust Nov 2, 2024
39cdb5f
Rename finiteness_checker.cpp to finiteness_checker.cpp
icfaust Nov 2, 2024
0f39613
Create finiteness_checker.py
icfaust Nov 2, 2024
b42cfe3
Update validation.py
icfaust Nov 2, 2024
0ed615e
Update __init__.py
icfaust Nov 2, 2024
f101aff
attempt at fixing circular imports again
icfaust Nov 2, 2024
24c0e94
fix isort
icfaust Nov 2, 2024
3f96166
remove __init__ changes
icfaust Nov 2, 2024
d985053
last move
icfaust Nov 2, 2024
90ec48b
Update policy_common.cpp
icfaust Nov 2, 2024
8c2c854
Update policy_common.cpp
icfaust Nov 2, 2024
6fa38d7
Update policy_common.cpp
icfaust Nov 2, 2024
9c1ca9c
Update policy_common.cpp
icfaust Nov 2, 2024
4b67dbd
Update validation.py
icfaust Nov 2, 2024
fa59a3c
add testing
icfaust Nov 2, 2024
3330b33
isort
icfaust Nov 2, 2024
4895940
attempt to fix module error
icfaust Nov 2, 2024
0c6dd5d
add fptype
icfaust Nov 2, 2024
e2182fa
fix typo
icfaust Nov 2, 2024
982ef2c
Update validation.py
icfaust Nov 2, 2024
2fb52a8
remove sua_ifcae from to_table
icfaust Nov 3, 2024
28dc267
isort and black
icfaust Nov 3, 2024
2f85fd4
Update test_memory_usage.py
icfaust Nov 3, 2024
8659248
format
icfaust Nov 3, 2024
3827d6f
Update _data_conversion.py
icfaust Nov 3, 2024
55fa7d2
Update _data_conversion.py
icfaust Nov 3, 2024
175cd78
Update test_validation.py
icfaust Nov 3, 2024
7016ad0
remove unnecessary code
icfaust Nov 3, 2024
1a01859
Merge branch 'main' into dev/new_assert_all_fininte
icfaust Nov 18, 2024
2fbcdd9
merge master
icfaust Nov 18, 2024
fb7375f
make reviewer changes
icfaust Nov 19, 2024
30816bf
make dtype check change
icfaust Nov 19, 2024
abb3b16
add sparse testing
icfaust Nov 19, 2024
97aef73
try again
icfaust Nov 19, 2024
6e29651
try again
icfaust Nov 19, 2024
59363a8
try again
icfaust Nov 19, 2024
12de703
temporary commit
icfaust Nov 20, 2024
07ec3d8
first attempt
icfaust Nov 20, 2024
32c565d
missing change?
icfaust Nov 20, 2024
a571a4e
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 20, 2024
5093ed7
modify DummyEstimator for testing
icfaust Nov 20, 2024
f04deba
generalize DummyEstimator
icfaust Nov 20, 2024
740a5e7
switch test
icfaust Nov 20, 2024
27050bd
further testing changes
icfaust Nov 20, 2024
53c8f7b
add initial validate_data test, will be refactored
icfaust Nov 20, 2024
90f59c4
fixes for CI
icfaust Nov 20, 2024
7f170e2
Update validation.py
icfaust Nov 20, 2024
81e2bbc
Update validation.py
icfaust Nov 20, 2024
116bdba
Update test_memory_usage.py
icfaust Nov 20, 2024
076ebc4
Update base.py
icfaust Nov 20, 2024
e1d0743
Update base.py
icfaust Nov 20, 2024
f59cdd3
improve tests
icfaust Nov 20, 2024
7f9ea25
fix logic
icfaust Nov 20, 2024
51247c0
fix logic
icfaust Nov 20, 2024
6e5c0ef
fix logic again
icfaust Nov 20, 2024
8d47744
rename file
icfaust Nov 20, 2024
1ae9af5
Revert "rename file"
icfaust Nov 20, 2024
bf9b46e
remove duplication
icfaust Nov 20, 2024
3101c3f
fix imports
icfaust Nov 20, 2024
6da176b
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 20, 2024
ee799f6
Rename test_finite.py to test_validation.py
icfaust Nov 20, 2024
db4a6c6
Revert "Rename test_finite.py to test_validation.py"
icfaust Nov 20, 2024
b5acbac
updates
icfaust Nov 21, 2024
ed57c15
Update validation.py
icfaust Nov 21, 2024
414f897
fixes for some test failures
icfaust Nov 21, 2024
83253b3
fix text
icfaust Nov 21, 2024
b22e23a
fixes for some failures
icfaust Nov 21, 2024
2f8ec16
make consistent
icfaust Nov 21, 2024
1fd9973
fix bad logic
icfaust Nov 21, 2024
c20c8cc
fix in string
icfaust Nov 21, 2024
1ce1b10
attempt tp see if dataframe conversion is causing the issue
icfaust Nov 21, 2024
5355039
fix iter problem
icfaust Nov 21, 2024
b5b8442
fix testing issues
icfaust Nov 21, 2024
d025c89
formatting
icfaust Nov 21, 2024
428bfb6
revert change
icfaust Nov 21, 2024
da23138
fixes for pandas
icfaust Nov 21, 2024
1d0c330
there is a slowdown with pandas that needs to be solved
icfaust Nov 21, 2024
f3f63a6
swap to transpose for speed
icfaust Nov 21, 2024
56c8054
more clarity
icfaust Nov 21, 2024
1580d77
add _check_sample_weight
icfaust Nov 22, 2024
ffc9f1f
add more testing'
icfaust Nov 22, 2024
d184ed0
rename
icfaust Nov 22, 2024
c68616f
remove unnecessary imports
icfaust Nov 22, 2024
e7ea94e
fix test slowness
icfaust Nov 22, 2024
dbe108d
focus get_dataframes_and_queues
icfaust Nov 22, 2024
7284b59
put config_context around
icfaust Nov 22, 2024
e1be91d
Update test_validation.py
icfaust Nov 24, 2024
8a0f9e9
Update base.py
icfaust Nov 24, 2024
5272207
Update test_validation.py
icfaust Nov 24, 2024
21a7896
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 24, 2024
56b5c4c
generalize regex
icfaust Nov 25, 2024
0d1b306
add fixes for sklearn 1.0 and input_name
icfaust Nov 25, 2024
8ff312e
fixes for test failures
icfaust Nov 25, 2024
87b7e3b
Update validation.py
icfaust Nov 25, 2024
29e8f8c
Update test_validation.py
icfaust Nov 25, 2024
527ce22
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 25, 2024
27ce5fc
Update validation.py
icfaust Nov 27, 2024
5d31988
formattintg
icfaust Nov 27, 2024
c4dccd6
make suggested changes
icfaust Nov 27, 2024
f83f1ef
follow changes made in #2126
icfaust Nov 27, 2024
0356a90
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 27, 2024
e43c047
fix future device problem
icfaust Nov 27, 2024
a9504a8
Merge branch 'dev/sklearnex_assert_all_finite' of https://github.com/…
icfaust Nov 27, 2024
b799d44
Merge branch 'intel:main' into dev/sklearnex_assert_all_finite
icfaust Nov 27, 2024
5c81f9d
Update validation.py
icfaust Nov 27, 2024
6ef96b1
merge main
icfaust Nov 28, 2024
1db7575
Merge branch 'uxlfoundation:main' into dev/sklearnex_assert_all_finite
icfaust Dec 2, 2024
38d0079
finished movement
icfaust Dec 2, 2024
bde3f3b
fix first error
icfaust Dec 2, 2024
ebf1ac4
next mistake
icfaust Dec 2, 2024
c2e5757
remove bad dtypes check
icfaust Dec 2, 2024
f8900e3
Merge branch 'main' into dev/new_RF
icfaust Sep 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 7 additions & 125 deletions onedal/ensemble/forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@
import numbers
import warnings
from abc import ABCMeta, abstractmethod
from math import ceil

import numpy as np
from sklearn.ensemble import BaseEnsemble
from sklearn.utils import check_random_state
import math

from daal4py.sklearn._utils import daal_check_version
from onedal._device_offload import supports_queue
Expand All @@ -34,13 +30,6 @@
from ..common._mixin import ClassifierMixin, RegressorMixin
from ..datatypes import from_table, to_table
from ..utils._array_api import _get_sycl_namespace
from ..utils.validation import (
_check_array,
_check_n_features,
_check_X_y,
_column_or_1d,
_validate_targets,
)


class BaseForest(BaseEnsemble, metaclass=ABCMeta):
Expand Down Expand Up @@ -110,8 +99,8 @@ def _to_absolute_max_features(self, n_features):
if self.max_features is None:
return n_features
elif isinstance(self.max_features, str):
return max(1, int(getattr(np, self.max_features)(n_features)))
elif isinstance(self.max_features, (numbers.Integral, np.integer)):
return max(1, int(getattr(math, self.max_features)(n_features)))
elif isinstance(self.max_features, numbers.Integral):
return self.max_features
elif self.max_features > 0.0:
return max(1, int(self.max_features * n_features))
Expand Down Expand Up @@ -142,31 +131,18 @@ def _get_onedal_params(self, data):
self.observations_per_tree_fraction = (
self.observations_per_tree_fraction if bool(self.bootstrap) else 1.0
)

if not self.bootstrap and self.max_samples is not None:
raise ValueError(
"`max_sample` cannot be set if `bootstrap=False`. "
"Either switch to `bootstrap=True` or set "
"`max_sample=None`."
)
if not self.bootstrap and self.oob_score:
raise ValueError("Out of bag estimation only available" " if bootstrap=True")

min_observations_in_leaf_node = (
self.min_samples_leaf
if isinstance(self.min_samples_leaf, numbers.Integral)
else int(ceil(self.min_samples_leaf * n_samples))
else int(math.ceil(self.min_samples_leaf * n_samples))
)

min_observations_in_split_node = (
self.min_samples_split
if isinstance(self.min_samples_split, numbers.Integral)
else int(ceil(self.min_samples_split * n_samples))
else int(math.ceil(self.min_samples_split * n_samples))
)

rs = check_random_state(self.random_state)
seed = rs.randint(0, np.iinfo("i").max)

onedal_params = {
"fptype": data.dtype,
"method": self.algorithm,
Expand All @@ -186,7 +162,7 @@ def _get_onedal_params(self, data):
"max_leaf_nodes": (0 if self.max_leaf_nodes is None else self.max_leaf_nodes),
"max_bins": self.max_bins,
"min_bin_size": self.min_bin_size,
"seed": seed,
"seed": self.random_state,
"memory_saving_mode": False,
"bootstrap": bool(self.bootstrap),
"error_metric_mode": self.error_metric_mode,
Expand All @@ -200,81 +176,6 @@ def _get_onedal_params(self, data):
onedal_params["splitter_mode"] = self.splitter_mode
return onedal_params

def _check_parameters(self):
if isinstance(self.min_samples_leaf, numbers.Integral):
if not 1 <= self.min_samples_leaf:
raise ValueError(
"min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s" % self.min_samples_leaf
)
else: # float
if not 0.0 < self.min_samples_leaf <= 0.5:
raise ValueError(
"min_samples_leaf must be at least 1 "
"or in (0, 0.5], got %s" % self.min_samples_leaf
)
if isinstance(self.min_samples_split, numbers.Integral):
if not 2 <= self.min_samples_split:
raise ValueError(
"min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the integer %s" % self.min_samples_split
)
else: # float
if not 0.0 < self.min_samples_split <= 1.0:
raise ValueError(
"min_samples_split must be an integer "
"greater than 1 or a float in (0.0, 1.0]; "
"got the float %s" % self.min_samples_split
)
if not 0 <= self.min_weight_fraction_leaf <= 0.5:
raise ValueError("min_weight_fraction_leaf must in [0, 0.5]")
if self.min_impurity_split is not None:
warnings.warn(
"The min_impurity_split parameter is deprecated. "
"Its default value has changed from 1e-7 to 0 in "
"version 0.23, and it will be removed in 0.25. "
"Use the min_impurity_decrease parameter instead.",
FutureWarning,
)

if self.min_impurity_split < 0.0:
raise ValueError(
"min_impurity_split must be greater than " "or equal to 0"
)
if self.min_impurity_decrease < 0.0:
raise ValueError(
"min_impurity_decrease must be greater than " "or equal to 0"
)
if self.max_leaf_nodes is not None:
if not isinstance(self.max_leaf_nodes, numbers.Integral):
raise ValueError(
"max_leaf_nodes must be integral number but was "
"%r" % self.max_leaf_nodes
)
if self.max_leaf_nodes < 2:
raise ValueError(
("max_leaf_nodes {0} must be either None " "or larger than 1").format(
self.max_leaf_nodes
)
)
if isinstance(self.max_bins, numbers.Integral):
if not 2 <= self.max_bins:
raise ValueError("max_bins must be at least 2, got %s" % self.max_bins)
else:
raise ValueError(
"max_bins must be integral number but was " "%r" % self.max_bins
)
if isinstance(self.min_bin_size, numbers.Integral):
if not 1 <= self.min_bin_size:
raise ValueError(
"min_bin_size must be at least 1, got %s" % self.min_bin_size
)
else:
raise ValueError(
"min_bin_size must be integral number but was " "%r" % self.min_bin_size
)

def _validate_targets(self, y, dtype):
self.class_weight_ = None
self.classes_ = None
Expand Down Expand Up @@ -342,25 +243,9 @@ def _fit(self, X, y, sample_weight):
self.oob_decision_function_ = from_table(
train_result.oob_err_decision_function
)
if xp.any(self.oob_decision_function_ == 0):
warnings.warn(
"Some inputs do not have OOB scores. This probably means "
"too few trees were used to compute any reliable OOB "
"estimates.",
UserWarning,
)
else:
self.oob_score_ = from_table(train_result.oob_err_r2).item()
self.oob_prediction_ = from_table(
train_result.oob_err_prediction
).reshape(-1)
if np.any(self.oob_prediction_ == 0):
warnings.warn(
"Some inputs do not have OOB scores. This probably means "
"too few trees were used to compute any reliable OOB "
"estimates.",
UserWarning,
)
self.oob_prediction_ = from_table(train_result.oob_err_prediction)

return self

Expand Down Expand Up @@ -563,7 +448,6 @@ def __init__(
error_metric_mode="none",
variable_importance_mode="none",
algorithm="hist",
**kwargs,
):
super().__init__(
n_estimators=n_estimators,
Expand Down Expand Up @@ -641,7 +525,6 @@ def __init__(
error_metric_mode="none",
variable_importance_mode="none",
algorithm="hist",
**kwargs,
):
super().__init__(
n_estimators=n_estimators,
Expand Down Expand Up @@ -732,7 +615,6 @@ def __init__(
error_metric_mode="none",
variable_importance_mode="none",
algorithm="hist",
**kwargs,
):
super().__init__(
n_estimators=n_estimators,
Expand Down
Loading
Loading