diff --git a/.github/workflows/actions.yaml b/.github/workflows/actions.yaml
new file mode 100644
index 0000000..75fd1f8
--- /dev/null
+++ b/.github/workflows/actions.yaml
@@ -0,0 +1,27 @@
+name: Build
+
+on: [push]
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
+
+ steps:
+ - name: Checkout the ${{ github.repository }} repository
+ uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install .[test]
+ - name: Test with Pytest
+ run: |
+ pytest --cov=vecstack --cov-report=term-missing tests
+ - name: Coveralls
+ uses: coverallsapp/github-action@v2
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 30d59e9..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-# .travis.yml needs to be present on all active branches
-# check if .travis.yml is valid: http://lint.travis-ci.org/
-# to skip build for given commit put [ci skip] or [skip ci] in commit message
-
-# required for Python >= 3.7
-dist: xenial
-
-language: python
-
-# versions supported by scikit-learn and some additional versions
-python:
- - "3.7"
- - "3.6"
- - "3.5"
- - "3.4"
- - "2.7"
-
-# safelist
-branches:
- only:
- - master
- - dev
- - py2
-
-install:
- - pip install numpy
- - pip install scipy
- - pip install pandas
- - pip install scikit-learn
- - pip install nose
- - pip install coverage
- - pip install coveralls
- - pip install .
-
-script:
- - nosetests --with-coverage --cover-package=vecstack
-
-after_success:
- - coverage report -m
- - coveralls
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c3afb9b..e8dea5a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,17 @@
# Changelog
+### v0.5.0 -- September 8, 2025 -- Maintenance release
+
+* Python 3.9+
+* Testing: pytest and pytest-cov
+* CI: GitHub Actions
+
+* Scikit-learn API:
+  * Fixed the `_set_params` method, which was not resetting individual estimators in the `estimators` collection
+
+* Functional API:
+  * Fixed saving OOF arrays to file
+
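+A minimal sketch of the behavior this fix targets (a hypothetical snippet; names follow the `StackingTransformer` examples in the README):
+
+```python
+from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
+from vecstack import StackingTransformer
+
+stack = StackingTransformer(estimators=[('rf', RandomForestRegressor(random_state=0))])
+# Replacing the collection (or a nested parameter such as rf__n_estimators)
+# should now reset the underlying estimator objects instead of keeping stale ones
+stack.set_params(estimators=[('et', ExtraTreesRegressor(random_state=0))])
+```
+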
### v0.4.0 -- August 12, 2019
Since v0.4.0 vecstack provides official support for Python 3.5 and higher only,
diff --git a/LICENSE.txt b/LICENSE.txt
index 38165f0..13d488c 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,7 +1,7 @@
MIT License
Vecstack. Python package for stacking (machine learning technique)
-Copyright (c) 2016-2019 Igor Ivanov
+Copyright (c) 2016-2025 Igor Ivanov
Email: vecxoz@gmail.com
Permission is hereby granted, free of charge, to any person obtaining a copy
diff --git a/README.md b/README.md
index 195df98..6c5013c 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
[](https://pypi.python.org/pypi/vecstack)
[](https://github.com/vecxoz/vecstack/blob/master/LICENSE.txt)
-[](https://travis-ci.org/vecxoz/vecstack)
+[](https://github.com/vecxoz/vecstack/actions)
[](https://coveralls.io/github/vecxoz/vecstack?branch=master)
[](https://pypi.python.org/pypi/vecstack/)
@@ -11,18 +11,18 @@ Convenient way to automate OOF computation, prediction and bagging using any num
* [Functional API](https://github.com/vecxoz/vecstack#usage-functional-api):
* Minimalistic. Get your stacked features in a single line
* RAM-friendly. The lowest possible memory consumption
- * Kaggle-ready. Stacked features and hyperparameters from each run can be [automatically saved](https://github.com/vecxoz/vecstack/blob/master/vecstack/core.py#L209) in files. No more mess at the end of the competition. [Log example](https://github.com/vecxoz/vecstack/blob/master/examples/03_log_example.txt)
+ * Kaggle-ready. Stacked features and hyperparameters from each run can be [automatically saved](https://github.com/vecxoz/vecstack/blob/master/vecstack/core.py#L210) in files. No more mess at the end of the competition. [Log example](https://github.com/vecxoz/vecstack/blob/master/examples/03_log_example.txt)
* [Scikit-learn API](https://github.com/vecxoz/vecstack#usage-scikit-learn-api):
* Standardized. Fully scikit-learn compatible transformer class exposing `fit` and `transform` methods
* Pipeline-certified. Implement and deploy [multilevel stacking](https://github.com/vecxoz/vecstack/blob/master/examples/04_sklearn_api_regression_pipeline.ipynb) like it's no big deal using `sklearn.pipeline.Pipeline`
* And of course `FeatureUnion` is also invited to the party
* Overall specs:
* Use any sklearn-like estimators
- * Perform [classification and regression](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L83) tasks
- * Predict [class labels or probabilities](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L119) in classification task
- * Apply any [user-defined metric](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L124)
- * Apply any [user-defined transformations](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L87) for target and prediction
- * Python 3.5 and higher, [unofficial support for Python 2.7 and 3.4](https://github.com/vecxoz/vecstack/blob/master/PY2.md)
+ * Perform [classification and regression](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L85) tasks
+ * Predict [class labels or probabilities](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L121) in classification task
+ * Apply any [user-defined metric](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L126)
+ * Apply any [user-defined transformations](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L89) for target and prediction
+ * Python 3.9+, [unofficial support for Python 2.7 and 3.4](https://github.com/vecxoz/vecstack/blob/master/PY2.md)
* Win, Linux, Mac
* [MIT license](https://github.com/vecxoz/vecstack/blob/master/LICENSE.txt)
* Depends on **numpy**, **scipy**, **scikit-learn>=0.18**
@@ -44,11 +44,11 @@ Convenient way to automate OOF computation, prediction and bagging using any num
* [Regression + Multilevel stacking using Pipeline](https://github.com/vecxoz/vecstack/blob/master/examples/04_sklearn_api_regression_pipeline.ipynb)
* Documentation:
* [Functional API](https://github.com/vecxoz/vecstack/blob/master/vecstack/core.py#L133) or type ```>>> help(stacking)```
- * [Scikit-learn API](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L64) or type ```>>> help(StackingTransformer)```
+ * [Scikit-learn API](https://github.com/vecxoz/vecstack/blob/master/vecstack/coresk.py#L66) or type ```>>> help(StackingTransformer)```
# Installation
-***Note:*** Python 3.5 or higher is required. If you’re still using Python 2.7 or 3.4 see [installation details here](https://github.com/vecxoz/vecstack/blob/master/PY2.md)
+***Note:*** Python 3.9+ is officially supported and tested. If you’re still using Python 2.7 or 3.4, see [installation details here](https://github.com/vecxoz/vecstack/blob/master/PY2.md)
* ***Classic 1st time installation (recommended):***
* `pip install vecstack`
@@ -56,7 +56,7 @@ Convenient way to automate OOF computation, prediction and bagging using any num
* `pip install --user vecstack`
* If your PATH doesn't work:
* `/usr/bin/python -m pip install vecstack`
- * `C:/Python36/python -m pip install vecstack`
+    * `C:/Python312/python -m pip install vecstack`
* Upgrade vecstack and all dependencies:
* `pip install --upgrade vecstack`
* Upgrade vecstack WITHOUT upgrading dependencies:
@@ -137,6 +137,7 @@ S_test = stack.transform(X_test)
28. [Can I use `(Randomized)GridSearchCV` to tune the whole stacking Pipeline?](https://github.com/vecxoz/vecstack#28-can-i-use-randomizedgridsearchcv-to-tune-the-whole-stacking-pipeline)
29. [How to define custom metric, especially AUC?](https://github.com/vecxoz/vecstack#29-how-to-define-custom-metric-especially-auc)
30. [Do folds (splits) have to be the same across estimators and stacking levels? How does `random_state` work?](https://github.com/vecxoz/vecstack#30-do-folds-splits-have-to-be-the-same-across-estimators-and-stacking-levels-how-does-random_state-work)
+31. [How does `vecstack.StackingTransformer` differ from `sklearn.ensemble.StackingClassifier`?](https://github.com/vecxoz/vecstack#31-how-does-vecstackstackingtransformer-differ-from-sklearnensemblestackingclassifier)
### 1. How can I report an issue? How can I ask a question about stacking or vecstack package?
@@ -167,7 +168,7 @@ Main idea is to use predictions as features.
More specifically we predict train set (in CV-like fashion) and test set using some 1st level model(s), and then use these predictions as features for 2nd level model. You can find more details (concept, pictures, code) in [stacking tutorial](https://github.com/vecxoz/vecstack/blob/master/examples/00_stacking_concept_pictures_code.ipynb).
Also make sure to check out:
* [Ensemble Learning](https://en.wikipedia.org/wiki/Ensemble_learning) ([Stacking](https://en.wikipedia.org/wiki/Ensemble_learning#Stacking)) in Wikipedia
-* Classical [Kaggle Ensembling Guide](https://mlwave.com/kaggle-ensembling-guide/)
+* Classical [Kaggle Ensembling Guide](https://mlwave.com/kaggle-ensembling-guide/) (or an [archived copy](https://web.archive.org/web/20210727094233/https://mlwave.com/kaggle-ensembling-guide/))
* [Stacked Generalization](https://www.researchgate.net/publication/222467943_Stacked_Generalization) paper by David H. Wolpert
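+
+A rough sketch of this idea using plain scikit-learn tools (vecstack automates exactly this loop, plus bagging of test predictions):
+
+```python
+from sklearn.datasets import fetch_california_housing
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import cross_val_predict, train_test_split
+
+X, y = fetch_california_housing(return_X_y=True)
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+
+model_l1 = RandomForestRegressor(random_state=0)
+# OOF predictions for the train set (each row is predicted by a model
+# that never saw it) and ordinary predictions for the test set
+S_train = cross_val_predict(model_l1, X_train, y_train, cv=4).reshape(-1, 1)
+S_test = model_l1.fit(X_train, y_train).predict(X_test).reshape(-1, 1)
+
+# 2nd level model takes predictions as features
+model_l2 = LinearRegression().fit(S_train, y_train)
+y_pred = model_l2.predict(S_test)
+```
+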
### 5. What about stacking name?
@@ -216,7 +217,7 @@ Speaking about inner stacking mechanics, you should remember that when you have
### 12. What is *blending*? How is it related to stacking?
Basically it is the same thing. Both approaches use predictions as features.
-Often this terms are used interchangeably.
+Often these terms are used interchangeably.
The difference is how we generate features (predictions) for the next level:
* *stacking*: perform cross-validation procedure and predict each part of train set (OOF)
* *blending*: predict fixed holdout set
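+
+A sketch of the difference, assuming a single 1st level model and some `X_train`, `y_train` at hand:
+
+```python
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import cross_val_predict, train_test_split
+
+model = RandomForestRegressor(random_state=0)
+
+# Stacking: every train row eventually gets an out-of-fold prediction
+S_train_stacking = cross_val_predict(model, X_train, y_train, cv=4)
+
+# Blending: only the fixed holdout part gets predictions,
+# so the 2nd level train set is smaller
+X_tr, X_hold, y_tr, y_hold = train_test_split(X_train, y_train,
+                                              test_size=0.3, random_state=0)
+S_hold_blending = model.fit(X_tr, y_tr).predict(X_hold)
+```
+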
@@ -387,10 +388,14 @@ def auc(y_true, y_pred):
 To ensure a better result, folds (splits) have to be the same across all estimators and all stacking levels. It means that `random_state` has to be the same in every call to the `stacking` function or `StackingTransformer`. This is the default behavior of the `stacking` function and `StackingTransformer` (by default `random_state=0`). If you want to try different folds (splits), set different `random_state` values.
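+
+For illustration, a tiny self-contained check with plain `KFold` (not vecstack internals): the same `random_state` reproduces identical splits, so every estimator sees the same folds.
+
+```python
+import numpy as np
+from sklearn.model_selection import KFold
+
+X = np.arange(20).reshape(10, 2)
+kf_a = KFold(n_splits=5, shuffle=True, random_state=0)
+kf_b = KFold(n_splits=5, shuffle=True, random_state=0)
+for (_, te_a), (_, te_b) in zip(kf_a.split(X), kf_b.split(X)):
+    assert np.array_equal(te_a, te_b)  # identical folds in every call
+```
+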
+### 31. How does `vecstack.StackingTransformer` differ from `sklearn.ensemble.StackingClassifier`?
+
+They differ significantly. See the [detailed explanation](https://github.com/vecxoz/vecstack/issues/37).
+
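+Very roughly (a sketch, not an exhaustive comparison; `X_train`, `y_train`, `X_test` assumed at hand): `StackingTransformer` is a pure transformer that returns stacked (OOF) features and leaves the next level entirely up to you, while `StackingClassifier` bundles a final estimator inside a single estimator object.
+
+```python
+from sklearn.ensemble import RandomForestClassifier, StackingClassifier
+from sklearn.linear_model import LogisticRegression
+from vecstack import StackingTransformer
+
+estimators = [('rf', RandomForestClassifier(random_state=0))]
+
+# vecstack: fit/transform produce stacked features, the 2nd level model is yours
+stack = StackingTransformer(estimators=estimators, regression=False, random_state=0)
+S_train = stack.fit_transform(X_train, y_train)
+S_test = stack.transform(X_test)
+final = LogisticRegression().fit(S_train, y_train)
+
+# scikit-learn: one estimator object with a built-in final estimator
+clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
+clf.fit(X_train, y_train)
+```
+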
# Stacking concept
-1. We want to predict train set and test set with some 1st level model(s), and then use these predictions as features for 2nd level model(s).
+1. We want to predict train set and test set with some 1st level model(s), and then use these predictions as features for 2nd level model(s).
2. Any model can be used as 1st level model or 2nd level model.
3. To avoid overfitting (for train set) we use cross-validation technique and in each fold we predict out-of-fold (OOF) part of train set.
4. The common practice is to use from 3 to 10 folds.
@@ -404,6 +409,7 @@ To ensure better result, folds (splits) have to be the same across all estimator
8. We can repeat this cycle using other 1st level models to get more features for 2nd level model.
9. You can also look at animation of [Variant A](https://github.com/vecxoz/vecstack#variant-a-animation) and [Variant B](https://github.com/vecxoz/vecstack#variant-b-animation).
+
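+A bare-bones sketch of the fold loop behind Variant A (each fold model also predicts the test set, and those test predictions are averaged):
+
+```python
+import numpy as np
+from sklearn.base import clone
+from sklearn.model_selection import KFold
+
+def oof_variant_a(model, X_train, y_train, X_test, n_folds=4):
+    S_train = np.zeros(X_train.shape[0])
+    S_test_folds = np.zeros((X_test.shape[0], n_folds))
+    kf = KFold(n_splits=n_folds, shuffle=False)
+    for k, (tr_idx, te_idx) in enumerate(kf.split(X_train)):
+        m = clone(model).fit(X_train[tr_idx], y_train[tr_idx])
+        S_train[te_idx] = m.predict(X_train[te_idx])  # OOF part of train set
+        S_test_folds[:, k] = m.predict(X_test)        # fold model on test set
+    return S_train.reshape(-1, 1), S_test_folds.mean(axis=1).reshape(-1, 1)
+```
+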
# Variant A

@@ -429,3 +435,10 @@ To ensure better result, folds (splits) have to be the same across all estimator
# Variant B. Animation

+
+
+# References
+
+* [Ensemble Learning](https://en.wikipedia.org/wiki/Ensemble_learning) ([Stacking](https://en.wikipedia.org/wiki/Ensemble_learning#Stacking)) in Wikipedia
+* Classical [Kaggle Ensembling Guide](https://mlwave.com/kaggle-ensembling-guide/) (or an [archived copy](https://web.archive.org/web/20210727094233/https://mlwave.com/kaggle-ensembling-guide/))
+* [Stacked Generalization](https://www.researchgate.net/publication/222467943_Stacked_Generalization) paper by David H. Wolpert
diff --git a/examples/00_stacking_concept_pictures_code.ipynb b/examples/00_stacking_concept_pictures_code.ipynb
index 45e165c..61fb330 100644
--- a/examples/00_stacking_concept_pictures_code.ipynb
+++ b/examples/00_stacking_concept_pictures_code.ipynb
@@ -121,14 +121,13 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {
- "collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"import numpy as np\n",
- "from sklearn.datasets import load_boston\n",
+ "from sklearn.datasets import fetch_california_housing\n",
"from sklearn.model_selection import cross_val_predict\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.model_selection import train_test_split\n",
@@ -153,14 +152,12 @@
"cell_type": "code",
"execution_count": 2,
"metadata": {
- "collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
- "boston = load_boston()\n",
- "X, y = boston.data, boston.target\n",
+ "X, y = fetch_california_housing(return_X_y=True)\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
]
},
@@ -190,19 +187,22 @@
"metadata": {
"collapsed": false,
"deletable": true,
- "editable": true
+ "editable": true,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "fold 0: [3.38044832]\n",
- "fold 1: [3.21036931]\n",
- "fold 2: [3.49229353]\n",
+ "fold 0: [0.53428460]\n",
+ "fold 1: [0.53291416]\n",
+ "fold 2: [0.52626198]\n",
"\n",
- "MEAN: [3.36103705] + [0.11591064]\n",
- "FULL: [3.36071216]\n"
+ "MEAN: [0.53115358] + [0.00350384]\n",
+ "FULL: [0.53115358]\n"
]
}
],
@@ -218,7 +218,7 @@
"# Empty list to store scores from each fold\n",
"scores = []\n",
"# Split initialization\n",
- "kf = KFold(n_splits=n_folds, shuffle=False, random_state=0)\n",
+ "kf = KFold(n_splits=n_folds, shuffle=False)\n",
"\n",
"# Loop across folds\n",
"for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):\n",
@@ -271,7 +271,10 @@
"metadata": {
"collapsed": true,
"deletable": true,
- "editable": true
+ "editable": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
},
"source": [
"There are no suitable standard Scikit-learn tools for Variant A. \n",
@@ -294,25 +297,28 @@
"metadata": {
"collapsed": false,
"deletable": true,
- "editable": true
+ "editable": true,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "task: [regression]\n",
- "metric: [mean_absolute_error]\n",
- "mode: [oof_pred_bag]\n",
- "n_models: [1]\n",
+ "task: [regression]\n",
+ "metric: [mean_absolute_error]\n",
+ "mode: [oof_pred_bag]\n",
+ "n_models: [1]\n",
"\n",
- "model 0: [LinearRegression]\n",
- " fold 0: [3.38044832]\n",
- " fold 1: [3.21036931]\n",
- " fold 2: [3.49229353]\n",
+ "model 0: [LinearRegression]\n",
+ " fold 0: [0.53428460]\n",
+ " fold 1: [0.53291416]\n",
+ " fold 2: [0.52626198]\n",
" ----\n",
- " MEAN: [3.36103705] + [0.11591064]\n",
- " FULL: [3.36071216]\n",
+ " MEAN: [0.53115358] + [0.00350384]\n",
+ " FULL: [0.53115358]\n",
"\n"
]
}
@@ -339,24 +345,27 @@
"cell_type": "code",
"execution_count": 5,
"metadata": {
- "collapsed": false
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "[[ 32.87287178]\n",
- " [ 22.02957522]\n",
- " [ 27.16855956]\n",
- " [ 23.77791521]\n",
- " [ 7.70569251]]\n",
+ "[[1.69019642]\n",
+ " [2.88448817]\n",
+ " [2.20976598]\n",
+ " [1.38160949]\n",
+ " [2.23334858]]\n",
"\n",
- "[[ 32.87287178]\n",
- " [ 22.02957522]\n",
- " [ 27.16855956]\n",
- " [ 23.77791521]\n",
- " [ 7.70569251]]\n"
+ "[[1.69019642]\n",
+ " [2.88448817]\n",
+ " [2.20976598]\n",
+ " [1.38160949]\n",
+ " [2.23334858]]\n"
]
}
],
@@ -368,24 +377,27 @@
"cell_type": "code",
"execution_count": 6,
"metadata": {
- "collapsed": false
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "[[ 24.95478501]\n",
- " [ 23.63277494]\n",
- " [ 29.34879363]\n",
- " [ 12.0744784 ]\n",
- " [ 21.46079309]]\n",
+ "[[2.2792878 ]\n",
+ " [2.78905713]\n",
+ " [1.9027686 ]\n",
+ " [1.02009234]\n",
+ " [2.95248053]]\n",
"\n",
- "[[ 24.95478501]\n",
- " [ 23.63277494]\n",
- " [ 29.34879363]\n",
- " [ 12.0744784 ]\n",
- " [ 21.46079309]]\n"
+ "[[2.2792878 ]\n",
+ " [2.78905713]\n",
+ " [1.9027686 ]\n",
+ " [1.02009234]\n",
+ " [2.95248053]]\n"
]
}
],
@@ -409,7 +421,10 @@
"metadata": {
"collapsed": false,
"deletable": true,
- "editable": true
+ "editable": true,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
@@ -460,19 +475,22 @@
"metadata": {
"collapsed": false,
"deletable": true,
- "editable": true
+ "editable": true,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "fold 0: [3.38044832]\n",
- "fold 1: [3.21036931]\n",
- "fold 2: [3.49229353]\n",
+ "fold 0: [0.53428460]\n",
+ "fold 1: [0.53291416]\n",
+ "fold 2: [0.52626198]\n",
"\n",
- "MEAN: [3.36103705] + [0.11591064]\n",
- "FULL: [3.36071216]\n"
+ "MEAN: [0.53115358] + [0.00350384]\n",
+ "FULL: [0.53115358]\n"
]
}
],
@@ -486,7 +504,7 @@
"# Empty list to store scores from each fold\n",
"scores = []\n",
"# Split initialization\n",
- "kf = KFold(n_splits=n_folds, shuffle=False, random_state=0)\n",
+ "kf = KFold(n_splits=n_folds, shuffle=False)\n",
"\n",
"# Loop across folds\n",
"for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):\n",
@@ -538,19 +556,22 @@
"metadata": {
"collapsed": false,
"deletable": true,
- "editable": true
+ "editable": true,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "fold 0: [3.38044832]\n",
- "fold 1: [3.21036931]\n",
- "fold 2: [3.49229353]\n",
+ "fold 0: [0.53428460]\n",
+ "fold 1: [0.53291416]\n",
+ "fold 2: [0.52626198]\n",
"\n",
- "MEAN: [3.36103705] + [0.11591064]\n",
- "FULL: [3.36071216]\n"
+ "MEAN: [0.53115358] + [0.00350384]\n",
+ "FULL: [0.53115358]\n"
]
}
],
@@ -609,25 +630,28 @@
"metadata": {
"collapsed": false,
"deletable": true,
- "editable": true
+ "editable": true,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "task: [regression]\n",
- "metric: [mean_absolute_error]\n",
- "mode: [oof_pred]\n",
- "n_models: [1]\n",
+ "task: [regression]\n",
+ "metric: [mean_absolute_error]\n",
+ "mode: [oof_pred]\n",
+ "n_models: [1]\n",
"\n",
- "model 0: [LinearRegression]\n",
- " fold 0: [3.38044832]\n",
- " fold 1: [3.21036931]\n",
- " fold 2: [3.49229353]\n",
+ "model 0: [LinearRegression]\n",
+ " fold 0: [0.53428460]\n",
+ " fold 1: [0.53291416]\n",
+ " fold 2: [0.52626198]\n",
" ----\n",
- " MEAN: [3.36103705] + [0.11591064]\n",
- " FULL: [3.36071216]\n",
+ " MEAN: [0.53115358] + [0.00350384]\n",
+ " FULL: [0.53115358]\n",
"\n",
" Fitting on full train set...\n",
"\n"
@@ -656,30 +680,33 @@
"cell_type": "code",
"execution_count": 11,
"metadata": {
- "collapsed": false
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "[[ 32.87287178]\n",
- " [ 22.02957522]\n",
- " [ 27.16855956]\n",
- " [ 23.77791521]\n",
- " [ 7.70569251]]\n",
+ "[[1.69019642]\n",
+ " [2.88448817]\n",
+ " [2.20976598]\n",
+ " [1.38160949]\n",
+ " [2.23334858]]\n",
"\n",
- "[[ 32.87287178]\n",
- " [ 22.02957522]\n",
- " [ 27.16855956]\n",
- " [ 23.77791521]\n",
- " [ 7.70569251]]\n",
+ "[[1.69019642]\n",
+ " [2.88448817]\n",
+ " [2.20976598]\n",
+ " [1.38160949]\n",
+ " [2.23334858]]\n",
"\n",
- "[[ 32.87287178]\n",
- " [ 22.02957522]\n",
- " [ 27.16855956]\n",
- " [ 23.77791521]\n",
- " [ 7.70569251]]\n"
+ "[[1.69019642]\n",
+ " [2.88448817]\n",
+ " [2.20976598]\n",
+ " [1.38160949]\n",
+ " [2.23334858]]\n"
]
}
],
@@ -691,30 +718,33 @@
"cell_type": "code",
"execution_count": 12,
"metadata": {
- "collapsed": false
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "[[ 24.89012999]\n",
- " [ 23.72488246]\n",
- " [ 29.37213304]\n",
- " [ 12.14010251]\n",
- " [ 21.4468654 ]]\n",
+ "[[2.28110738]\n",
+ " [2.79009128]\n",
+ " [1.90332794]\n",
+ " [1.01760331]\n",
+ " [2.94852425]]\n",
"\n",
- "[[ 24.89012999]\n",
- " [ 23.72488246]\n",
- " [ 29.37213304]\n",
- " [ 12.14010251]\n",
- " [ 21.4468654 ]]\n",
+ "[[2.28110738]\n",
+ " [2.79009128]\n",
+ " [1.90332794]\n",
+ " [1.01760331]\n",
+ " [2.94852425]]\n",
"\n",
- "[[ 24.89012999]\n",
- " [ 23.72488246]\n",
- " [ 29.37213304]\n",
- " [ 12.14010251]\n",
- " [ 21.4468654 ]]\n"
+ "[[2.28110738]\n",
+ " [2.79009128]\n",
+ " [1.90332794]\n",
+ " [1.01760331]\n",
+ " [2.94852425]]\n"
]
}
],
@@ -738,7 +768,10 @@
"metadata": {
"collapsed": false,
"deletable": true,
- "editable": true
+ "editable": true,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
"outputs": [
{
@@ -770,11 +803,18 @@
"print(all(S_test_B_scratch == S_test_B_vecstack))\n",
"print(all(S_test_B_sklearn == S_test_B_vecstack))"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -788,9 +828,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.1"
+ "version": "3.10.12"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/examples/01_regression.ipynb b/examples/01_regression.ipynb
index 89ac1d7..c6fc652 100644
--- a/examples/01_regression.ipynb
+++ b/examples/01_regression.ipynb
@@ -21,7 +21,7 @@
"metadata": {},
"outputs": [],
"source": [
- "from sklearn.datasets import load_boston\n",
+ "from sklearn.datasets import fetch_california_housing\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_absolute_error\n",
"from sklearn.ensemble import ExtraTreesRegressor\n",
@@ -43,8 +43,7 @@
"metadata": {},
"outputs": [],
"source": [
- "boston = load_boston()\n",
- "X, y = boston.data, boston.target\n",
+ "X, y = fetch_california_housing(return_X_y=True)\n",
"\n",
"# Make train/test split\n",
"# As usual in machine learning task we have X_train, y_train, and X_test\n",
@@ -94,37 +93,37 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "task: [regression]\n",
- "metric: [mean_absolute_error]\n",
- "mode: [oof_pred_bag]\n",
- "n_models: [3]\n",
+ "task: [regression]\n",
+ "metric: [mean_absolute_error]\n",
+ "mode: [oof_pred_bag]\n",
+ "n_models: [3]\n",
"\n",
- "model 0: [ExtraTreesRegressor]\n",
- " fold 0: [3.20733439]\n",
- " fold 1: [2.87943130]\n",
- " fold 2: [2.53026486]\n",
- " fold 3: [2.83618694]\n",
+ "model 0: [ExtraTreesRegressor]\n",
+ " fold 0: [0.65383143]\n",
+ " fold 1: [0.65059961]\n",
+ " fold 2: [0.66233582]\n",
+ " fold 3: [0.64449777]\n",
" ----\n",
- " MEAN: [2.86330437] + [0.23993093]\n",
- " FULL: [2.86330437]\n",
+ " MEAN: [0.65281616] + [0.00643746]\n",
+ " FULL: [0.65281616]\n",
"\n",
- "model 1: [RandomForestRegressor]\n",
- " fold 0: [3.11110485]\n",
- " fold 1: [2.78404210]\n",
- " fold 2: [2.55707729]\n",
- " fold 3: [2.32209992]\n",
+ "model 1: [RandomForestRegressor]\n",
+ " fold 0: [0.58416160]\n",
+ " fold 1: [0.56449564]\n",
+ " fold 2: [0.57730149]\n",
+ " fold 3: [0.55014073]\n",
" ----\n",
- " MEAN: [2.69358104] + [0.29117900]\n",
- " FULL: [2.69358104]\n",
+ " MEAN: [0.56902487] + [0.01298795]\n",
+ " FULL: [0.56902487]\n",
"\n",
- "model 2: [XGBRegressor]\n",
- " fold 0: [2.40318942]\n",
- " fold 1: [2.37286943]\n",
- " fold 2: [1.89121526]\n",
- " fold 3: [1.95382805]\n",
+ "model 2: [XGBRegressor]\n",
+ " fold 0: [0.37287275]\n",
+ " fold 1: [0.36827074]\n",
+ " fold 2: [0.37315715]\n",
+ " fold 3: [0.36447933]\n",
" ----\n",
- " MEAN: [2.15527554] + [0.23404984]\n",
- " FULL: [2.15527554]\n",
+ " MEAN: [0.36969499] + [0.00358177]\n",
+ " FULL: [0.36969499]\n",
"\n"
]
}
@@ -164,11 +163,11 @@
{
"data": {
"text/plain": [
- "array([[ 27.21782522, 28.23561508, 27.78520966],\n",
- " [ 22.25443115, 22.32927929, 22.57203102],\n",
- " [ 26.03879794, 25.80114661, 26.27923012],\n",
- " [ 21.82927308, 21.30478775, 21.39201546],\n",
- " [ 13.02143285, 12.04667683, 8.88440514]])"
+ "array([[2.1381431 , 1.89449961, 1.85192811],\n",
+ " [2.29310757, 1.89309918, 2.92809105],\n",
+ " [2.07256939, 1.89449961, 2.10903692],\n",
+ " [1.51938275, 1.53835871, 1.37909698],\n",
+ " [1.93450337, 2.737813 , 3.23252964]])"
]
},
"execution_count": 5,
@@ -188,11 +187,11 @@
{
"data": {
"text/plain": [
- "array([[ 24.89602382, 23.85490698, 24.85046005],\n",
- " [ 20.85135955, 25.05068336, 26.30952454],\n",
- " [ 23.13164045, 21.56864103, 23.67526102],\n",
- " [ 13.47709586, 11.81606315, 11.02050447],\n",
- " [ 21.93179664, 21.30652111, 21.75125122]])"
+ "array([[2.12570438, 1.88503507, 1.57685581],\n",
+ " [2.57631542, 2.67168873, 2.70525175],\n",
+ " [2.06940157, 1.88669837, 1.69479051],\n",
+ " [1.64434775, 1.20196782, 0.96695787],\n",
+ " [2.33799194, 2.98206787, 3.8881467 ]])"
]
},
"execution_count": 6,
@@ -220,7 +219,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Final prediction score: [2.78409081]\n"
+ "Final prediction score: [0.35320658]\n"
]
}
],
@@ -238,11 +237,18 @@
"# Final prediction score\n",
"print('Final prediction score: [%.8f]' % mean_absolute_error(y_test, y_pred))"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -256,9 +262,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.5.2"
+ "version": "3.10.12"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/examples/02_classification_with_class_labels.ipynb b/examples/02_classification_with_class_labels.ipynb
index 327ad3f..6b6d700 100644
--- a/examples/02_classification_with_class_labels.ipynb
+++ b/examples/02_classification_with_class_labels.ipynb
@@ -94,38 +94,38 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "task: [classification]\n",
- "n_classes: [3]\n",
- "metric: [accuracy_score]\n",
- "mode: [oof_pred_bag]\n",
- "n_models: [3]\n",
+ "task: [classification]\n",
+ "n_classes: [3]\n",
+ "metric: [accuracy_score]\n",
+ "mode: [oof_pred_bag]\n",
+ "n_models: [3]\n",
"\n",
- "model 0: [ExtraTreesClassifier]\n",
- " fold 0: [0.93548387]\n",
- " fold 1: [0.96666667]\n",
- " fold 2: [1.00000000]\n",
- " fold 3: [0.89655172]\n",
+ "model 0: [ExtraTreesClassifier]\n",
+ " fold 0: [1.00000000]\n",
+ " fold 1: [0.90000000]\n",
+ " fold 2: [1.00000000]\n",
+ " fold 3: [0.90000000]\n",
" ----\n",
- " MEAN: [0.94967557] + [0.03822562]\n",
- " FULL: [0.95000000]\n",
+ " MEAN: [0.95000000] + [0.05000000]\n",
+ " FULL: [0.95000000]\n",
"\n",
- "model 1: [RandomForestClassifier]\n",
- " fold 0: [0.87096774]\n",
- " fold 1: [0.96666667]\n",
- " fold 2: [1.00000000]\n",
- " fold 3: [0.93103448]\n",
+ "model 1: [RandomForestClassifier]\n",
+ " fold 0: [0.96666667]\n",
+ " fold 1: [0.90000000]\n",
+ " fold 2: [1.00000000]\n",
+ " fold 3: [0.90000000]\n",
" ----\n",
- " MEAN: [0.94216722] + [0.04779685]\n",
- " FULL: [0.94166667]\n",
+ " MEAN: [0.94166667] + [0.04330127]\n",
+ " FULL: [0.94166667]\n",
"\n",
- "model 2: [XGBClassifier]\n",
- " fold 0: [0.83870968]\n",
- " fold 1: [0.93333333]\n",
- " fold 2: [1.00000000]\n",
- " fold 3: [0.93103448]\n",
+ "model 2: [XGBClassifier]\n",
+ " fold 0: [0.93333333]\n",
+ " fold 1: [0.90000000]\n",
+ " fold 2: [0.93333333]\n",
+ " fold 3: [0.90000000]\n",
" ----\n",
- " MEAN: [0.92576937] + [0.05739014]\n",
- " FULL: [0.92500000]\n",
+ " MEAN: [0.91666667] + [0.01666667]\n",
+ " FULL: [0.91666667]\n",
"\n"
]
}
@@ -171,7 +171,7 @@
"array([[2, 2, 2],\n",
" [1, 1, 1],\n",
" [0, 0, 0],\n",
- " [2, 1, 2],\n",
+ " [2, 2, 2],\n",
" [2, 2, 2]])"
]
},
@@ -246,7 +246,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -260,9 +260,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.5.2"
+ "version": "3.10.12"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/examples/03_classification_with_proba_detailed_workflow.ipynb b/examples/03_classification_with_proba_detailed_workflow.ipynb
index a7e363b..a3b696a 100644
--- a/examples/03_classification_with_proba_detailed_workflow.ipynb
+++ b/examples/03_classification_with_proba_detailed_workflow.ipynb
@@ -18,22 +18,34 @@
{
"cell_type": "code",
"execution_count": 1,
- "metadata": {},
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Using Theano backend.\n"
+ "2025-09-06 15:55:39.761844: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+ "2025-09-06 15:55:39.792129: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+ "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2025-09-06 15:55:40.527567: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
+ "W0000 00:00:1757163340.650380 23114 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.\n",
+ "Skipping registering GPU devices...\n"
]
}
],
"source": [
+ "import warnings\n",
+ "warnings.filterwarnings('ignore', message='The y_prob values do not sum to one')\n",
+ "warnings.filterwarnings('ignore', message='Skipping variable loading for optimizer')\n",
+ "warnings.filterwarnings('ignore', message='X does not have valid feature names')\n",
+ "import re\n",
"from glob import glob\n",
- "import re \n",
"import numpy as np\n",
"np.random.seed(0) # ensure reproducibility\n",
- "np.set_printoptions(suppress = True)\n",
+ "np.set_printoptions(suppress=True)\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.datasets import make_classification\n",
"from sklearn.metrics import log_loss\n",
@@ -45,9 +57,15 @@
"from xgboost import XGBClassifier\n",
"from lightgbm import LGBMClassifier\n",
"# NN\n",
+ "import tensorflow as tf\n",
+ "# Data is small so do NOT use GPU for simplicity\n",
+ "tf.config.set_visible_devices([], 'GPU')\n",
+ "# Suppress Python level warnings from tensorflow\n",
+ "tf.get_logger().setLevel('ERROR') \n",
"from keras.models import Sequential\n",
"from keras.layers import Dense\n",
- "from keras.wrappers.scikit_learn import KerasClassifier\n",
+ "from keras.layers import Input\n",
+ "from scikeras.wrappers import KerasClassifier, KerasRegressor\n",
"# Stacking\n",
"from vecstack import stacking"
]
@@ -105,15 +123,15 @@
"source": [
"def build_keras_model_1():\n",
" model = Sequential()\n",
- " model.add(Dense(64, \n",
- " input_dim=X_train.shape[1], \n",
+ " model.add(Input(shape=(X_train.shape[1],)))\n",
+ " model.add(Dense(64,\n",
" kernel_initializer='normal', \n",
" activation='relu'))\n",
" model.add(Dense(n_classes, \n",
" kernel_initializer='normal', \n",
" activation='softmax'))\n",
" model.compile(optimizer='rmsprop', \n",
- " loss='categorical_crossentropy', \n",
+ " loss='sparse_categorical_crossentropy', \n",
" metrics=['categorical_accuracy'])\n",
" return model\n",
"\n",
@@ -134,9 +152,9 @@
" n_estimators=100, max_depth=3),\n",
" \n",
" LGBMClassifier(random_state=0, n_jobs=-1, learning_rate=0.1, \n",
- " n_estimators=100, max_depth=3),\n",
- " \n",
- " KerasClassifier(build_fn=build_keras_model_1, epochs=2, \n",
+ " n_estimators=100, max_depth=3, verbose=-1),\n",
+ "\n",
+ " KerasClassifier(model=build_keras_model_1(), epochs=2, \n",
" batch_size=32, verbose=0)\n",
"]"
]
@@ -151,103 +169,105 @@
{
"cell_type": "code",
"execution_count": 4,
- "metadata": {},
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "task: [classification]\n",
- "n_classes: [3]\n",
- "metric: [log_loss]\n",
- "mode: [oof_pred]\n",
- "n_models: [7]\n",
+ "task: [classification]\n",
+ "n_classes: [3]\n",
+ "metric: [log_loss]\n",
+ "mode: [oof_pred]\n",
+ "n_models: [7]\n",
"\n",
- "model 0: [GaussianNB]\n",
- " fold 0: [0.65551778]\n",
- " fold 1: [0.42335961]\n",
- " fold 2: [0.38132309]\n",
- " fold 3: [0.57180128]\n",
- " fold 4: [0.30426116]\n",
+ "model 0: [GaussianNB]\n",
+ " fold 0: [0.57030626]\n",
+ " fold 1: [0.28256165]\n",
+ " fold 2: [0.35609357]\n",
+ " fold 3: [0.57833219]\n",
+ " fold 4: [0.60933411]\n",
" ----\n",
- " MEAN: [0.46725258] + [0.12825825]\n",
- " FULL: [0.46842847]\n",
+ " MEAN: [0.47932556] + [0.13332980]\n",
+ " FULL: [0.47932556]\n",
"\n",
" Fitting on full train set...\n",
"\n",
- "model 1: [LogisticRegression]\n",
- " fold 0: [0.59821304]\n",
- " fold 1: [0.54202039]\n",
- " fold 2: [0.55194968]\n",
- " fold 3: [0.46887313]\n",
- " fold 4: [0.44995007]\n",
+ "model 1: [LogisticRegression]\n",
+ " fold 0: [0.58074650]\n",
+ " fold 1: [0.27626266]\n",
+ " fold 2: [0.40797434]\n",
+ " fold 3: [0.48192230]\n",
+ " fold 4: [0.73155308]\n",
" ----\n",
- " MEAN: [0.52220126] + [0.05499033]\n",
- " FULL: [0.52280210]\n",
+ " MEAN: [0.49569178] + [0.15420632]\n",
+ " FULL: [0.49569178]\n",
"\n",
" Fitting on full train set...\n",
"\n",
- "model 2: [ExtraTreesClassifier]\n",
- " fold 0: [0.79961086]\n",
- " fold 1: [0.75093790]\n",
- " fold 2: [0.77930597]\n",
- " fold 3: [0.76984042]\n",
- " fold 4: [0.75288684]\n",
+ "model 2: [ExtraTreesClassifier]\n",
+ " fold 0: [0.80523428]\n",
+ " fold 1: [0.72214703]\n",
+ " fold 2: [0.72851161]\n",
+ " fold 3: [0.78859646]\n",
+ " fold 4: [0.82512546]\n",
" ----\n",
- " MEAN: [0.77051640] + [0.01799067]\n",
- " FULL: [0.77062834]\n",
+ " MEAN: [0.77392297] + [0.04137715]\n",
+ " FULL: [0.77392297]\n",
"\n",
" Fitting on full train set...\n",
"\n",
- "model 3: [RandomForestClassifier]\n",
- " fold 0: [0.61575788]\n",
- " fold 1: [0.40598536]\n",
- " fold 2: [0.37631065]\n",
- " fold 3: [0.48927717]\n",
- " fold 4: [0.35383760]\n",
+ "model 3: [RandomForestClassifier]\n",
+ " fold 0: [0.49744464]\n",
+ " fold 1: [0.30045852]\n",
+ " fold 2: [0.38866325]\n",
+ " fold 3: [0.49116102]\n",
+ " fold 4: [0.51274923]\n",
" ----\n",
- " MEAN: [0.44823373] + [0.09551690]\n",
- " FULL: [0.44901890]\n",
+ " MEAN: [0.43809533] + [0.08160752]\n",
+ " FULL: [0.43809533]\n",
"\n",
" Fitting on full train set...\n",
"\n",
- "model 4: [XGBClassifier]\n",
- " fold 0: [0.79844773]\n",
- " fold 1: [0.29219564]\n",
- " fold 2: [0.32080725]\n",
- " fold 3: [0.47567222]\n",
- " fold 4: [0.28695253]\n",
+ "model 4: [XGBClassifier]\n",
+ " fold 0: [0.44344452]\n",
+ " fold 1: [0.19888857]\n",
+ " fold 2: [0.37032590]\n",
+ " fold 3: [0.50328894]\n",
+ " fold 4: [0.46400776]\n",
" ----\n",
- " MEAN: [0.43481507] + [0.19447336]\n",
- " FULL: [0.43610692]\n",
+ " MEAN: [0.39599114] + [0.10760951]\n",
+ " FULL: [0.39599114]\n",
"\n",
" Fitting on full train set...\n",
"\n",
- "model 5: [LGBMClassifier]\n",
- " fold 0: [0.72121715]\n",
- " fold 1: [0.28661436]\n",
- " fold 2: [0.32019533]\n",
- " fold 3: [0.44805274]\n",
- " fold 4: [0.29008710]\n",
+ "model 5: [LGBMClassifier]\n",
+ " fold 0: [0.46739007]\n",
+ " fold 1: [0.20192817]\n",
+ " fold 2: [0.34714699]\n",
+ " fold 3: [0.49378077]\n",
+ " fold 4: [0.50732813]\n",
" ----\n",
- " MEAN: [0.41323334] + [0.16487562]\n",
- " FULL: [0.41430248]\n",
+ " MEAN: [0.40351483] + [0.11560251]\n",
+ " FULL: [0.40351483]\n",
"\n",
" Fitting on full train set...\n",
"\n",
- "model 6: [KerasClassifier]\n",
- " fold 0: [0.95679030]\n",
- " fold 1: [0.97577886]\n",
- " fold 2: [0.99147471]\n",
- " fold 3: [0.97480903]\n",
- " fold 4: [0.96731000]\n",
+ "model 6: [KerasClassifier]\n",
+ " fold 0: [0.99057170]\n",
+ " fold 1: [0.82460226]\n",
+ " fold 2: [0.70419501]\n",
+ " fold 3: [0.66333640]\n",
+ " fold 4: [0.66650450]\n",
" ----\n",
- " MEAN: [0.97323258] + [0.01137690]\n",
- " FULL: [0.97322745]\n",
+ " MEAN: [0.76984197] + [0.12494150]\n",
+ " FULL: [0.76984197]\n",
"\n",
" Fitting on full train set...\n",
"\n",
- "Result was saved to [./[2018.02.01].[15.41.41].305268.0eadc0.npy]\n"
+ "Result was saved to [./[2025.09.06].[15.55.45].032643.7f0aac.npy]\n"
]
}
],
@@ -309,16 +329,16 @@
{
"data": {
"text/plain": [
- "array([[ 0.00118767, 0.02222581, 0.97658652, 0.06963771, 0.05618856,\n",
- " 0.87417373, 0.21796766, 0.21491663, 0.56711571, 0.03741131,\n",
- " 0.08982228, 0.87276641, 0.00182469, 0.00537052, 0.99280483,\n",
- " 0.00040829, 0.00281319, 0.99677852, 0.3023589 , 0.26565766,\n",
- " 0.43198347],\n",
- " [ 0.96030684, 0.03969316, 0. , 0.75245808, 0.24720408,\n",
- " 0.00033784, 0.5615216 , 0.26871071, 0.16976769, 0.85696824,\n",
- " 0.12811857, 0.01491319, 0.9877857 , 0.01111581, 0.00109853,\n",
- " 0.99732125, 0.00258249, 0.00009626, 0.38591456, 0.31510866,\n",
- " 0.29897675]])"
+ "array([[0.00083161, 0.01828022, 0.98088816, 0.01605728, 0.02724415,\n",
+ " 0.95669857, 0.22361998, 0.20286901, 0.57351102, 0.0365039 ,\n",
+ " 0.10297998, 0.86051611, 0.00304293, 0.01768309, 0.97927397,\n",
+ " 0.00074538, 0.00395689, 0.99529773, 0.32723224, 0.28824005,\n",
+ " 0.38452768],\n",
+ " [0.95026182, 0.04973818, 0. , 0.89084281, 0.10909541,\n",
+ " 0.00006178, 0.55396899, 0.27694952, 0.16908149, 0.85772772,\n",
+ " 0.13275189, 0.00952038, 0.9821493 , 0.01676223, 0.00108847,\n",
+ " 0.99746299, 0.00249197, 0.00004503, 0.39017811, 0.32593027,\n",
+ " 0.28389156]])"
]
},
"execution_count": 6,
@@ -338,16 +358,16 @@
{
"data": {
"text/plain": [
- "array([[ 0.38824186, 0.37434678, 0.23741136, 0.35886342, 0.38701687,\n",
- " 0.2541197 , 0.31662764, 0.29004533, 0.39332704, 0.27655712,\n",
- " 0.55408115, 0.16936173, 0.58901626, 0.3923738 , 0.01860998,\n",
- " 0.60639131, 0.3588515 , 0.03475718, 0.33691144, 0.31800038,\n",
- " 0.34508815],\n",
- " [ 0.32313599, 0.67239959, 0.00446442, 0.32348396, 0.54466285,\n",
- " 0.13185319, 0.31999925, 0.36345201, 0.31654874, 0.10054021,\n",
- " 0.81354061, 0.08591918, 0.02955116, 0.95850134, 0.01194747,\n",
- " 0.03609523, 0.90174785, 0.06215692, 0.3260029 , 0.37157273,\n",
- " 0.3024244 ]])"
+ "array([[0.38824186, 0.37434678, 0.23741136, 0.307547 , 0.52253329,\n",
+ " 0.16991971, 0.31662764, 0.29004533, 0.39332704, 0.27655712,\n",
+ " 0.55408115, 0.16936173, 0.58477622, 0.38952848, 0.02569526,\n",
+ " 0.72381327, 0.24995384, 0.02623288, 0.30865759, 0.34028247,\n",
+ " 0.35106 ],\n",
+ " [0.32313599, 0.67239959, 0.00446442, 0.28539557, 0.62320346,\n",
+ " 0.09140096, 0.31999925, 0.36345201, 0.31654874, 0.10054021,\n",
+ " 0.81354061, 0.08591918, 0.02924692, 0.95591789, 0.01483521,\n",
+ " 0.01189152, 0.94377175, 0.04433673, 0.27213532, 0.52776176,\n",
+ " 0.20010303]])"
]
},
"execution_count": 7,
@@ -376,10 +396,10 @@
"output_type": "stream",
"text": [
"Arrays:\n",
- "[2018.02.01].[15.41.41].305268.0eadc0.npy\n",
+ "[2025.09.06].[15.55.45].032643.7f0aac.npy\n",
"\n",
"Logs:\n",
- "[2018.02.01].[15.41.41].305268.0eadc0.log.txt\n"
+ "[2025.09.06].[15.55.45].032643.7f0aac.log.txt\n"
]
}
],
@@ -417,8 +437,8 @@
"source": [
"def build_keras_model_2():\n",
" model = Sequential()\n",
- " model.add(Dense(256, \n",
- " input_dim=X_train.shape[1], \n",
+ " model.add(Input(shape=(X_train.shape[1],)))\n",
+ " model.add(Dense(256,\n",
" kernel_initializer='normal', \n",
" activation='relu'))\n",
" model.add(Dense(64, \n",
@@ -428,14 +448,14 @@
" kernel_initializer='normal', \n",
" activation='softmax'))\n",
" model.compile(optimizer='rmsprop', \n",
- " loss='categorical_crossentropy', \n",
+ " loss='sparse_categorical_crossentropy', \n",
" metrics=['categorical_accuracy'])\n",
" return model\n",
"\n",
"# Caution! All models and parameter values are just \n",
"# demonstrational and shouldn't be considered as recommended.\n",
"models_2 = [ \n",
- " KerasClassifier(build_fn=build_keras_model_2, epochs=5, \n",
+ " KerasClassifier(model=build_keras_model_2(), epochs=5, \n",
" batch_size=32, verbose=0)\n",
"]"
]
@@ -450,31 +470,33 @@
{
"cell_type": "code",
"execution_count": 10,
- "metadata": {},
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "task: [classification]\n",
- "n_classes: [3]\n",
- "metric: [log_loss]\n",
- "mode: [oof_pred]\n",
- "n_models: [1]\n",
+ "task: [classification]\n",
+ "n_classes: [3]\n",
+ "metric: [log_loss]\n",
+ "mode: [oof_pred]\n",
+ "n_models: [1]\n",
"\n",
- "model 0: [KerasClassifier]\n",
- " fold 0: [0.54741578]\n",
- " fold 1: [0.42787166]\n",
- " fold 2: [0.40649939]\n",
- " fold 3: [0.45298407]\n",
- " fold 4: [0.39133918]\n",
+ "model 0: [KerasClassifier]\n",
+ " fold 0: [0.51636764]\n",
+ " fold 1: [0.22029691]\n",
+ " fold 2: [0.28535020]\n",
+ " fold 3: [0.39152662]\n",
+ " fold 4: [0.43074365]\n",
" ----\n",
- " MEAN: [0.44522202] + [0.05515007]\n",
- " FULL: [0.44570354]\n",
+ " MEAN: [0.36885700] + [0.10502070]\n",
+ " FULL: [0.36885700]\n",
"\n",
" Fitting on full train set...\n",
"\n",
- "Result was saved to [./[2018.02.01].[15.42.30].250441.2c4c1b.npy]\n"
+ "Result was saved to [./[2025.09.06].[15.55.47].511377.c4aa2f.npy]\n"
]
}
],
@@ -514,12 +536,12 @@
"output_type": "stream",
"text": [
"Arrays:\n",
- "[2018.02.01].[15.41.41].305268.0eadc0.npy\n",
- "[2018.02.01].[15.42.30].250441.2c4c1b.npy\n",
+ "[2025.09.06].[15.55.45].032643.7f0aac.npy\n",
+ "[2025.09.06].[15.55.47].511377.c4aa2f.npy\n",
"\n",
"Logs:\n",
- "[2018.02.01].[15.41.41].305268.0eadc0.log.txt\n",
- "[2018.02.01].[15.42.30].250441.2c4c1b.log.txt\n"
+ "[2025.09.06].[15.55.45].032643.7f0aac.log.txt\n",
+ "[2025.09.06].[15.55.47].511377.c4aa2f.log.txt\n"
]
}
],
@@ -570,22 +592,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Let's open this log: [2018.02.01].[15.41.41].305268.0eadc0.log.txt\n",
+ "Let's open this log: [2025.09.06].[15.55.45].032643.7f0aac.log.txt\n",
"Let's look what models did we build in those session.\n",
- "\n",
- "model 0: [GaussianNB]\n",
- "\n",
- "model 1: [LogisticRegression]\n",
- "\n",
- "model 2: [ExtraTreesClassifier]\n",
- "\n",
- "model 3: [RandomForestClassifier]\n",
- "\n",
- "model 4: [XGBClassifier]\n",
- "\n",
- "model 5: [LGBMClassifier]\n",
- "\n",
- "model 6: [KerasClassifier]\n",
"\n"
]
}
@@ -621,13 +629,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Let's load this .npy file: [2018.02.01].[15.41.41].305268.0eadc0.npy\n"
+ "Let's load this .npy file: [2025.09.06].[15.55.45].032643.7f0aac.npy\n"
]
}
],
"source": [
"print(\"Let's load this .npy file: %s\" % npy_1_name)\n",
- "S = np.load(npy_1_name)\n",
+ "S = np.load(npy_1_name, allow_pickle=True)\n",
"S_train_lgbm = S[0][:, 15:18]\n",
"S_test_lgbm = S[1][:, 15:18]"
]
@@ -640,11 +648,11 @@
{
"data": {
"text/plain": [
- "array([[ 0.00040829, 0.00281319, 0.99677852],\n",
- " [ 0.99732125, 0.00258249, 0.00009626],\n",
- " [ 0.98322854, 0.01610955, 0.00066191],\n",
- " [ 0.00107737, 0.99633895, 0.00258368],\n",
- " [ 0.97101719, 0.02843959, 0.00054321]])"
+ "array([[0.00074538, 0.00395689, 0.99529773],\n",
+ " [0.99746299, 0.00249197, 0.00004503],\n",
+ " [0.99599212, 0.00369756, 0.00031032],\n",
+ " [0.00109104, 0.99659281, 0.00231615],\n",
+ " [0.98498265, 0.01489686, 0.00012049]])"
]
},
"execution_count": 14,
@@ -664,11 +672,11 @@
{
"data": {
"text/plain": [
- "array([[ 0.60639131, 0.3588515 , 0.03475718],\n",
- " [ 0.03609523, 0.90174785, 0.06215692],\n",
- " [ 0.08650007, 0.89717473, 0.0163252 ],\n",
- " [ 0.00068572, 0.98858075, 0.01073353],\n",
- " [ 0.00122693, 0.99814513, 0.00062793]])"
+ "array([[0.72381327, 0.24995384, 0.02623288],\n",
+ " [0.01189152, 0.94377175, 0.04433673],\n",
+ " [0.0896902 , 0.90432675, 0.00598305],\n",
+ " [0.00034138, 0.99091816, 0.00874045],\n",
+ " [0.0001139 , 0.99955083, 0.00033527]])"
]
},
"execution_count": 15,
@@ -696,7 +704,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "LGBMCLassifier log loss: 0.41430248\n"
+      "LGBMClassifier log loss: 0.40351483\n"
]
}
],
@@ -741,8 +749,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Loading: [2018.02.01].[15.41.41].305268.0eadc0.npy\n",
- "Loading: [2018.02.01].[15.42.30].250441.2c4c1b.npy\n",
+ "Loading: [2025.09.06].[15.55.45].032643.7f0aac.npy\n",
+ "Loading: [2025.09.06].[15.55.47].511377.c4aa2f.npy\n",
"\n",
"S_train_all shape: (400, 24)\n",
"S_test_all shape: (100, 24)\n"
@@ -757,7 +765,7 @@
"# Load results\n",
"for name in sorted(glob('*.npy')):\n",
" print('Loading: %s' % name)\n",
- " S = np.load(name)\n",
+ " S = np.load(name, allow_pickle=True)\n",
" S_train_all = np.c_[S_train_all, S[0]]\n",
" S_test_all = np.c_[S_test_all, S[1]]\n",
" \n",
@@ -781,7 +789,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Final prediction score: 0.38636334\n"
+ "Final prediction score: 0.37246788\n"
]
}
],
@@ -799,11 +807,18 @@
"# Final prediction score\n",
"print('Final prediction score: %.8f' % log_loss(y_test, y_pred))"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -817,9 +832,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.5.2"
+ "version": "3.10.12"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/examples/04_sklearn_api_regression_pipeline.ipynb b/examples/04_sklearn_api_regression_pipeline.ipynb
index f29f146..502c1ee 100644
--- a/examples/04_sklearn_api_regression_pipeline.ipynb
+++ b/examples/04_sklearn_api_regression_pipeline.ipynb
@@ -24,13 +24,13 @@
"outputs": [],
"source": [
"import pandas as pd\n",
- "from sklearn.datasets import load_boston\n",
+ "from sklearn.datasets import fetch_california_housing\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_absolute_error\n",
"from sklearn.ensemble import ExtraTreesRegressor\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn.pipeline import Pipeline\n",
- "from sklearn.externals import joblib\n",
+ "import joblib\n",
"from xgboost import XGBRegressor\n",
"from vecstack import StackingTransformer"
]
@@ -48,8 +48,7 @@
"metadata": {},
"outputs": [],
"source": [
- "boston = load_boston()\n",
- "X, y = boston.data, boston.target\n",
+ "X, y = fetch_california_housing(return_X_y=True)\n",
"\n",
"# Make train/test split\n",
"# As usual in machine learning task we have X_train, y_train, and X_test\n",
@@ -147,28 +146,28 @@
"n_estimators: [3]\n",
"\n",
"estimator 0: [et: ExtraTreesRegressor]\n",
- " fold 0: [3.20733439]\n",
- " fold 1: [2.87943130]\n",
- " fold 2: [2.53026486]\n",
- " fold 3: [2.83618694]\n",
+ " fold 0: [0.65383143]\n",
+ " fold 1: [0.65059961]\n",
+ " fold 2: [0.66233582]\n",
+ " fold 3: [0.64449777]\n",
" ----\n",
- " MEAN: [2.86330437] + [0.23993093]\n",
+ " MEAN: [0.65281616] + [0.00643746]\n",
"\n",
"estimator 1: [rf: RandomForestRegressor]\n",
- " fold 0: [3.11110485]\n",
- " fold 1: [2.78404210]\n",
- " fold 2: [2.55707729]\n",
- " fold 3: [2.32209992]\n",
+ " fold 0: [0.58416160]\n",
+ " fold 1: [0.56449564]\n",
+ " fold 2: [0.57730149]\n",
+ " fold 3: [0.55014073]\n",
" ----\n",
- " MEAN: [2.69358104] + [0.29117900]\n",
+ " MEAN: [0.56902487] + [0.01298795]\n",
"\n",
"estimator 2: [xgb: XGBRegressor]\n",
- " fold 0: [2.40318942]\n",
- " fold 1: [2.37286943]\n",
- " fold 2: [1.89121526]\n",
- " fold 3: [1.95382805]\n",
+ " fold 0: [0.37287275]\n",
+ " fold 1: [0.36827074]\n",
+ " fold 2: [0.37315715]\n",
+ " fold 3: [0.36447933]\n",
" ----\n",
- " MEAN: [2.15527554] + [0.23404984]\n",
+ " MEAN: [0.36969499] + [0.00358177]\n",
"\n"
]
}
@@ -287,11 +286,11 @@
{
"data": {
"text/plain": [
- "array([[27.21782522, 28.23561508, 27.78520966],\n",
- " [22.25443115, 22.32927929, 22.57203102],\n",
- " [26.03879794, 25.80114661, 26.27923012],\n",
- " [21.82927308, 21.30478775, 21.39201546],\n",
- " [13.02143285, 12.04667683, 8.88440514]])"
+ "array([[2.1381431 , 1.89449961, 1.85192811],\n",
+ " [2.29310757, 1.89309918, 2.92809105],\n",
+ " [2.07256939, 1.89449961, 2.10903692],\n",
+ " [1.51938275, 1.53835871, 1.37909698],\n",
+ " [1.93450337, 2.737813 , 3.23252964]])"
]
},
"execution_count": 8,
@@ -311,11 +310,11 @@
{
"data": {
"text/plain": [
- "array([[24.89602382, 23.85490698, 24.85046005],\n",
- " [20.85135955, 25.05068336, 26.30952454],\n",
- " [23.13164045, 21.56864103, 23.67526102],\n",
- " [13.47709586, 11.81606315, 11.02050447],\n",
- " [21.93179664, 21.30652111, 21.75125122]])"
+ "array([[2.12570438, 1.88503507, 1.57685581],\n",
+ " [2.57631542, 2.67168873, 2.70525175],\n",
+ " [2.06940157, 1.88669837, 1.69479051],\n",
+ " [1.64434775, 1.20196782, 0.96695787],\n",
+ " [2.33799194, 2.98206787, 3.8881467 ]])"
]
},
"execution_count": 9,
@@ -343,7 +342,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Final prediction score: [2.78409081]\n"
+ "Final prediction score: [0.35320658]\n"
]
}
],
@@ -399,9 +398,9 @@
{
"data": {
"text/plain": [
- "array([[3.20733439, 2.8794313 , 2.53026486, 2.83618694],\n",
- " [3.11110485, 2.7840421 , 2.55707729, 2.32209992],\n",
- " [2.40318942, 2.37286943, 1.89121526, 1.95382805]])"
+ "array([[0.65383143, 0.65059961, 0.66233582, 0.64449777],\n",
+ " [0.5841616 , 0.56449564, 0.57730149, 0.55014073],\n",
+ " [0.37287275, 0.36827074, 0.37315715, 0.36447933]])"
]
},
"execution_count": 12,
@@ -423,9 +422,9 @@
{
"data": {
"text/plain": [
- "[('et', 2.8633043735634116, 0.23993092887498238),\n",
- " ('rf', 2.6935810393014306, 0.2911789973137302),\n",
- " ('xgb', 2.15527553747196, 0.23404984189134637)]"
+ "[('et', np.float64(0.6528161581671601), np.float64(0.006437456871932304)),\n",
+ " ('rf', np.float64(0.5690248659195717), np.float64(0.012987952596095562)),\n",
+ " ('xgb', np.float64(0.3696949910225933), np.float64(0.00358176792828805))]"
]
},
"execution_count": 13,
@@ -448,6 +447,19 @@
"data": {
"text/html": [
"
\n",
+ "\n",
"
\n",
" \n",
" \n",
@@ -461,20 +473,20 @@
"
\n",
" | 2 | \n",
" xgb | \n",
- " 2.155276 | \n",
- " 0.234050 | \n",
+ " 0.369695 | \n",
+ " 0.003582 | \n",
"
\n",
" \n",
" | 1 | \n",
" rf | \n",
- " 2.693581 | \n",
- " 0.291179 | \n",
+ " 0.569025 | \n",
+ " 0.012988 | \n",
"
\n",
" \n",
" | 0 | \n",
" et | \n",
- " 2.863304 | \n",
- " 0.239931 | \n",
+ " 0.652816 | \n",
+ " 0.006437 | \n",
"
\n",
" \n",
"
\n",
@@ -482,9 +494,9 @@
],
"text/plain": [
" name mean std\n",
- "2 xgb 2.155276 0.234050\n",
- "1 rf 2.693581 0.291179\n",
- "0 et 2.863304 0.239931"
+ "2 xgb 0.369695 0.003582\n",
+ "1 rf 0.569025 0.012988\n",
+ "0 et 0.652816 0.006437"
]
},
"execution_count": 14,
@@ -538,7 +550,7 @@
"# If we have several stacking levels our Pipeline steps would be:\n",
"# steps = [('stack_L1', stack_L1),\n",
"# ('stack_L2', stack_L2),\n",
- "# ('stack_L99', stack_L99), # :-)\n",
+ "# ('stack_L99', stack_L99), # :-)\n",
"# ('final_estimator', final_estimator)]"
]
},
@@ -582,7 +594,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Final prediction score using Pipeline: [2.78409081]\n"
+ "Final prediction score using Pipeline: [0.35320658]\n"
]
}
],
@@ -623,7 +635,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Final prediction score using loaded Pipeline: [2.78409081]\n"
+ "Final prediction score using loaded Pipeline: [0.35320658]\n"
]
}
],
@@ -659,7 +671,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -673,9 +685,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.5.2"
+ "version": "3.10.12"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..eb62c16
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools >= 42.0.0"]
+build-backend = "setuptools.build_meta"
diff --git a/setup.py b/setup.py
index 96289c1..19eca14 100644
--- a/setup.py
+++ b/setup.py
@@ -5,10 +5,11 @@
long_desc = '''
Python package for stacking (stacked generalization) featuring lightweight functional API and fully compatible scikit-learn API.
Convenient way to automate OOF computation, prediction and bagging using any number of models.
+All details, FAQ, and tutorials: https://github.com/vecxoz/vecstack
'''
setup(name='vecstack',
- version='0.4.0',
+ version='0.5.0',
description='Python package for stacking (machine learning technique)',
long_description=long_desc,
classifiers=[
@@ -19,9 +20,10 @@
'Operating System :: Unix',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6',
- 'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.9',
+ 'Programming Language :: Python :: 3.10',
+ 'Programming Language :: Python :: 3.11',
+ 'Programming Language :: Python :: 3.12',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Scientific/Engineering :: Information Analysis',
@@ -41,6 +43,12 @@
'scipy',
'scikit-learn>=0.18'
],
- test_suite='nose.collector',
- tests_require=['nose'],
+ extras_require={
+ 'test': [
+ 'pytest',
+ 'pytest-cov',
+ 'pandas',
+ 'pyarrow'
+ ]
+ },
zip_safe=False)
diff --git a/tests/test_func_api_classification_binary.py b/tests/test_func_api_classification_binary.py
index efabc24..a663f5e 100644
--- a/tests/test_func_api_classification_binary.py
+++ b/tests/test_func_api_classification_binary.py
@@ -35,6 +35,8 @@
from sklearn.naive_bayes import GaussianNB
from vecstack import stacking
+from sklearn.multiclass import OneVsRestClassifier
+
n_classes = 2
n_folds = 5
temp_dir = 'tmpdw35lg54ms80eb42'
@@ -90,6 +92,20 @@ def predict(self, X):
def predict_proba(self, X):
return super(LogisticRegressionUnrolled, self).predict_proba(X.reshape(X.shape[0], -1))
+
+class OneVsRestClassifierUnrolled(OneVsRestClassifier):
+ """
+ Just to avoid data shape checks
+ """
+ def fit(self, X, y):
+ return super(OneVsRestClassifierUnrolled, self).fit(X.reshape(X.shape[0], -1), y)
+
+ def predict(self, X):
+ return super(OneVsRestClassifierUnrolled, self).predict(X.reshape(X.shape[0], -1))
+
+ def predict_proba(self, X):
+ return super(OneVsRestClassifierUnrolled, self).predict_proba(X.reshape(X.shape[0], -1))
+
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
@@ -130,13 +146,13 @@ def tearDown(self):
def test_oof_pred_mode(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
_ = model.fit(X_train, y_train)
S_test_1 = model.predict(X_test).reshape(-1, 1)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred', random_state = 0, verbose = 0, stratified = True)
@@ -157,12 +173,12 @@ def test_oof_pred_mode(self):
def test_oof_mode(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
S_test_1 = None
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof', random_state = 0, verbose = 0, stratified = True)
@@ -183,12 +199,12 @@ def test_oof_mode(self):
def test_pred_mode(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = None
_ = model.fit(X_train, y_train)
S_test_1 = model.predict(X_test).reshape(-1, 1)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'pred', random_state = 0, verbose = 0, stratified = True)
@@ -211,23 +227,23 @@ def test_oof_pred_bag_mode(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1 = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1 = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)
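
Why `random_state=0` becomes `None` throughout these splitters: recent scikit-learn (1.0+, stated here as an assumption) raises a ValueError when a seed is passed together with `shuffle=False`, because the seed would have no effect. A minimal sketch:

    from sklearn.model_selection import StratifiedKFold

    kf = StratifiedKFold(n_splits=5, shuffle=False, random_state=None)  # OK
    # StratifiedKFold(n_splits=5, shuffle=False, random_state=0)  # ValueError in recent scikit-learn
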
@@ -250,21 +266,21 @@ def test_pred_bag_mode(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1 = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1 = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
S_train_1 = None
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'pred_bag', random_state = 0, verbose = 0, stratified = True)
@@ -289,13 +305,13 @@ def test_pred_bag_mode(self):
def test_oof_pred_mode_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
_ = model.fit(X_train, y_train)
S_test_1 = model.predict_proba(X_test)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'oof_pred', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)
@@ -316,12 +332,12 @@ def test_oof_pred_mode_proba(self):
def test_oof_mode_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
S_test_1 = None
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'oof', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)
@@ -342,12 +358,12 @@ def test_oof_mode_proba(self):
def test_pred_mode_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = None
_ = model.fit(X_train, y_train)
S_test_1 = model.predict_proba(X_test)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'pred', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)
@@ -371,25 +387,25 @@ def test_oof_pred_bag_mode_proba(self):
S_test_1 = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1[:, class_id] = np.mean(S_test_temp[:, class_id::n_classes], axis = 1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True, needs_proba = True)
@@ -422,14 +438,14 @@ def test_pred_bag_mode_proba(self):
S_test_1 = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
@@ -438,7 +454,7 @@ def test_pred_bag_mode_proba(self):
S_train_1 = None
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'pred_bag', random_state = 0, verbose = 0, stratified = True, needs_proba = True)
@@ -472,17 +488,17 @@ def test_oof_pred_bag_mode_shuffle(self):
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1 = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1 = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
# !!! Important. Here we pass a CV generator, not the number of folds
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = kf,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = True, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)
@@ -509,7 +525,7 @@ def test_oof_pred_bag_mode_shuffle(self):
#---------------------------------------------------------------------------
def test_oof_mode_metric(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
scorer = make_scorer(accuracy_score)
scores = cross_val_score(model, X_train, y = y_train, cv = n_folds,
scoring = scorer, n_jobs = 1, verbose = 0)
@@ -517,7 +533,7 @@ def test_oof_mode_metric(self):
std_str_1 = '%.8f' % np.std(scores)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train, S_test = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, save_dir=temp_dir,
mode = 'oof', random_state = 0, verbose = 0, stratified = True)
@@ -546,15 +562,15 @@ def test_oof_mode_metric(self):
#---------------------------------------------------------------------------
def test_oof_mode_metric_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
- scorer = make_scorer(log_loss, needs_proba = True)
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
+ scorer = make_scorer(log_loss, response_method='predict_proba')
scores = cross_val_score(model, X_train, y = y_train, cv = n_folds,
scoring = scorer, n_jobs = 1, verbose = 0)
mean_str_1 = '%.8f' % np.mean(scores)
std_str_1 = '%.8f' % np.std(scores)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train, S_test = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, save_dir=temp_dir,
mode = 'oof', random_state = 0, verbose = 0, stratified = True,
@@ -583,7 +599,7 @@ def test_oof_mode_metric_proba(self):
def test_oof_pred_mode_2_models(self):
# Model a
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_a = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
_ = model.fit(X_train, y_train)
@@ -599,7 +615,7 @@ def test_oof_pred_mode_2_models(self):
S_train_1 = np.c_[S_train_1_a, S_train_1_b]
S_test_1 = np.c_[S_test_1_a, S_test_1_b]
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'),
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
@@ -624,26 +640,26 @@ def test_oof_pred_bag_mode_2_models(self):
# Model a
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1_a = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1_a = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_a = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
# Model b
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -653,7 +669,7 @@ def test_oof_pred_bag_mode_2_models(self):
model = GaussianNB()
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1_b = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1_b = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
model = GaussianNB()
S_train_1_b = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
@@ -662,7 +678,7 @@ def test_oof_pred_bag_mode_2_models(self):
S_train_1 = np.c_[S_train_1_a, S_train_1_b]
S_test_1 = np.c_[S_test_1_a, S_test_1_b]
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'),
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
@@ -686,7 +702,7 @@ def test_oof_pred_bag_mode_2_models(self):
def test_oof_pred_mode_proba_2_models(self):
# Model a
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_a = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
_ = model.fit(X_train, y_train)
@@ -702,7 +718,7 @@ def test_oof_pred_mode_proba_2_models(self):
S_train_1 = np.c_[S_train_1_a, S_train_1_b]
S_test_1 = np.c_[S_test_1_a, S_test_1_b]
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'),
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
@@ -729,21 +745,21 @@ def test_oof_pred_bag_mode_proba_2_models(self):
S_test_1_a = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1_a[:, class_id] = np.mean(S_test_temp[:, class_id::n_classes], axis = 1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_a = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
@@ -751,7 +767,7 @@ def test_oof_pred_bag_mode_proba_2_models(self):
S_test_1_b = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -774,7 +790,7 @@ def test_oof_pred_bag_mode_proba_2_models(self):
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'),
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
@@ -809,23 +825,23 @@ def test_N_dim_input(self):
"""
S_test_temp = np.zeros((X_test_4d_unrolled.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train_4d_unrolled, y_train_4d)):
# Split data and target
X_tr = X_train_4d_unrolled[tr_index]
y_tr = y_train_4d[tr_index]
X_te = X_train_4d_unrolled[te_index]
y_te = y_train_4d[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test_4d_unrolled)
- S_test_1 = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1 = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train_4d_unrolled, y = y_train_4d, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
- models = [LogisticRegressionUnrolled(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifierUnrolled(LogisticRegressionUnrolled(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train_4d, y_train_4d, X_test_4d,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)
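
The two migrations repeated throughout this file, as a self-contained sketch (assumed versions: a scikit-learn release recent enough that `multi_class` is gone from LogisticRegression, and SciPy 1.11+, where `st.mode` drops the reduced axis by default):

    import numpy as np
    import scipy.stats as st
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.multiclass import OneVsRestClassifier

    X, y = make_classification(n_samples=100, random_state=0)

    # Old: LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
    model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
    _ = model.fit(X, y)

    # keepdims=True restores the (n_samples, 1) column shape the assertions expect
    fold_preds = np.column_stack([model.predict(X) for _ in range(5)])
    majority = st.mode(fold_preds, axis=1, keepdims=True)[0]
    assert majority.shape == (X.shape[0], 1)
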
diff --git a/tests/test_func_api_classification_multiclass.py b/tests/test_func_api_classification_multiclass.py
index f74eb4d..db2ae54 100644
--- a/tests/test_func_api_classification_multiclass.py
+++ b/tests/test_func_api_classification_multiclass.py
@@ -32,6 +32,8 @@
from sklearn.naive_bayes import GaussianNB
from vecstack import stacking
+from sklearn.multiclass import OneVsRestClassifier
+
n_classes = 3
n_folds = 5
temp_dir = 'tmpdw35lg54ms80eb42'
@@ -87,6 +89,20 @@ def predict(self, X):
def predict_proba(self, X):
return super(LogisticRegressionUnrolled, self).predict_proba(X.reshape(X.shape[0], -1))
+
+class OneVsRestClassifierUnrolled(OneVsRestClassifier):
+ """
+ Reshape N-dim input to 2-D to avoid data shape checks
+ """
+ def fit(self, X, y):
+ return super(OneVsRestClassifierUnrolled, self).fit(X.reshape(X.shape[0], -1), y)
+
+ def predict(self, X):
+ return super(OneVsRestClassifierUnrolled, self).predict(X.reshape(X.shape[0], -1))
+
+ def predict_proba(self, X):
+ return super(OneVsRestClassifierUnrolled, self).predict_proba(X.reshape(X.shape[0], -1))
+
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
@@ -127,13 +143,13 @@ def tearDown(self):
def test_oof_pred_mode(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
_ = model.fit(X_train, y_train)
S_test_1 = model.predict(X_test).reshape(-1, 1)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred', random_state = 0, verbose = 0, stratified = True)
@@ -154,12 +170,12 @@ def test_oof_pred_mode(self):
def test_oof_mode(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
S_test_1 = None
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof', random_state = 0, verbose = 0, stratified = True)
@@ -180,12 +196,12 @@ def test_oof_mode(self):
def test_pred_mode(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = None
_ = model.fit(X_train, y_train)
S_test_1 = model.predict(X_test).reshape(-1, 1)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'pred', random_state = 0, verbose = 0, stratified = True)
@@ -208,23 +224,23 @@ def test_oof_pred_bag_mode(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1 = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1 = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)
@@ -247,21 +263,21 @@ def test_pred_bag_mode(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1 = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1 = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
S_train_1 = None
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'pred_bag', random_state = 0, verbose = 0, stratified = True)
@@ -286,13 +302,13 @@ def test_pred_bag_mode(self):
def test_oof_pred_mode_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
_ = model.fit(X_train, y_train)
S_test_1 = model.predict_proba(X_test)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'oof_pred', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)
@@ -313,12 +329,12 @@ def test_oof_pred_mode_proba(self):
def test_oof_mode_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
S_test_1 = None
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'oof', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)
@@ -339,12 +355,12 @@ def test_oof_mode_proba(self):
def test_pred_mode_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = None
_ = model.fit(X_train, y_train)
S_test_1 = model.predict_proba(X_test)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'pred', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)
@@ -368,25 +384,25 @@ def test_oof_pred_bag_mode_proba(self):
S_test_1 = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1[:, class_id] = np.mean(S_test_temp[:, class_id::n_classes], axis = 1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True, needs_proba = True)
@@ -419,14 +435,14 @@ def test_pred_bag_mode_proba(self):
S_test_1 = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
@@ -435,7 +451,7 @@ def test_pred_bag_mode_proba(self):
S_train_1 = None
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'pred_bag', random_state = 0, verbose = 0, stratified = True, needs_proba = True)
@@ -469,17 +485,17 @@ def test_oof_pred_bag_mode_shuffle(self):
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1 = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1 = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
# !!! Important. Here we pass a CV generator, not the number of folds
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = kf,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = True, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)
@@ -506,7 +522,7 @@ def test_oof_pred_bag_mode_shuffle(self):
#---------------------------------------------------------------------------
def test_oof_mode_metric(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
scorer = make_scorer(accuracy_score)
scores = cross_val_score(model, X_train, y = y_train, cv = n_folds,
scoring = scorer, n_jobs = 1, verbose = 0)
@@ -514,7 +530,7 @@ def test_oof_mode_metric(self):
std_str_1 = '%.8f' % np.std(scores)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train, S_test = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, save_dir=temp_dir,
mode = 'oof', random_state = 0, verbose = 0, stratified = True)
@@ -543,15 +559,15 @@ def test_oof_mode_metric(self):
#---------------------------------------------------------------------------
def test_oof_mode_metric_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
- scorer = make_scorer(log_loss, needs_proba = True)
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
+ scorer = make_scorer(log_loss, response_method='predict_proba')
scores = cross_val_score(model, X_train, y = y_train, cv = n_folds,
scoring = scorer, n_jobs = 1, verbose = 0)
mean_str_1 = '%.8f' % np.mean(scores)
std_str_1 = '%.8f' % np.std(scores)
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))]
S_train, S_test = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, save_dir=temp_dir,
mode = 'oof', random_state = 0, verbose = 0, stratified = True,
@@ -580,7 +596,7 @@ def test_oof_mode_metric_proba(self):
def test_oof_pred_mode_2_models(self):
# Model a
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_a = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
_ = model.fit(X_train, y_train)
@@ -596,7 +612,7 @@ def test_oof_pred_mode_2_models(self):
S_train_1 = np.c_[S_train_1_a, S_train_1_b]
S_test_1 = np.c_[S_test_1_a, S_test_1_b]
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'),
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
@@ -621,26 +637,26 @@ def test_oof_pred_bag_mode_2_models(self):
# Model a
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1_a = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1_a = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_a = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
# Model b
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -650,7 +666,7 @@ def test_oof_pred_bag_mode_2_models(self):
model = GaussianNB()
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1_b = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1_b = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
model = GaussianNB()
S_train_1_b = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
@@ -659,7 +675,7 @@ def test_oof_pred_bag_mode_2_models(self):
S_train_1 = np.c_[S_train_1_a, S_train_1_b]
S_test_1 = np.c_[S_test_1_a, S_test_1_b]
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'),
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
@@ -683,7 +699,7 @@ def test_oof_pred_bag_mode_2_models(self):
def test_oof_pred_mode_proba_2_models(self):
# Model a
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_a = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
_ = model.fit(X_train, y_train)
@@ -699,7 +715,7 @@ def test_oof_pred_mode_proba_2_models(self):
S_train_1 = np.c_[S_train_1_a, S_train_1_b]
S_test_1 = np.c_[S_test_1_a, S_test_1_b]
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'),
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
@@ -726,21 +742,21 @@ def test_oof_pred_bag_mode_proba_2_models(self):
S_test_1_a = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
X_te = X_train[te_index]
y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1_a[:, class_id] = np.mean(S_test_temp[:, class_id::n_classes], axis = 1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_a = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict_proba')
@@ -748,7 +764,7 @@ def test_oof_pred_bag_mode_proba_2_models(self):
S_test_1_b = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -771,7 +787,7 @@ def test_oof_pred_bag_mode_proba_2_models(self):
- models = [LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'),
+ models = [OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
@@ -807,23 +823,23 @@ def test_N_dim_input(self):
"""
S_test_temp = np.zeros((X_test_4d_unrolled.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = StratifiedKFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train_4d_unrolled, y_train_4d)):
# Split data and target
X_tr = X_train_4d_unrolled[tr_index]
y_tr = y_train_4d[tr_index]
X_te = X_train_4d_unrolled[te_index]
y_te = y_train_4d[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
_ = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test_4d_unrolled)
- S_test_1 = st.mode(S_test_temp, axis = 1)[0]
+ S_test_1 = st.mode(S_test_temp, axis = 1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train_4d_unrolled, y = y_train_4d, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict').reshape(-1, 1)
- models = [LogisticRegressionUnrolled(random_state=0, solver='liblinear', multi_class='ovr')]
+ models = [OneVsRestClassifierUnrolled(LogisticRegressionUnrolled(random_state=0, solver='liblinear'))]
S_train_2, S_test_2 = stacking(models, X_train_4d, y_train_4d, X_test_4d,
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)
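
The multiclass suite also carries the scorer migration: `needs_proba` was replaced by `response_method` in `make_scorer` (scikit-learn 1.4+, stated as an assumption). A minimal sketch; `greater_is_better` is left at its default to mirror the tests, which only compare the two code paths against each other:

    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import log_loss, make_scorer
    from sklearn.model_selection import cross_val_score
    from sklearn.multiclass import OneVsRestClassifier

    X, y = make_classification(n_samples=150, n_classes=3, n_informative=4, random_state=0)

    # Old: make_scorer(log_loss, needs_proba=True)
    scorer = make_scorer(log_loss, response_method='predict_proba')
    model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
    scores = cross_val_score(model, X, y, cv=5, scoring=scorer)
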
diff --git a/tests/test_func_api_regression.py b/tests/test_func_api_regression.py
index a086401..29835dc 100644
--- a/tests/test_func_api_regression.py
+++ b/tests/test_func_api_regression.py
@@ -26,7 +26,8 @@
from sklearn.model_selection import cross_val_score
# from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
-from sklearn.datasets import load_boston
+# from sklearn.datasets import load_boston
+from sklearn.datasets import fetch_openml
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import make_scorer
from sklearn.linear_model import LinearRegression
@@ -37,8 +38,10 @@
n_folds = 5
temp_dir = 'tmpdw35lg54ms80eb42'
-boston = load_boston()
-X, y = boston.data, boston.target
+# boston = load_boston()
+boston = fetch_openml(name='boston', version=1, as_frame=False, parser='auto')
+# X, y = boston.data, boston.target
+X, y = boston.data.astype(float), boston.target.astype(float)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
@@ -219,7 +222,7 @@ def test_pred_mode(self):
def test_oof_pred_bag_mode(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = KFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -257,7 +260,7 @@ def test_oof_pred_bag_mode(self):
def test_A_mode(self):
""" 'A' is alias for 'oof_pred_bag' """
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = KFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -295,7 +298,7 @@ def test_A_mode(self):
def test_pred_bag_mode(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = KFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -338,7 +341,7 @@ def test_oof_pred_mode_sample_weight_one(self):
model = LinearRegression()
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict',
- fit_params = {'sample_weight': sw}).reshape(-1, 1)
+ params = {'sample_weight': sw}).reshape(-1, 1)
_ = model.fit(X_train, y_train, sample_weight = sw)
S_test_1 = model.predict(X_test).reshape(-1, 1)
@@ -373,7 +376,7 @@ def test_oof_pred_mode_sample_weight_random(self):
model = LinearRegression()
S_train_1 = cross_val_predict(model, X_train, y = y_train, cv = n_folds,
n_jobs = 1, verbose = 0, method = 'predict',
- fit_params = {'sample_weight': sw}).reshape(-1, 1)
+ params = {'sample_weight': sw}).reshape(-1, 1)
_ = model.fit(X_train, y_train, sample_weight = sw)
S_test_1 = model.predict(X_test).reshape(-1, 1)
@@ -571,7 +574,7 @@ def test_oof_pred_bag_mode_2_models(self):
# Model a
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = KFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -589,7 +592,7 @@ def test_oof_pred_bag_mode_2_models(self):
# Model b
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = KFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -909,7 +912,7 @@ def test_small_input(self):
Test: 10 examples
"""
S_test_temp = np.zeros((X_test[:10].shape[0], n_folds))
- kf = KFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = KFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train[:20], y_train[:20])):
# Split data and target
X_tr = X_train[:20][tr_index]
@@ -984,7 +987,7 @@ def test_all_defaults(self):
n_folds=4
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits = n_folds, shuffle = False, random_state = 0)
+ kf = KFold(n_splits = n_folds, shuffle = False, random_state = None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
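
The two regression-suite migrations in one sketch, both taken straight from the hunks above: `load_boston` was removed from scikit-learn (in 1.2, stated as an assumption), so the same dataset now comes from OpenML; and `cross_val_predict` takes `params` instead of the renamed-away `fit_params`, still forwarded to the estimator's `fit` when metadata routing is off:

    import numpy as np
    from sklearn.datasets import fetch_openml
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import cross_val_predict

    boston = fetch_openml(name='boston', version=1, as_frame=False, parser='auto')
    X, y = boston.data.astype(float), boston.target.astype(float)

    # Uniform sample weights, as in test_oof_pred_mode_sample_weight_one
    sw = np.ones(y.shape[0])
    # Old: fit_params={'sample_weight': sw}
    oof = cross_val_predict(LinearRegression(), X, y, cv=5, method='predict',
                            params={'sample_weight': sw}).reshape(-1, 1)
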
diff --git a/tests/test_sklearn_api_classification_binary.py b/tests/test_sklearn_api_classification_binary.py
index 82d8db6..5c52b6c 100644
--- a/tests/test_sklearn_api_classification_binary.py
+++ b/tests/test_sklearn_api_classification_binary.py
@@ -39,6 +39,8 @@
from sklearn.dummy import DummyClassifier
from vecstack import StackingTransformer
+from sklearn.multiclass import OneVsRestClassifier
+
n_classes = 2
n_folds = 5
# temp_dir = 'tmpdw35lg54ms80eb42'
@@ -85,7 +87,7 @@ def roc_auc_score_universal(y_true, y_pred):
y_pred - 2d numpy array
Predicted probabilities for each class
"""
- ohe = OneHotEncoder(sparse=False)
+ ohe = OneHotEncoder(sparse_output=False)
y_true = ohe.fit_transform(y_true.reshape(-1, 1))
#@@@@
if len(y_pred.shape) == 1:
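
The encoder rename above tracks scikit-learn's API: the `sparse` argument of OneHotEncoder became `sparse_output` (changed in 1.2, old name later removed; version numbers stated as assumptions). Dense output keeps the downstream slicing in `roc_auc_score_universal` unchanged. A minimal sketch:

    import numpy as np
    from sklearn.preprocessing import OneHotEncoder

    y_true = np.array([0, 1, 1, 0])
    ohe = OneHotEncoder(sparse_output=False)  # old: OneHotEncoder(sparse=False)
    assert ohe.fit_transform(y_true.reshape(-1, 1)).shape == (4, 2)
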
@@ -106,7 +108,7 @@ class TestSklearnClassificationBinary(unittest.TestCase):
def test_variant_B_labels(self):
# reference
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
@@ -114,7 +116,7 @@ def test_variant_B_labels(self):
S_test_1 = model.predict(X_test).reshape(-1, 1)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -143,25 +145,25 @@ def test_variant_A_labels(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1 = st.mode(S_test_temp, axis=1)[0]
+ S_test_1 = st.mode(S_test_temp, axis=1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='A', random_state=0,
@@ -188,7 +190,7 @@ def test_variant_A_labels(self):
def test_variant_B_proba(self):
# reference
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict_proba')
@@ -196,7 +198,7 @@ def test_variant_B_proba(self):
S_test_1 = model.predict_proba(X_test)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -227,27 +229,27 @@ def test_variant_A_proba(self):
S_test_1 = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by default cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1[:, class_id] = np.mean(S_test_temp[:, class_id::n_classes], axis=1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict_proba')
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='A', random_state=0,
@@ -284,21 +286,21 @@ def test_variant_A_proba_shuffle_random_state(self):
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1[:, class_id] = np.mean(S_test_temp[:, class_id::n_classes], axis=1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
# !!! Important. Here we pass a CV generator ``cv=kf``, not the number of folds
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=kf, n_jobs=1, verbose=0,
method='predict_proba')
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=True,
variant='A', random_state=0,
@@ -328,14 +330,14 @@ def test_variant_A_proba_shuffle_random_state(self):
# -------------------------------------------------------------------------
def test_default_metric_and_scores_labels(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
scorer = make_scorer(accuracy_score)
scores_1 = cross_val_score(model, X_train, y=y_train,
cv=n_folds, scoring=scorer,
n_jobs=1, verbose=0)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -366,14 +368,14 @@ def test_default_metric_and_scores_labels(self):
# -------------------------------------------------------------------------
def test_custom_metric_and_scores_labels(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
scorer = make_scorer(zero_one_loss)
scores_1 = cross_val_score(model, X_train, y=y_train,
cv=n_folds, scoring=scorer,
n_jobs=1, verbose=0)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -405,14 +407,14 @@ def test_custom_metric_and_scores_labels(self):
# -------------------------------------------------------------------------
def test_default_metric_and_scores_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
- scorer = make_scorer(log_loss, needs_proba=True)
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
+ scorer = make_scorer(log_loss, response_method='predict_proba')
scores_1 = cross_val_score(model, X_train, y=y_train,
cv=n_folds, scoring=scorer,
n_jobs=1, verbose=0)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -444,14 +446,14 @@ def test_default_metric_and_scores_proba(self):
# -------------------------------------------------------------------------
def test_custom_metric_and_scores_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
- scorer = make_scorer(roc_auc_score_universal, needs_proba=True)
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
+ scorer = make_scorer(roc_auc_score_universal, response_method='predict_proba')
scores_1 = cross_val_score(model, X_train, y=y_train,
cv=n_folds, scoring=scorer,
n_jobs=1, verbose=0)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -484,7 +486,7 @@ def test_custom_metric_and_scores_proba(self):
def test_variant_B_2_estimators_labels(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_e1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
@@ -502,7 +504,7 @@ def test_variant_B_2_estimators_labels(self):
S_test_1 = np.c_[S_test_1_e1, S_test_1_e2]
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')),
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))),
('bayes', GaussianNB())]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
@@ -529,7 +531,7 @@ def test_variant_B_2_estimators_labels(self):
def test_variant_B_2_estimators_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_e1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict_proba')
@@ -547,7 +549,7 @@ def test_variant_B_2_estimators_proba(self):
S_test_1 = np.c_[S_test_1_e1, S_test_1_e2]
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')),
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))),
('bayes', GaussianNB())]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
@@ -578,19 +580,19 @@ def test_variant_A_2_estimators_labels(self):
# Estimator 1
S_test_temp_e1 = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
S_test_temp_e1[:, fold_counter] = model.predict(X_test)
- S_test_1_e1 = st.mode(S_test_temp_e1, axis=1)[0]
+ S_test_1_e1 = st.mode(S_test_temp_e1, axis=1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_e1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
@@ -598,7 +600,7 @@ def test_variant_A_2_estimators_labels(self):
# Estimator 2
S_test_temp_e2 = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -608,7 +610,7 @@ def test_variant_A_2_estimators_labels(self):
model = GaussianNB()
model = model.fit(X_tr, y_tr)
S_test_temp_e2[:, fold_counter] = model.predict(X_test)
- S_test_1_e2 = st.mode(S_test_temp_e2, axis=1)[0]
+ S_test_1_e2 = st.mode(S_test_temp_e2, axis=1, keepdims=True)[0]
model = GaussianNB()
S_train_1_e2 = cross_val_predict(model, X_train, y=y_train,
@@ -619,7 +621,7 @@ def test_variant_A_2_estimators_labels(self):
S_test_1 = np.c_[S_test_1_e1, S_test_1_e2]
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')),
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))),
('bayes', GaussianNB())]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
@@ -651,21 +653,21 @@ def test_variant_A_2_estimators_proba(self):
S_test_1_e1 = np.zeros((X_test.shape[0], n_classes))
S_test_temp_e1 = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp_e1[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1_e1[:, class_id] = np.mean(S_test_temp_e1[:, class_id::n_classes], axis=1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_e1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict_proba')
@@ -674,7 +676,7 @@ def test_variant_A_2_estimators_proba(self):
S_test_1_e2 = np.zeros((X_test.shape[0], n_classes))
S_test_temp_e2 = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -697,7 +699,7 @@ def test_variant_A_2_estimators_proba(self):
S_test_1 = np.c_[S_test_1_e1, S_test_1_e2]
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')),
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))),
('bayes', GaussianNB())]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
@@ -791,7 +793,7 @@ def test_variant_B_default_classifier_proba(self):
#---------------------------------------------------------------------------
def test_variant_B_verbose(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
@@ -800,7 +802,7 @@ def test_variant_B_verbose(self):
# verbose=0
# fit then transform
- estimators = [('lr', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('lr', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -816,7 +818,7 @@ def test_variant_B_verbose(self):
# verbose=1
# fit then transform
- estimators = [('lr', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('lr', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -832,7 +834,7 @@ def test_variant_B_verbose(self):
# verbose=2
# fit then transform
- estimators = [('lr', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('lr', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
diff --git a/tests/test_sklearn_api_classification_multiclass.py b/tests/test_sklearn_api_classification_multiclass.py
index 4153f7f..767225c 100644
--- a/tests/test_sklearn_api_classification_multiclass.py
+++ b/tests/test_sklearn_api_classification_multiclass.py
@@ -14,6 +14,7 @@
from numpy.testing import assert_array_equal
# from numpy.testing import assert_allclose
from numpy.testing import assert_equal
+from numpy.testing import assert_raises
# import os
# import glob
@@ -36,6 +37,14 @@
from sklearn.dummy import DummyClassifier
from vecstack import StackingTransformer
+from sklearn.multiclass import OneVsRestClassifier
+
+from sklearn.ensemble import ExtraTreesClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.ensemble import StackingClassifier
+from sklearn.pipeline import Pipeline
+
+
n_classes = 3
n_folds = 5
# temp_dir = 'tmpdw35lg54ms80eb42'
@@ -82,7 +91,7 @@ def roc_auc_score_universal(y_true, y_pred):
y_pred - 2d numpy array
Predicted probabilities for each class
"""
- ohe = OneHotEncoder(sparse=False)
+ ohe = OneHotEncoder(sparse_output=False)
y_true = ohe.fit_transform(y_true.reshape(-1, 1))
auc_score = roc_auc_score(y_true, y_pred)
return auc_score
@@ -98,7 +107,7 @@ class TestSklearnClassificationMulticlass(unittest.TestCase):
def test_variant_B_labels(self):
# reference
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
@@ -106,7 +115,7 @@ def test_variant_B_labels(self):
S_test_1 = model.predict(X_test).reshape(-1, 1)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -135,25 +144,25 @@ def test_variant_A_labels(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
S_test_temp[:, fold_counter] = model.predict(X_test)
- S_test_1 = st.mode(S_test_temp, axis=1)[0]
+ S_test_1 = st.mode(S_test_temp, axis=1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='A', random_state=0,
@@ -180,7 +189,7 @@ def test_variant_A_labels(self):
def test_variant_B_proba(self):
# reference
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict_proba')
@@ -188,7 +197,7 @@ def test_variant_B_proba(self):
S_test_1 = model.predict_proba(X_test)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -219,27 +228,27 @@ def test_variant_A_proba(self):
S_test_1 = np.zeros((X_test.shape[0], n_classes))
S_test_temp = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1[:, class_id] = np.mean(S_test_temp[:, class_id::n_classes], axis=1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict_proba')
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='A', random_state=0,
@@ -276,21 +285,21 @@ def test_variant_A_proba_shuffle_random_state(self):
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1[:, class_id] = np.mean(S_test_temp[:, class_id::n_classes], axis=1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
# !!! Important. Here we pass CV-generator ``cv=kf`` not number of folds
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=kf, n_jobs=1, verbose=0,
method='predict_proba')
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=True,
variant='A', random_state=0,
@@ -320,14 +329,14 @@ def test_variant_A_proba_shuffle_random_state(self):
# -------------------------------------------------------------------------
def test_default_metric_and_scores_labels(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
scorer = make_scorer(accuracy_score)
scores_1 = cross_val_score(model, X_train, y=y_train,
cv=n_folds, scoring=scorer,
n_jobs=1, verbose=0)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -358,14 +367,14 @@ def test_default_metric_and_scores_labels(self):
# -------------------------------------------------------------------------
def test_custom_metric_and_scores_labels(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
scorer = make_scorer(zero_one_loss)
scores_1 = cross_val_score(model, X_train, y=y_train,
cv=n_folds, scoring=scorer,
n_jobs=1, verbose=0)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -397,14 +406,14 @@ def test_custom_metric_and_scores_labels(self):
# -------------------------------------------------------------------------
def test_default_metric_and_scores_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
- scorer = make_scorer(log_loss, needs_proba=True)
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
+ scorer = make_scorer(log_loss, response_method='predict_proba')
scores_1 = cross_val_score(model, X_train, y=y_train,
cv=n_folds, scoring=scorer,
n_jobs=1, verbose=0)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -436,14 +445,14 @@ def test_default_metric_and_scores_proba(self):
# -------------------------------------------------------------------------
def test_custom_metric_and_scores_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
- scorer = make_scorer(roc_auc_score_universal, needs_proba=True)
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
+ scorer = make_scorer(roc_auc_score_universal, response_method='predict_proba')
scores_1 = cross_val_score(model, X_train, y=y_train,
cv=n_folds, scoring=scorer,
n_jobs=1, verbose=0)
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -476,7 +485,7 @@ def test_custom_metric_and_scores_proba(self):
def test_variant_B_2_estimators_labels(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_e1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
@@ -494,7 +503,7 @@ def test_variant_B_2_estimators_labels(self):
S_test_1 = np.c_[S_test_1_e1, S_test_1_e2]
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')),
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))),
('bayes', GaussianNB())]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
@@ -521,7 +530,7 @@ def test_variant_B_2_estimators_labels(self):
def test_variant_B_2_estimators_proba(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_e1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict_proba')
@@ -539,7 +548,7 @@ def test_variant_B_2_estimators_proba(self):
S_test_1 = np.c_[S_test_1_e1, S_test_1_e2]
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')),
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))),
('bayes', GaussianNB())]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
@@ -570,19 +579,19 @@ def test_variant_A_2_estimators_labels(self):
# Estimator 1
S_test_temp_e1 = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
S_test_temp_e1[:, fold_counter] = model.predict(X_test)
- S_test_1_e1 = st.mode(S_test_temp_e1, axis=1)[0]
+ S_test_1_e1 = st.mode(S_test_temp_e1, axis=1, keepdims=True)[0]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_e1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
@@ -590,7 +599,7 @@ def test_variant_A_2_estimators_labels(self):
# Estimator 2
S_test_temp_e2 = np.zeros((X_test.shape[0], n_folds))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -600,7 +609,7 @@ def test_variant_A_2_estimators_labels(self):
model = GaussianNB()
model = model.fit(X_tr, y_tr)
S_test_temp_e2[:, fold_counter] = model.predict(X_test)
- S_test_1_e2 = st.mode(S_test_temp_e2, axis=1)[0]
+ S_test_1_e2 = st.mode(S_test_temp_e2, axis=1, keepdims=True)[0]
model = GaussianNB()
S_train_1_e2 = cross_val_predict(model, X_train, y=y_train,
@@ -611,7 +620,7 @@ def test_variant_A_2_estimators_labels(self):
S_test_1 = np.c_[S_test_1_e1, S_test_1_e2]
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')),
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))),
('bayes', GaussianNB())]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
@@ -643,21 +652,21 @@ def test_variant_A_2_estimators_proba(self):
S_test_1_e1 = np.zeros((X_test.shape[0], n_classes))
S_test_temp_e1 = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
y_tr = y_train[tr_index]
# X_te = X_train[te_index]
# y_te = y_train[te_index]
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
model = model.fit(X_tr, y_tr)
col_slice_fold = slice(fold_counter * n_classes, fold_counter * n_classes + n_classes)
S_test_temp_e1[:, col_slice_fold] = model.predict_proba(X_test)
for class_id in range(n_classes):
S_test_1_e1[:, class_id] = np.mean(S_test_temp_e1[:, class_id::n_classes], axis=1)
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1_e1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict_proba')
@@ -666,7 +675,7 @@ def test_variant_A_2_estimators_proba(self):
S_test_1_e2 = np.zeros((X_test.shape[0], n_classes))
S_test_temp_e2 = np.zeros((X_test.shape[0], n_folds * n_classes))
# Using StratifiedKFold because by defauld cross_val_predict uses StratifiedKFold
- kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = StratifiedKFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -689,7 +698,7 @@ def test_variant_A_2_estimators_proba(self):
S_test_1 = np.c_[S_test_1_e1, S_test_1_e2]
# fit then transform
- estimators = [('logit', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')),
+ estimators = [('logit', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))),
('bayes', GaussianNB())]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
@@ -783,7 +792,7 @@ def test_variant_B_default_classifier_proba(self):
#---------------------------------------------------------------------------
def test_variant_B_verbose(self):
- model = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr')
+ model = OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear'))
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict').reshape(-1, 1)
@@ -792,7 +801,7 @@ def test_variant_B_verbose(self):
# verbose=0
# fit then transform
- estimators = [('lr', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('lr', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -808,7 +817,7 @@ def test_variant_B_verbose(self):
# verbose=1
# fit then transform
- estimators = [('lr', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('lr', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -824,7 +833,7 @@ def test_variant_B_verbose(self):
# verbose=2
# fit then transform
- estimators = [('lr', LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr'))]
+ estimators = [('lr', OneVsRestClassifier(LogisticRegression(random_state=0, solver='liblinear')))]
stack = StackingTransformer(estimators, regression=False,
n_folds=n_folds, shuffle=False,
variant='B', random_state=0,
@@ -857,6 +866,60 @@ def test_variant_B_verbose(self):
assert_array_equal(S_train_1, S_train_7)
assert_array_equal(S_test_1, S_test_7)
+
+ #--------------------------------------------------------------------------
+ # Added 20250921
+ # Compare with StackingClassifier
+ #--------------------------------------------------------------------------
+
+ def test_compare_with_stackingclassifier_from_sklearn(self):
+
+ estimators = [
+ ('et', ExtraTreesClassifier(n_estimators=100, random_state=0)),
+ ('rf', RandomForestClassifier(n_estimators=100, random_state=0))]
+ final_estimator = LogisticRegression(random_state=0)
+
+ # vecstack.StackingTransformer
+ stack = StackingTransformer(estimators=estimators,
+ regression=False,
+ variant='B',
+ n_folds=5,
+ shuffle=False,
+ stratified=True,
+ needs_proba=True)
+
+ steps = [('stack', stack),
+ ('final_estimator', final_estimator)]
+ pipe = Pipeline(steps)
+ y_pred_vecstack = pipe.fit(X_train, y_train).predict_proba(X_test)
+
+ # sklearn.ensemble.StackingClassifier
+ clf = StackingClassifier(estimators=estimators,
+ final_estimator=final_estimator,
+ stack_method='predict_proba')
+ y_pred_sklearn = clf.fit(X_train, y_train).predict_proba(X_test)
+
+ assert_array_equal(y_pred_vecstack, y_pred_sklearn)
+
+ # Compare transformation
+
+ # Transformation for test set is equal
+ S_test_vecstack = stack.transform(X_test)
+ S_test_sklearn = clf.transform(X_test)
+ assert_array_equal(S_test_vecstack, S_test_sklearn)
+
+ # Transformation for the train set is different because StackingClassifier does not use a CV procedure
+ S_train_vecstack = stack.transform(X_train)
+ S_train_sklearn = clf.transform(X_train)
+ assert_raises(AssertionError, assert_array_equal, S_train_vecstack, S_train_sklearn)
+
+ # Instead of a CV procedure it just uses models trained on the whole train set
+ et = ExtraTreesClassifier(random_state=0, n_estimators=100)
+ rf = RandomForestClassifier(random_state=0, n_estimators=100)
+ y_pred_et = et.fit(X_train, y_train).predict_proba(X_train)
+ y_pred_rf = rf.fit(X_train, y_train).predict_proba(X_train)
+ assert_array_equal(S_train_sklearn, np.hstack([y_pred_et, y_pred_rf]))
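+
+ # Illustrative sketch (assumption: for variant='B' the fitted transformer's
+ # `transform(X_train)` returns the stored OOF features; `oof_et`/`oof_rf`
+ # are hypothetical names). The vecstack train-set transform should then
+ # match per-estimator out-of-fold predictions from `cross_val_predict`:
+ oof_et = cross_val_predict(et, X_train, y=y_train, cv=n_folds, method='predict_proba')
+ oof_rf = cross_val_predict(rf, X_train, y=y_train, cv=n_folds, method='predict_proba')
+ assert_array_equal(S_train_vecstack, np.hstack([oof_et, oof_rf]))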
+
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
diff --git a/tests/test_sklearn_api_regression.py b/tests/test_sklearn_api_regression.py
index d7e9bad..67a2a79 100644
--- a/tests/test_sklearn_api_regression.py
+++ b/tests/test_sklearn_api_regression.py
@@ -31,25 +31,38 @@
from sklearn.model_selection import RandomizedSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
-from sklearn.datasets import load_boston
+# from sklearn.datasets import load_boston
+from sklearn.datasets import fetch_openml
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer
+from sklearn.metrics import log_loss
from sklearn.dummy import DummyRegressor
+from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
+from sklearn.tree import DecisionTreeClassifier
from sklearn.utils.estimator_checks import check_estimator
-from sklearn.externals import joblib
+from sklearn.utils.multiclass import type_of_target
+# from sklearn.externals import joblib
+import joblib
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from vecstack import StackingTransformer
+from sklearn.ensemble import ExtraTreesRegressor
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.ensemble import StackingRegressor
+
n_folds = 5
temp_dir = 'tmpdw35lg54ms80eb42'
-boston = load_boston()
-X, y = boston.data, boston.target
+# boston = load_boston()
+boston = fetch_openml(name='boston', version=1, as_frame=False, parser='auto')
+# X, y = boston.data, boston.target
+X, y = boston.data.astype(float), boston.target.astype(float)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
@@ -73,7 +86,49 @@
# -----------------------------------------------------------------------------
-# Scikit-learn INcompatible estimator
+# -----------------------------------------------------------------------------
+
+def log_loss_mod(y_true, y_pred):
+ """
+ When the data is very small, a CV split may lead to a situation where
+ `y_true` does not contain all labels seen during training.
+ The original `log_loss` function raises in this case.
+
+ y_true = np.array([0, 1, 1])
+ y_pred = np.array([[0.3, 0.3, 0.4],
+ [0.3, 0.3, 0.4],
+ [0.3, 0.3, 0.4]])
+ log_loss(y_true, y_pred) # ValueError
+ log_loss_mod(y_true, y_pred) # OK
+ """
+ try:
+ return log_loss(y_true, y_pred)
+ except Exception:
+ shape = y_pred.shape
+ if len(shape) == 2:
+ try:
+ # Retry with explicit labels inferred from the probability columns
+ return log_loss(y_true, y_pred, labels=range(shape[1]))
+ except Exception:
+ return 0.0
+ else:
+ return 0.0
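+
+ # Used below as `metric=log_loss_mod` so that `check_estimator`'s tiny
+ # internal datasets don't abort a run on a degenerate fold.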
+
+# -----------------------------------------------------------------------------
+# -----------------------------------------------------------------------------
+
+class DummyClassifierNumeric(DummyClassifier):
+ """
+ This class is used as a default classification estimator when the `estimators` parameter is None.
+ The reason for subclassing is that the original `DummyClassifier` allows for string targets,
+ while `StackingTransformer` does not. As a result, a `StackingTransformer` created with the
+ original `DummyClassifier` does not pass validation with the `check_estimator` function.
+ """
+ def fit(self, X, y, sample_weight=None):
+ type_of_target(y, raise_unknown=True)
+ return super().fit(X, y, sample_weight=sample_weight)
+
+# -----------------------------------------------------------------------------
+# Scikit-learn INcompatible estimator
# -----------------------------------------------------------------------------
class IncompatibleEstimator:
@@ -132,10 +187,59 @@ def tearDownClass(cls):
# -------------------------------------------------------------------------
# Main scikit-learn compatibility test
# -------------------------------------------------------------------------
-
+
def test_sklearn_compatibility(self):
- check_estimator(StackingTransformer)
-
+ # Check with actual `estimators`
+ # Ignored checks
+ expected_failed_checks = {
+ # Training time
+ 'check_sample_weight_equivalence_on_dense_data': 'CV scheme is used. Changing the number of samples affects the CV split.',
+ 'check_sample_weight_equivalence_on_sparse_data': 'CV scheme is used. Changing the number of samples affects the CV split.',
+ # Prediction time
+ 'check_methods_sample_order_invariance': 'CV scheme is used. Changing the order of samples affects the CV split.',
+ 'check_methods_subset_invariance': 'CV scheme is used. Changing the number of samples affects the CV split.',
+ }
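+ # (Note: the `expected_failed_checks` argument of `check_estimator` is
+ # available in recent scikit-learn releases (1.6+); on older versions
+ # these checks would have to be skipped differently.)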
+
+ # One estimator
+ # Regression
+ estimators = [('lr', LinearRegression())]
+ check_estimator(StackingTransformer(estimators=estimators, regression=True), expected_failed_checks=expected_failed_checks)
+ # Classification (class labels)
+ estimators = [('logit', LogisticRegression())]
+ check_estimator(StackingTransformer(estimators=estimators, regression=False), expected_failed_checks=expected_failed_checks)
+ # Classification (proba)
+ estimators = [('logit', LogisticRegression())]
+ check_estimator(StackingTransformer(estimators=estimators, regression=False, needs_proba=True, metric=log_loss_mod), expected_failed_checks=expected_failed_checks)
+
+ # Two estimators
+ # Regression
+ estimators = [
+ ('lr', LinearRegression()),
+ ('ridge', Ridge()),
+ ]
+ check_estimator(StackingTransformer(estimators=estimators, regression=True), expected_failed_checks=expected_failed_checks)
+ # Classification (class labels)
+ estimators = [
+ ('logit', LogisticRegression()),
+ ('tree', DecisionTreeClassifier(random_state=0, max_depth=2)),
+ ]
+ check_estimator(StackingTransformer(estimators=estimators, regression=False), expected_failed_checks=expected_failed_checks)
+ # Classification (proba)
+ estimators = [
+ ('logit', LogisticRegression()),
+ ('tree', DecisionTreeClassifier(random_state=0, max_depth=2)),
+ ]
+ check_estimator(StackingTransformer(estimators=estimators, regression=False, needs_proba=True, metric=log_loss_mod), expected_failed_checks=expected_failed_checks)
+
+ # Check with Dummy estimators (`estimators=None` for regression,
+ # an explicit `DummyClassifierNumeric` for classification).
+ # In this case we don't need `expected_failed_checks`,
+ # because Dummy estimators predict constants regardless of the split
+ # Regression
+ check_estimator(StackingTransformer())
+ # Classification
+ estimators = [('dummyclf', DummyClassifierNumeric(strategy='constant', constant=1))]
+ check_estimator(StackingTransformer(estimators=estimators, regression=False))
+
# -------------------------------------------------------------------------
# Test returned arrays in variant B
# -------------------------------------------------------------------------
@@ -178,7 +282,7 @@ def test_variant_B(self):
def test_variant_A(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = KFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -228,7 +332,7 @@ def test_variant_B_sample_weight_one(self):
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict',
- fit_params={'sample_weight': sw}).reshape(-1, 1)
+ params={'sample_weight': sw}).reshape(-1, 1)
model = model.fit(X_train, y_train, sample_weight=sw)
S_test_1 = model.predict(X_test).reshape(-1, 1)
@@ -266,7 +370,7 @@ def test_variant_B_sample_weight_random(self):
S_train_1 = cross_val_predict(model, X_train, y=y_train,
cv=n_folds, n_jobs=1, verbose=0,
method='predict',
- fit_params={'sample_weight': sw}).reshape(-1, 1)
+ params={'sample_weight': sw}).reshape(-1, 1)
model = model.fit(X_train, y_train, sample_weight=sw)
S_test_1 = model.predict(X_test).reshape(-1, 1)
@@ -415,7 +519,7 @@ def test_variant_B_verbose(self):
# -------------------------------------------------------------------------
def test_variant_A_verbose(self):
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = KFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -770,7 +874,7 @@ def test_variant_A_2_estimators(self):
# Model a
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = KFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -789,7 +893,7 @@ def test_variant_A_2_estimators(self):
# Model b
S_test_temp = np.zeros((X_test.shape[0], n_folds))
- kf = KFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = KFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train, y_train)):
# Split data and target
X_tr = X_train[tr_index]
@@ -1306,6 +1410,149 @@ def test_pipeline(self):
# Here we expect that final predictions are equal
assert_array_equal(y_pred_1, y_pred_3)
+
+ # -------------------------------------------------------------------------
+ # Added 20250921
+ # Test Pipeline and ability to reset the whole `estimators` collection
+ # -------------------------------------------------------------------------
+ def test_pipeline_2_reset_whole_estimators_collection(self):
+ # reference
+ model = LinearRegression(fit_intercept=True)
+ S_train_1_lr = cross_val_predict(model, X_train, y=y_train,
+ cv=n_folds, n_jobs=1, verbose=0,
+ method='predict').reshape(-1, 1)
+ model = model.fit(X_train, y_train)
+ S_test_1_lr = model.predict(X_test).reshape(-1, 1)
+
+ model = DecisionTreeRegressor(random_state=0, max_depth=2)
+ S_train_1_tree = cross_val_predict(model, X_train, y=y_train,
+ cv=n_folds, n_jobs=1, verbose=0,
+ method='predict').reshape(-1, 1)
+ model = model.fit(X_train, y_train)
+ S_test_1_tree = model.predict(X_test).reshape(-1, 1)
+
+ S_train_1 = np.c_[S_train_1_lr, S_train_1_tree]
+ S_test_1 = np.c_[S_test_1_lr, S_test_1_tree]
+
+ model = Ridge(random_state=0, alpha=2)
+ model = model.fit(S_train_1, y_train)
+ y_pred_1 = model.predict(S_test_1)
+
+ # We intentionally set different parameter values to reset them
+ # later using the ``set_params`` method.
+ # We have 5 parameters which differ from the reference:
+ # ``fit_intercept``, ``max_depth``, ``variant``, ``alpha``,
+ # and the whole ``estimators`` collection
+ estimators = [('ridge_1', Ridge(alpha=1.0, fit_intercept=True, random_state=0)),
+ ('ridge_2', Ridge(alpha=0.1, fit_intercept=False, random_state=1))]
+ stack = StackingTransformer(estimators, regression=True,
+ n_folds=n_folds, shuffle=False,
+ variant='A', random_state=0,
+ verbose=0)
+ ridge = Ridge(random_state=0, alpha=7)
+
+ steps = [('stack', stack),
+ ('ridge', ridge)]
+
+ pipe = Pipeline(steps)
+
+ pipe = pipe.fit(X_train, y_train)
+ y_pred_2 = pipe.predict(X_test)
+
+ # Here we expect that final predictions are different
+ # because we've set different parameters
+ assert_raises(AssertionError, assert_array_equal, y_pred_1, y_pred_2)
+
+ # Reset original parameters used in reference
+ # First we replace the whole `estimators` collection with the correct estimators but incorrect params,
+ # and then reset the params of each estimator within the collection
+ estimators = [('lr', LinearRegression(fit_intercept=False)),
+ ('tree', DecisionTreeRegressor(random_state=0, max_depth=4))]
+ # It does not matter where `stack__estimators` is placed,
+ # i.e. first or last in the list of parameters being reset
+ pipe = pipe.set_params(stack__estimators=estimators, # replace the whole `estimators` collection
+ stack__lr__fit_intercept=True,
+ stack__tree__max_depth=2,
+ stack__variant='B',
+ ridge__alpha=2)
+
+ pipe = pipe.fit(X_train, y_train)
+ y_pred_3 = pipe.predict(X_test)
+
+ # Here we expect that final predictions are equal
+ assert_array_equal(y_pred_1, y_pred_3)
+
+
+ # -------------------------------------------------------------------------
+ # Added 20250921
+ # Test Pipeline and ability to reset an individual estimator within the `estimators` collection
+ # -------------------------------------------------------------------------
+ def test_pipeline_3_reset_individual_estimator_within_collection(self):
+ # reference
+ model = LinearRegression(fit_intercept=True)
+ S_train_1_lr = cross_val_predict(model, X_train, y=y_train,
+ cv=n_folds, n_jobs=1, verbose=0,
+ method='predict').reshape(-1, 1)
+ model = model.fit(X_train, y_train)
+ S_test_1_lr = model.predict(X_test).reshape(-1, 1)
+
+ model = DecisionTreeRegressor(random_state=0, max_depth=2)
+ S_train_1_tree = cross_val_predict(model, X_train, y=y_train,
+ cv=n_folds, n_jobs=1, verbose=0,
+ method='predict').reshape(-1, 1)
+ model = model.fit(X_train, y_train)
+ S_test_1_tree = model.predict(X_test).reshape(-1, 1)
+
+ S_train_1 = np.c_[S_train_1_lr, S_train_1_tree]
+ S_test_1 = np.c_[S_test_1_lr, S_test_1_tree]
+
+ model = Ridge(random_state=0, alpha=2)
+ model = model.fit(S_train_1, y_train)
+ y_pred_1 = model.predict(S_test_1)
+
+ # We intentionally set different parameter values to reset them
+ # later using the ``set_params`` method.
+ # We have 5 parameters which differ from the reference:
+ # ``fit_intercept``, ``max_depth``, ``variant``, ``alpha``,
+ # and an individual estimator within the ``estimators`` collection
+ estimators = [('ridge_1', Ridge(alpha=1.0, fit_intercept=True, random_state=0)),
+ ('tree', DecisionTreeRegressor(random_state=0, max_depth=4))]
+ stack = StackingTransformer(estimators, regression=True,
+ n_folds=n_folds, shuffle=False,
+ variant='A', random_state=0,
+ verbose=0)
+ ridge = Ridge(random_state=0, alpha=7)
+
+ steps = [('stack', stack),
+ ('ridge', ridge)]
+
+ pipe = Pipeline(steps)
+
+ pipe = pipe.fit(X_train, y_train)
+ y_pred_2 = pipe.predict(X_test)
+
+ # Here we expect that final predictions are different
+ # because we've set different parameters
+ assert_raises(AssertionError, assert_array_equal, y_pred_1, y_pred_2)
+
+ # Reset original parameters used in reference
+ # First we replace the individual estimator within the collection with the correct estimator but incorrect params
+ # (we retain the name "ridge_1" instead of "lr" because the name is arbitrary and does not matter),
+ # and then reset the params of each estimator within the collection.
+ # It does not matter where `stack__ridge_1` is placed,
+ # i.e. first or last in the list of parameters being reset
+ pipe = pipe.set_params(stack__ridge_1=LinearRegression(fit_intercept=False), # replace individual estimator within the collection
+ stack__ridge_1__fit_intercept=True,
+ stack__tree__max_depth=2,
+ stack__variant='B',
+ ridge__alpha=2)
+
+ pipe = pipe.fit(X_train, y_train)
+ y_pred_3 = pipe.predict(X_test)
+
+ # Here we expect that final predictions are equal
+ assert_array_equal(y_pred_1, y_pred_3)
+
# -------------------------------------------------------------------------
# Test FeatureUnion and ability to set parameters of nested estimators
# -------------------------------------------------------------------------
@@ -1707,7 +1954,7 @@ def test_small_input(self):
Test: 10 examples
"""
S_test_temp = np.zeros((X_test[:10].shape[0], n_folds))
- kf = KFold(n_splits=n_folds, shuffle=False, random_state=0)
+ kf = KFold(n_splits=n_folds, shuffle=False, random_state=None)
for fold_counter, (tr_index, te_index) in enumerate(kf.split(X_train[:20], y_train[:20])):
# Split data and target
X_tr = X_train[:20][tr_index]
@@ -1746,6 +1993,97 @@ def test_small_input(self):
assert_array_equal(S_train_1, S_train_3)
assert_array_equal(S_test_1, S_test_3)
+ # -------------------------------------------------------------------------
+ # Added 20250921
+ # Compare with StackingRegressor
+ # -------------------------------------------------------------------------
+ def test_compare_with_stackingregressor_from_sklearn(self):
+
+ estimators = [
+ ('et', ExtraTreesRegressor(n_estimators=100, random_state=0)),
+ ('rf', RandomForestRegressor(n_estimators=100, random_state=0))]
+ final_estimator = LinearRegression()
+
+ # vecstack.StackingTransformer
+ stack = StackingTransformer(estimators=estimators,
+ regression=True,
+ variant='B',
+ n_folds=5,
+ shuffle=False)
+
+ steps = [('stack', stack),
+ ('final_estimator', final_estimator)]
+ pipe = Pipeline(steps)
+ y_pred_vecstack = pipe.fit(X_train, y_train).predict(X_test)
+
+ # sklearn.ensemble.StackingRegressor
+ clf = StackingRegressor(estimators=estimators,
+ final_estimator=final_estimator)
+ y_pred_sklearn = clf.fit(X_train, y_train).predict(X_test)
+
+ assert_array_equal(y_pred_vecstack, y_pred_sklearn)
+
+ # Compare transformation
+
+ # Transformation for test set is equal
+ S_test_vecstack = stack.transform(X_test)
+ S_test_sklearn = clf.transform(X_test)
+ assert_array_equal(S_test_vecstack, S_test_sklearn)
+
+ # Transformation for the train set is different because StackingRegressor does not use a CV procedure
+ S_train_vecstack = stack.transform(X_train)
+ S_train_sklearn = clf.transform(X_train)
+ assert_raises(AssertionError, assert_array_equal, S_train_vecstack, S_train_sklearn)
+
+ # Instead of a CV procedure it just uses models trained on the whole train set
+ et = ExtraTreesRegressor(random_state=0, n_estimators=100)
+ rf = RandomForestRegressor(random_state=0, n_estimators=100)
+ y_pred_et = et.fit(X_train, y_train).predict(X_train)
+ y_pred_rf = rf.fit(X_train, y_train).predict(X_train)
+ assert_array_equal(S_train_sklearn, np.hstack([y_pred_et.reshape(-1, 1), y_pred_rf.reshape(-1, 1)]))
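+
+ # (Each regressor contributes a single column, hence the reshape(-1, 1)
+ # above before stacking the columns side by side.)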
+
+ # -------------------------------------------------------------------------
+ # Added 20250924
+ # Explicitly check that `validate_data` checks the number of features
+ # -------------------------------------------------------------------------
+
+ def test_inconsistent_shape_passed_to_transform(self):
+ """
+ When transforming a non-training set there used to be a check:
+ ```
+ if X.shape[1] != self.n_features_:
+ raise ValueError('Inconsistent number of features.')
+ ```
+ It was needed because I used the `check_array` function to validate data,
+ which apparently did not check the number of features.
+
+ Now I validate data with `validate_data`, which checks `self.n_features_in_`.
+ So my manual check could never trigger, and coverage dropped.
+ So I removed the manual check and created this test case to confirm explicitly that `validate_data` works.
+
+ In version 0.4.0 there was no specific test for this case,
+ probably because it was included in `check_estimator`.
+ """
+ estimators = [
+ ('lr', LinearRegression()),
+ ('ridge', Ridge())]
+
+ stack = StackingTransformer(estimators=estimators,
+ regression=True,
+ variant='B',
+ n_folds=5,
+ shuffle=False)
+
+ stack = stack.fit(X_train, y_train)
+ S_train = stack.transform(X_train) # OK
+ S_test = stack.transform(X_test) # OK
+
+ # Transform the train set with a different number of features; it is in fact treated as a non-train set because the shape differs
+ assert_raises(ValueError, stack.transform, X_train[:, 1:])
+
+ # Transform test set with different number of features
+ assert_raises(ValueError, stack.transform, X_test[:, :-1])
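+
+ # (The ValueError is raised by sklearn's `validate_data`, which compares
+ # X.shape[1] against the `n_features_in_` recorded during fit.)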
+
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
diff --git a/vecstack/__init__.py b/vecstack/__init__.py
index 3ce96cb..80def9b 100644
--- a/vecstack/__init__.py
+++ b/vecstack/__init__.py
@@ -8,7 +8,7 @@
MIT License
-Copyright (c) 2016-2018 Igor Ivanov
+Copyright (c) 2016-2025 Igor Ivanov
Email: vecxoz@gmail.com
Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -38,7 +38,7 @@
__author__ = 'Igor Ivanov > kaggle.com/vecxoz'
__license__ = 'MIT'
-__version__ = '0.4.0'
+__version__ = '0.5.0'
__all__ = ['stacking', 'StackingTransformer']
diff --git a/vecstack/core.py b/vecstack/core.py
index 8c64f2c..66936d3 100644
--- a/vecstack/core.py
+++ b/vecstack/core.py
@@ -6,7 +6,7 @@
MIT License
-Copyright (c) 2016-2018 Igor Ivanov
+Copyright (c) 2016-2025 Igor Ivanov
Email: vecxoz@gmail.com
Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -254,6 +254,7 @@ def your_metric(y_true, y_pred):
random_state : int, default 0
Random seed
+ Ignored if shuffle=False
verbose : int, default 0
Level of verbosity.
@@ -302,7 +303,7 @@ def your_metric(y_true, y_pred):
Regression
----------
- from sklearn.datasets import load_boston
+ from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import ExtraTreesRegressor
@@ -311,35 +312,36 @@ def your_metric(y_true, y_pred):
from vecstack import stacking
# Load demo data
- boston = load_boston()
- X, y = boston.data, boston.target
+ X, y = fetch_california_housing(return_X_y=True)
# Make train/test split
# As usual in machine learning task we have X_train, y_train, and X_test
- X_train, X_test, y_train, y_test = train_test_split(X, y,
- test_size = 0.2, random_state = 0)
+ X_train, X_test, y_train, y_test = train_test_split(X, y,
+ test_size=0.2,
+ random_state=0)
- # Caution! All models and parameter values are just
+ # Caution! All models and parameter values are just
# demonstrational and shouldn't be considered as recommended.
# Initialize 1-st level models.
models = [
- ExtraTreesRegressor(random_state = 0, n_jobs = -1,
- n_estimators = 100, max_depth = 3),
+ ExtraTreesRegressor(random_state=0, n_jobs=-1,
+ n_estimators=100, max_depth=3),
- RandomForestRegressor(random_state = 0, n_jobs = -1,
- n_estimators = 100, max_depth = 3),
+ RandomForestRegressor(random_state=0, n_jobs=-1,
+ n_estimators=100, max_depth=3),
- XGBRegressor(seed = 0, n_jobs = -1, learning_rate = 0.1,
- n_estimators = 100, max_depth = 3)]
+ XGBRegressor(random_state=0, n_jobs=-1, learning_rate=0.1,
+ n_estimators=100, max_depth=3)]
# Compute stacking features
- S_train, S_test = stacking(models, X_train, y_train, X_test,
- regression = True, metric = mean_absolute_error, n_folds = 4,
- shuffle = True, random_state = 0, verbose = 2)
+ S_train, S_test = stacking(models, X_train, y_train, X_test,
+ regression=True, metric=mean_absolute_error,
+ n_folds=4, shuffle=True,
+ random_state=0, verbose=2)
# Initialize 2-nd level model
- model = XGBRegressor(seed = 0, n_jobs = -1, learning_rate = 0.1,
- n_estimators = 100, max_depth = 3)
+ model = XGBRegressor(random_state=0, n_jobs=-1, learning_rate=0.1,
+ n_estimators=100, max_depth=3)
# Fit 2-nd level model
model = model.fit(S_train, y_train)
@@ -362,35 +364,36 @@ def your_metric(y_true, y_pred):
from vecstack import stacking
# Load demo data
- iris = load_iris()
- X, y = iris.data, iris.target
+ X, y = load_iris(return_X_y=True)
# Make train/test split
# As usual in machine learning task we have X_train, y_train, and X_test
- X_train, X_test, y_train, y_test = train_test_split(X, y,
- test_size = 0.2, random_state = 0)
+ X_train, X_test, y_train, y_test = train_test_split(X, y,
+ test_size=0.2,
+ random_state=0)
- # Caution! All models and parameter values are just
+ # Caution! All models and parameter values are just
# demonstrational and shouldn't be considered as recommended.
# Initialize 1-st level models.
models = [
- ExtraTreesClassifier(random_state = 0, n_jobs = -1,
- n_estimators = 100, max_depth = 3),
+ ExtraTreesClassifier(random_state=0, n_jobs=-1,
+ n_estimators=100, max_depth=3),
- RandomForestClassifier(random_state = 0, n_jobs = -1,
- n_estimators = 100, max_depth = 3),
+ RandomForestClassifier(random_state=0, n_jobs=-1,
+ n_estimators=100, max_depth=3),
- XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.1,
- n_estimators = 100, max_depth = 3)]
+        XGBClassifier(random_state=0, n_jobs=-1, learning_rate=0.1,
+ n_estimators=100, max_depth=3)]
# Compute stacking features
- S_train, S_test = stacking(models, X_train, y_train, X_test,
- regression = False, metric = accuracy_score, n_folds = 4,
- stratified = True, shuffle = True, random_state = 0, verbose = 2)
+ S_train, S_test = stacking(models, X_train, y_train, X_test,
+ regression=False, metric=accuracy_score,
+ n_folds=4, stratified=True, shuffle=True,
+ random_state=0, verbose=2)
# Initialize 2-nd level model
- model = XGBClassifier(seed = 0, n_jobs = -1, learning_rate = 0.1,
- n_estimators = 100, max_depth = 3)
+    model = XGBClassifier(random_state=0, n_jobs=-1, learning_rate=0.1,
+ n_estimators=100, max_depth=3)
# Fit 2-nd level model
model = model.fit(S_train, y_train)
@@ -415,8 +418,8 @@ def your_metric(y_true, y_pred):
X_train, y_train = check_X_y(X_train,
y_train,
accept_sparse=['csr'], # allow csr and cast all other sparse types to csr
- force_all_finite=False, # allow nan and inf because
- # some models (xgboost) can handle
+ ensure_all_finite=False, # allow nan and inf because
+                                               # some models (xgboost) can handle them
allow_nd=True,
multi_output=False) # do not allow several columns in y_train
@@ -424,8 +427,8 @@ def your_metric(y_true, y_pred):
X_test = check_array(X_test,
accept_sparse=['csr'], # allow csr and cast all other sparse types to csr
allow_nd=True,
- force_all_finite=False) # allow nan and inf because
- # some models (xgboost) can handle
+ ensure_all_finite=False) # allow nan and inf because
+                                             # some models (xgboost) can handle them
if sample_weight is not None:
sample_weight = np.array(sample_weight).ravel()
#
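# A hedged note on the rename applied throughout this patch: scikit-learn 1.6
# renamed the `force_all_finite` argument of `check_array`/`check_X_y` to
# `ensure_all_finite` (the old spelling is deprecated), so the calls above keep
# their behaviour under the new name. A minimal sketch:
#
#     from sklearn.utils.validation import check_array
#     X_checked = check_array(X_test, accept_sparse=['csr'], ensure_all_finite=False)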
@@ -449,6 +452,10 @@ def your_metric(y_true, y_pred):
stratified = bool(stratified)
#
shuffle = bool(shuffle)
+ #
+    # To comply with sklearn requirement: random_state must be None when shuffle=False
+ if not shuffle:
+ random_state = None
#
if verbose not in [0, 1, 2]:
raise ValueError('Parameter must be 0, 1, or 2')
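# A hedged sketch of the scikit-learn behaviour the guard above addresses:
# recent releases raise a ValueError when a random seed is combined with an
# unshuffled split, so the seed is dropped whenever shuffle=False:
#
#     from sklearn.model_selection import KFold
#     KFold(n_splits=4, shuffle=False, random_state=0)     # raises ValueError
#     KFold(n_splits=4, shuffle=False, random_state=None)  # OK
#
# Resetting random_state keeps the long-standing default `random_state=0`
# of `stacking` usable with `shuffle=False`.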
@@ -669,7 +676,10 @@ def your_metric(y_true, y_pred):
log_full_path = os.path.join(save_dir, log_file_name)
# Save OOF
- np.save(full_path, np.array([S_train, S_test]))
+ array_to_save = np.empty(2, dtype='object')
+ array_to_save[0] = S_train
+ array_to_save[1] = S_test
+ np.save(full_path, array_to_save)
# Save log
log_str = 'vecstack log '
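# A hedged sketch of why the object array matters: `np.array([S_train, S_test])`
# only succeeds when both parts share a shape, but the train and test OOF
# arrays usually differ in row count, and recent NumPy refuses to build such a
# ragged array implicitly. The explicit object array round-trips regardless of
# shape; reading it back requires allow_pickle (file name below is illustrative):
#
#     loaded = np.load('20250908.120000.npy', allow_pickle=True)
#     S_train, S_test = loaded[0], loaded[1]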
diff --git a/vecstack/coresk.py b/vecstack/coresk.py
index 712f3c1..85ca7df 100644
--- a/vecstack/coresk.py
+++ b/vecstack/coresk.py
@@ -6,7 +6,7 @@
MIT License
-Copyright (c) 2016-2018 Igor Ivanov
+Copyright (c) 2016-2025 Igor Ivanov
Email: vecxoz@gmail.com
Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -38,6 +38,7 @@
# -----------------------------------------------------------------------------
import warnings
+from contextlib import suppress
import numpy as np
import scipy.stats as st
from sklearn.base import BaseEstimator
@@ -49,6 +50,7 @@
from sklearn.utils.validation import check_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import has_fit_parameter
+from sklearn.utils.validation import validate_data
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_absolute_error
@@ -60,7 +62,7 @@
# -----------------------------------------------------------------------------
-class StackingTransformer(BaseEstimator, TransformerMixin):
+class StackingTransformer(TransformerMixin, BaseEstimator):
"""StackingTransformer. Scikit-learn compatible API for stacking.
Parameters
@@ -149,6 +151,7 @@ def your_metric(y_true, y_pred):
random_state : int, default 0
Random seed used to initiate fold split.
Same seed and correspondingly same split is used for all estimators.
+ Ignored if ``shuffle=False``
verbose : int, default 0
Level of verbosity.
@@ -200,7 +203,7 @@ def your_metric(y_true, y_pred):
Examples
--------
- >>> from sklearn.datasets import load_boston
+ >>> from sklearn.datasets import fetch_california_housing
>>> from sklearn.model_selection import train_test_split
>>> from sklearn.metrics import mean_absolute_error
>>> from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor
@@ -208,8 +211,7 @@ def your_metric(y_true, y_pred):
>>> from vecstack import StackingTransformer
>>>
>>> # Load demo data
- >>> boston = load_boston()
- >>> X, y = boston.data, boston.target
+ >>> X, y = fetch_california_housing(return_X_y=True)
>>>
>>> # Make train/test split
>>> X_train, X_test, y_train, y_test = train_test_split(X, y,
@@ -282,6 +284,17 @@ def __init__(self,
# -------------------------------------------------------------------------
# -------------------------------------------------------------------------
+ def __sklearn_tags__(self):
+ tags = super().__sklearn_tags__()
+ tags.estimator_type = 'transformer'
+ tags.transformer_tags.preserves_dtype = []
+ tags.target_tags.required = True
+ tags.input_tags.sparse = True
+ return tags
+
+ # -------------------------------------------------------------------------
+ # -------------------------------------------------------------------------
+
def fit(self, X, y, sample_weight=None):
"""Fit all base estimators.
@@ -308,19 +321,29 @@ def fit(self, X, y, sample_weight=None):
# ---------------------------------------------------------------------
# Check input data
# ---------------------------------------------------------------------
+ # Check data and set `self.n_features_in_` and `self.feature_names_in_`
+ X, y = validate_data(self, X, y,
+ reset=True, # default: True, if True will set `self.n_features_in_` and `self.feature_names_in_`
+ validate_separately=False, # default: False, if False will use `check_X_y`
+                         skip_check_array=False, # default: False, i.e. array checks are performed
+ accept_sparse=['csr'],
+ ensure_all_finite=True,
+ multi_output=False)
+
+ # Legacy check included in `validate_data`
# Check X and y
- # ``check_estimator`` does not allow ``force_all_finite=False``
- X, y = check_X_y(X, y,
- accept_sparse=['csr'], # allow csr, cast all others to csr
- force_all_finite=True, # do not allow nan and inf
- multi_output=False) # allow only one column in y_train
+ # ``check_estimator`` does not allow ``ensure_all_finite=False``
+ # X, y = check_X_y(X, y,
+ # accept_sparse=['csr'], # allow csr, cast all others to csr
+ # ensure_all_finite=True, # do not allow nan and inf
+ # multi_output=False) # allow only one column in y_train
# Check X and sample_weight
        # X is already checked, but we need it to compare the length of sample_weight
if sample_weight is not None:
X, sample_weight = check_X_y(X, sample_weight,
accept_sparse=['csr'],
- force_all_finite=True,
+ ensure_all_finite=True,
multi_output=False)
# ---------------------------------------------------------------------
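# A hedged sketch of what `validate_data` adds over the legacy `check_X_y`
# call (scikit-learn >= 1.6): on top of the array checks it records the input
# schema on the estimator, which later enables the consistency checks in
# `transform`:
#
#     stack = StackingTransformer(estimators=estimators).fit(X_train, y_train)
#     stack.n_features_in_      # number of columns seen during fit
#     stack.feature_names_in_   # column names, if X_train was a DataFrame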
@@ -381,6 +404,12 @@ def fit(self, X, y, sample_weight=None):
warn_str += ' ``stratified``'
warnings.warn(warn_str, UserWarning)
+        # To comply with sklearn requirement: random_state must be None when shuffle=False
+ if not self.shuffle:
+ random_state_internal = None
+ else:
+ random_state_internal = self.random_state
+
# ---------------------------------------------------------------------
# Compute attributes (basic properties of data, number of estimators, etc.)
# ---------------------------------------------------------------------
@@ -432,14 +461,14 @@ def fit(self, X, y, sample_weight=None):
if not self.regression and self.stratified:
self.kf_ = StratifiedKFold(n_splits=self.n_folds,
shuffle=self.shuffle,
- random_state=self.random_state)
+ random_state=random_state_internal)
# Save target to be able to create stratified split in ``transform`` method
# This is more efficient than to save split indices
self._y_ = y.copy()
else:
self.kf_ = KFold(n_splits=self.n_folds,
shuffle=self.shuffle,
- random_state=self.random_state)
+ random_state=random_state_internal)
self._y_ = None
# ---------------------------------------------------------------------
@@ -631,9 +660,18 @@ def transform(self, X, is_train_set=None):
# Check if fitted
check_is_fitted(self, ['models_A_'])
+ # Check data without resetting `self.n_features_in_` and `self.feature_names_in_`
+ X = validate_data(self, X,
+ reset=False, # default: True, if True will set `self.n_features_in_` and `self.feature_names_in_`
+ validate_separately=False, # default: False, if False will use `check_X_y`
+                      skip_check_array=False, # default: False, i.e. array checks are performed
+ accept_sparse=['csr'],
+ ensure_all_finite=True) # no need for `multi_output`, because no `y`
+
+ # Legacy check included in `validate_data`
# Input validation
- # ``check_estimator`` does not allow ``force_all_finite=False``
- X = check_array(X, accept_sparse=['csr'], force_all_finite=True)
+ # ``check_estimator`` does not allow ``ensure_all_finite=False``
+ # X = check_array(X, accept_sparse=['csr'], ensure_all_finite=True)
# *********************************************************************
# Fitted StackingTransformer instance is bound to train set used for fitting.
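# With `reset=False` the call above re-checks new data against the schema
# recorded in `fit`, so a feature-count mismatch now fails inside
# `validate_data` rather than in the manual check retired further below.
# A hedged illustration:
#
#     stack.fit(X_train, y_train)       # X_train has, say, 8 columns
#     stack.transform(X_test[:, :7])    # raises ValueError: X has 7 features,
#                                       # but StackingTransformer is expecting 8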
@@ -717,9 +755,10 @@ def transform(self, X, is_train_set=None):
# Transform any other set
# *********************************************************************
else:
+ # Legacy check included in `validate_data`
# Check n_features
- if X.shape[1] != self.n_features_:
- raise ValueError('Inconsistent number of features.')
+ # if X.shape[1] != self.n_features_:
+ # raise ValueError('Inconsistent number of features.')
# Create empty numpy array for test predictions
S_test = np.zeros((X.shape[0], self.n_estimators_ * self.n_classes_implicit_))
@@ -945,37 +984,121 @@ def _check_identity(self, X,
# -------------------------------------------------------------------------
def _get_params(self, attr, deep=True):
- """Gives ability to get parameters of nested estimators
"""
- out = super(StackingTransformer, self).get_params(deep=False)
+ Gives ability to get parameters of nested estimators
+ """
+ out = super().get_params(deep=deep)
if not deep:
return out
+
estimators = getattr(self, attr)
- if estimators is None:
+ try:
+ out.update(estimators)
+ except (TypeError, ValueError):
+ # Ignore TypeError for cases where estimators is not a list of
+ # (name, estimator) and ignore ValueError when the list is not
+ # formatted correctly. This is to prevent errors when calling
+ # `set_params`. `BaseEstimator.set_params` calls `get_params` which
+ # can error for invalid values for `estimators`.
return out
- out.update(estimators)
+
for name, estimator in estimators:
- for key, value in iter(estimator.get_params(deep=True).items()):
- out['%s__%s' % (name, key)] = value
+ if hasattr(estimator, 'get_params'):
+ for key, value in estimator.get_params(deep=True).items():
+ out['%s__%s' % (name, key)] = value
return out
# -------------------------------------------------------------------------
# -------------------------------------------------------------------------
def get_params(self, deep=True):
- """Get parameters of StackingTransformer and base estimators.
+ """
+ Get parameters of StackingTransformer and base estimators.
Parameters
----------
deep : boolean
If False - get parameters of StackingTransformer
If True - get parameters of StackingTransformer and base estimators
+
+ Returns
+ -------
+ params : dict
+            Parameter names mapped to their values. With ``deep=True`` this
+            includes estimator names and nested estimator parameters.
"""
return self._get_params('estimators', deep=deep)
# -------------------------------------------------------------------------
# -------------------------------------------------------------------------
+ def _set_params(self, attr, **params):
+ """
+ Gives ability to set parameters of nested estimators,
+ and replace individual estimators in the list.
+ """
+ # Ensure strict ordering of parameter setting:
+ # 1. Replace the entire estimators collection
+ if attr in params:
+ setattr(self, attr, params.pop(attr))
+ # 2. Replace individual estimators by name
+ items = getattr(self, attr)
+ if isinstance(items, list) and items:
+ # Get item names used to identify valid names in params
+        # `zip` raises a TypeError when `items` does not contain
+        # elements of length 2
+ with suppress(TypeError):
+ item_names, _ = zip(*items)
+ for name in list(params.keys()):
+ if '__' not in name and name in item_names:
+ self._replace_estimator(attr, name, params.pop(name))
+
+    # 3. Individual estimator parameters and other initialization arguments
+ super().set_params(**params)
+ return self
+
+ # -------------------------------------------------------------------------
+ # -------------------------------------------------------------------------
+
+ def _replace_estimator(self, attr, name, new_val):
+ """
+ Replace estimator, assuming `name` is a valid estimator name
+ """
+ new_estimators = list(getattr(self, attr))
+ for i, (estimator_name, _) in enumerate(new_estimators):
+ if estimator_name == name:
+ new_estimators[i] = (name, new_val)
+ break
+ setattr(self, attr, new_estimators)
+
+ # -------------------------------------------------------------------------
+ # -------------------------------------------------------------------------
+
+ def set_params(self, **params):
+ """
+ Set parameters of StackingTransformer and base estimators.
+ Valid parameter keys can be listed with `get_params()`. Note that you
+ can directly set the parameters of the estimators contained in `estimators`.
+
+ Parameters
+ ----------
+ **params : keyword arguments
+ Specific parameters using e.g. `set_params(parameter_name=new_value)`.
+            In addition to setting the parameters of the transformer itself,
+            individual estimators in `estimators` can also be replaced by name.
+ Dropping individual estimators using 'drop' is not supported.
+
+ Returns
+ -------
+ self : object
+ Estimator instance.
+ """
+ self._set_params('estimators', **params)
+ return self
+
+ # -------------------------------------------------------------------------
+ # -------------------------------------------------------------------------
+
def _validate_names(self, names):
"""Validates estimator names
"""
@@ -1012,7 +1135,7 @@ def is_train_set(self, X):
# Check if fitted
check_is_fitted(self, ['models_A_'])
# Input validation
- X = check_array(X, accept_sparse=['csr'], force_all_finite=True)
+ X = check_array(X, accept_sparse=['csr'], ensure_all_finite=True)
return self._check_identity(X)
# -----------------------------------------------------------------------------