JuliaAI
diff --git a/‎.github/workflows/CI.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/CI.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Project.toml‎
Lines changed: 3 additions & 3 deletions b/‎Project.toml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 6 additions & 5 deletions b/‎README.md‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎example/BalancedBagging.ipynb‎
Lines changed: 32 additions & 23 deletions b/‎example/BalancedBagging.ipynb‎
Lines changed: 32 additions & 23 deletions
@@ -19,7 +19,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.8'
+          - '1.7'
           - '1'
 
         os: [ubuntu-latest, windows-latest, macOS-latest]
 
@@ -1,3 +1,4 @@
 /Manifest.toml
 .CondaPkg/*
 .CondaPkg
+.vscode/settings.json
@@ -1,7 +1,7 @@
 name = "MLJBalancing"
 uuid = "45f359ea-796d-4f51-95a5-deb1a414c586"
 authors = ["Essam Wisam <[email protected]>", "Anthony Blaom <[email protected]> and contributors"]
-version = "0.1.0"
+version = "0.1.1"
 
 [deps]
 MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
@@ -12,12 +12,12 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 
 [compat]
-MLJBase = "0.21"
+MLJBase = "1"
 OrderedCollections = "1.6"
-julia = "1.6"
 MLJModelInterface = "1.9"
 MLUtils = "0.4"
 StatsBase = "0.34"
+julia = "1.7"
 
 [extras]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 
@@ -1,7 +1,7 @@
 # MLJBalancing
 A package providing composite models wrapping class imbalance algorithms from [Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl) with classifiers from [MLJ](https://github.com/alan-turing-institute/MLJ.jl). 
 
-## ⏬ Instalattion
+## ⏬ Installation
 ```julia
 import Pkg;
 Pkg.add("MLJBalancing")
@@ -17,6 +17,7 @@ This package allows chaining of resampling methods from Imbalance.jl with classi
 ```julia
 SMOTENC = @load SMOTENC pkg=Imbalance verbosity=0
 TomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0
+LogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0
 
 oversampler = SMOTENC(k=5, ratios=1.0, rng=42)
 undersampler = TomekUndersampler(min_ratios=0.5, rng=42)
@@ -33,7 +34,7 @@ Here training data will be passed to `balancer1` then `balancer2`, whose output
 In general, there can be any number of balancers, and the user can give the balancers arbitrary names. 
 
 #### At this point, they behave like one single model
-You can fit, predict, cross-validate and finetune it like any other MLJ model. Here is an example for finetuning
+You can fit, predict, cross-validate and fine-tune it like any other MLJ model. Here is an example of fine-tuning:
 ```julia
 r1 = range(balanced_model, :(balancer1.k), lower=3, upper=10)
 r2 = range(balanced_model, :(balancer2.min_ratios), lower=0.1, upper=0.9)
@@ -57,7 +58,7 @@ The package also offers an implementation of bagging over probabilistic classifi
 
 
 #### Construct a BalancedBaggingClassifier
-In this you must specify the model, and optionally specify the number of bags `T` and the random number generator `rng`. If `T` is not specified it is set as the ratio between the majority and minority counts. If `rng` isn't specified then `default_rng()` is used.
+Here you must specify a probabilistic model, and may optionally specify the number of bags `T` and the random number generator `rng`. If `T` is not specified, it is set to the ratio between the majority and minority counts. If `rng` isn't specified, then `default_rng()` is used.
 
 ```julia
 LogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0
@@ -66,9 +67,9 @@ bagging_model = BalancedBaggingClassifier(model=logistic_model, T=10, rng=Random
 ```
 
 #### Now it behaves like one single model
-You can fit, predict, cross-validate and finetune it like any other probabilistic MLJ model where `X` must be a table input (e.g., a dataframe).
+You can fit, predict, cross-validate and fine-tune it like any other probabilistic MLJ model. Note that `X` must be a table input (e.g., a dataframe).
 ```julia
 mach = machine(bagging_model, X, y)
 fit!(mach)
 pred = predict(mach, X)
-```
+```
@@ -40,16 +40,16 @@
     {
      "data": {
       "text/plain": [
-       "((Column1 = [0.9695150609084499, 0.012898301755861596, 0.7555027304121053, 0.3467415729179013, 0.35969402837473463, 0.2601876747805505, 0.9522580699968279, 0.06304475092339623, 0.18909001622655808, 0.19934942931986965  …  0.021532597906190776, 0.8482825697641306, 0.10773487816863903, 0.32189982199036116, 0.12662208474317038, 0.28529465447429614, 0.2907506630258835, 0.36872799387588473, 0.061489791166806085, 0.45645058368583713], Column2 = [0.06546916714160167, 0.7243956502957003, 0.5183099801474415, 0.7555562860508294, 0.11226218114407538, 0.9135150277876691, 0.8739421974558176, 0.2268482788660101, 0.580604436651146, 0.4142252330250549  …  0.6517425913240111, 0.01713263102740481, 0.7175499403837856, 0.7362894157420817, 0.24893665902538054, 0.41499951381631595, 0.2159527717429719, 0.8966879835264249, 0.87252430655793, 0.41461921031276117], Column3 = [0.5939320702328891, 0.19329886972497456, 0.04656947038518311, 0.22095698685781184, 0.678807659662497, 0.12720198818430306, 0.6795750371448686, 0.9314917999820301, 0.22920734893984274, 0.5148148980955375  …  0.55049773593343, 0.038576459283091946, 0.27765727942909757, 0.2753072414696357, 0.8823620780359746, 0.44831794170895023, 0.9073846432163745, 0.4648550947905655, 0.311984726769037, 0.25829997798611304], Column4 = [0.12253944650540982, 0.8259140842535423, 0.4034477332184384, 0.5279399406265695, 0.5579944087437719, 0.24650366028608328, 0.6874897000162434, 0.23391406844015605, 0.5641254897013973, 0.6250622796341656  …  0.21708181942178983, 0.35224683896541464, 0.8444113778983325, 0.4547214584884428, 0.13508852017592232, 0.9510137735662383, 0.5723463533029658, 0.626377972762265, 0.7854013810594317, 0.15394691114473347], Column5 = [0.47958743625921163, 0.45779753417165514, 0.6367059235247621, 0.8601116026079643, 0.3334020182022719, 0.41593698717526373, 0.13208968772625174, 0.16951044109747648, 0.8137887839507706, 0.4429229861115882  …  0.01308976221980429, 0.48597926808091163, 0.20768781798463476, 
0.30045611276046247, 0.15759293576302558, 0.975806377881983, 0.19451065500145392, 0.9638103356367584, 0.3594043445295293, 0.7792867217495332], Column6 = [3.0, 3.0, 1.0, 3.0, 1.0, 2.0, 3.0, 2.0, 3.0, 3.0  …  3.0, 2.0, 1.0, 2.0, 1.0, 2.0, 2.0, 3.0, 3.0, 1.0], Column7 = [2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0  …  2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 1.0]), CategoricalArrays.CategoricalValue{Int64, UInt32}[0, 0, 0, 0, 0, 0, 0, 0, 1, 0  …  0, 0, 1, 0, 1, 0, 0, 0, 0, 0])"
+       "((Column1 = [0.564, 0.862, 0.793, 0.505, 0.683, 0.699, 0.545, 0.693, 0.95, 0.44  …  0.423, 0.632, 0.922, 0.592, 0.944, 0.517, 0.785, 0.579, 0.725, 0.711], Column2 = [0.42, 0.715, 0.358, -0.009, 0.228, 0.725, 0.786, 0.52, 0.646, 0.582  …  0.65, 0.633, 0.263, 0.141, 0.472, 0.45, -0.019, 0.593, 0.777, 0.877], Column3 = [0.638, 0.719, 0.716, 0.604, 0.616, 0.784, 0.697, 0.711, 0.878, 0.739  …  0.722, 0.672, 0.879, 0.598, 0.879, 0.669, 0.728, 0.768, 0.736, 0.725], Column4 = [0.29, 0.164, 0.164, 0.262, 0.246, 0.211, 0.155, 0.03, 1.842, 0.324  …  0.192, 0.143, 1.323, 0.251, 1.084, 0.165, 0.138, 0.176, 0.155, 0.217], Column5 = [0.605, 0.287, 0.565, 0.121, 0.752, 0.317, 0.165, 0.497, 0.361, 0.293  …  0.726, 0.781, 0.694, 0.728, 0.692, 0.351, 0.089, 0.478, 0.067, -0.19], Column6 = [2.0, 1.0, 3.0, 1.0, 3.0, 1.0, 3.0, 2.0, 2.0, 3.0  …  1.0, 3.0, 2.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0], Column7 = [2.0, 2.0, 1.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0  …  1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0]), CategoricalArrays.CategoricalValue{Int64, UInt32}[0, 0, 0, 0, 0, 0, 0, 0, 1, 0  …  0, 0, 1, 0, 1, 0, 0, 0, 0, 0])"
       ]
      },
      "metadata": {},
      "output_type": "display_data"
     }
    ],
    "source": [
-    "X, y = generate_imbalanced_data(100, 5; cat_feats_num_vals = [3, 2], \n",
-    "                                        probs = [0.9, 0.1], \n",
+    "X, y = generate_imbalanced_data(100, 5; num_vals_per_category = [3, 2], \n",
+    "                                        class_probs = [0.9, 0.1], \n",
     "                                        type = \"ColTable\", \n",
     "                                        rng=42)"
    ]
@@ -73,6 +73,15 @@
       "WARNING: using StaticArrays.setindex in module FiniteDiff conflicts with an existing identifier.\n"
      ]
     },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "┌ Warning: The call to compilecache failed to create a usable precompiled cache file for MLJLinearModels [6ee0df7b-362f-4a72-a706-9e79364fb692]\n",
+      "│   exception = ErrorException(\"Required dependency Optim [429524aa-4258-5aef-a3af-852621145aeb] failed to load from a cache file.\")\n",
+      "└ @ Base loading.jl:1349\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
@@ -108,7 +117,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -127,26 +136,26 @@
      "data": {
       "text/plain": [
        "100-element CategoricalDistributions.UnivariateFiniteVector{Multiclass{2}, Int64, UInt32, Float64}:\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.928, 1=>0.0722)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.845, 1=>0.155)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.749, 1=>0.251)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.902, 1=>0.0977)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.804, 1=>0.196)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.864, 1=>0.136)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.851, 1=>0.149)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.954, 1=>0.0458)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.853, 1=>0.147)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.86, 1=>0.14)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>0.0, 1=>1.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
        " ⋮\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.671, 1=>0.329)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.73, 1=>0.27)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.843, 1=>0.157)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.941, 1=>0.0594)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.872, 1=>0.128)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.92, 1=>0.0797)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.929, 1=>0.0714)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.791, 1=>0.209)\n",
-       " UnivariateFinite{Multiclass{2}}(0=>0.827, 1=>0.173)"
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>0.0, 1=>1.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>0.0, 1=>1.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)\n",
+       " UnivariateFinite{Multiclass{2}}(0=>1.0, 1=>0.0)"
       ]
      },
      "metadata": {},