maks-sh
diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/pull_request_template.md
diff --git a/notebooks/pipeline_usage_EN.ipynb b/notebooks/pipeline_usage_EN.ipynb
Lines changed: 44 additions & 88 deletions
@@ -51,41 +51,15 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-05-30T22:38:40.696778Z",
-     "start_time": "2020-05-30T22:38:40.692482Z"
+     "end_time": "2021-02-07T01:01:39.897817Z",
+     "start_time": "2021-02-07T01:01:39.890409Z"
     }
    },
    "outputs": [],
    "source": [
     "!pip install scikit-uplift xgboost==1.0.2 category_encoders==2.1.0 -U"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Secondly, load the data:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2020-05-30T22:38:40.705782Z",
-     "start_time": "2020-05-30T22:38:40.701316Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "import urllib.request\n",
-    "\n",
-    "\n",
-    "csv_path = '/content/Hilstorm.csv'\n",
-    "url = 'http://www.minethatdata.com/Kevin_Hillstrom_MineThatData_E-MailAnalytics_DataMiningChallenge_2008.03.20.csv'\n",
-    "urllib.request.urlretrieve(url, csv_path)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -99,20 +73,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-05-30T22:38:41.739525Z",
-     "start_time": "2020-05-30T22:38:40.711390Z"
-    }
+     "end_time": "2021-02-07T01:01:42.438253Z",
+     "start_time": "2021-02-07T01:01:39.901510Z"
+    },
+    "scrolled": true
    },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Shape of the dataset before processing: (64000, 12)\n",
-      "Shape of the dataset after processing: (42693, 10)\n"
+      "Shape of the dataset before processing: (64000, 8)\n",
+      "Shape of the dataset after processing: (42693, 8)\n"
      ]
     },
     {
@@ -144,8 +119,6 @@
        "      <th>zip_code</th>\n",
        "      <th>newbie</th>\n",
        "      <th>channel</th>\n",
-       "      <th>visit</th>\n",
-       "      <th>treatment</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -159,8 +132,6 @@
        "      <td>Surburban</td>\n",
        "      <td>0</td>\n",
        "      <td>Phone</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -172,8 +143,6 @@
        "      <td>Rural</td>\n",
        "      <td>1</td>\n",
        "      <td>Web</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -185,8 +154,6 @@
        "      <td>Surburban</td>\n",
        "      <td>1</td>\n",
        "      <td>Web</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -198,8 +165,6 @@
        "      <td>Urban</td>\n",
        "      <td>0</td>\n",
        "      <td>Web</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -211,49 +176,46 @@
        "      <td>Surburban</td>\n",
        "      <td>0</td>\n",
        "      <td>Phone</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ],
       "text/plain": [
-       "   recency history_segment  history  mens  womens   zip_code  newbie channel  \\\n",
-       "0       10  2) $100 - $200   142.44     1       0  Surburban       0   Phone   \n",
-       "1        6  3) $200 - $350   329.08     1       1      Rural       1     Web   \n",
-       "2        7  2) $100 - $200   180.65     0       1  Surburban       1     Web   \n",
-       "4        2    1) $0 - $100    45.34     1       0      Urban       0     Web   \n",
-       "5        6  2) $100 - $200   134.83     0       1  Surburban       0   Phone   \n",
-       "\n",
-       "   visit  treatment  \n",
-       "0      0          1  \n",
-       "1      0          0  \n",
-       "2      0          1  \n",
-       "4      0          1  \n",
-       "5      1          1  "
+       "   recency history_segment  history  mens  womens   zip_code  newbie channel\n",
+       "0       10  2) $100 - $200   142.44     1       0  Surburban       0   Phone\n",
+       "1        6  3) $200 - $350   329.08     1       1      Rural       1     Web\n",
+       "2        7  2) $100 - $200   180.65     0       1  Surburban       1     Web\n",
+       "4        2    1) $0 - $100    45.34     1       0      Urban       0     Web\n",
+       "5        6  2) $100 - $200   134.83     0       1  Surburban       0   Phone"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "import pandas as pd\n",
+    "from sklift.datasets import fetch_hillstrom\n",
     "\n",
     "\n",
     "%matplotlib inline\n",
     "\n",
-    "dataset = pd.read_csv(csv_path)\n",
+    "bunch = fetch_hillstrom(target_col='visit')\n",
+    "\n",
+    "dataset, target, treatment = bunch['data'], bunch['target'], bunch['treatment']\n",
+    "\n",
     "print(f'Shape of the dataset before processing: {dataset.shape}')\n",
-    "dataset = dataset[dataset['segment']!='Mens E-Mail']\n",
-    "dataset.loc[:, 'treatment'] = dataset['segment'].map({\n",
+    "\n",
+    "# Selecting two segments\n",
+    "dataset = dataset[treatment!='Mens E-Mail']\n",
+    "target = target[treatment!='Mens E-Mail']\n",
+    "treatment = treatment[treatment!='Mens E-Mail'].map({\n",
     "    'Womens E-Mail': 1,\n",
     "    'No E-Mail': 0\n",
     "})\n",
     "\n",
-    "dataset = dataset.drop(['segment', 'conversion', 'spend'], axis=1)\n",
     "print(f'Shape of the dataset after processing: {dataset.shape}')\n",
     "dataset.head()"
    ]
@@ -267,27 +229,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-05-30T22:38:42.307545Z",
-     "start_time": "2020-05-30T22:38:41.743319Z"
+     "end_time": "2021-02-07T01:01:42.579775Z",
+     "start_time": "2021-02-07T01:01:42.442595Z"
     }
    },
    "outputs": [],
    "source": [
     "from sklearn.model_selection import train_test_split\n",
     "\n",
     "\n",
-    "Xyt_tr, Xyt_val = train_test_split(dataset, test_size=0.5, random_state=42)\n",
-    "\n",
-    "X_tr = Xyt_tr.drop(['visit', 'treatment'], axis=1)\n",
-    "y_tr = Xyt_tr['visit']\n",
-    "treat_tr = Xyt_tr['treatment']\n",
-    "\n",
-    "X_val = Xyt_val.drop(['visit', 'treatment'], axis=1)\n",
-    "y_val = Xyt_val['visit']\n",
-    "treat_val = Xyt_val['treatment']"
+    "X_tr, X_val, y_tr, y_val, treat_tr, treat_val = train_test_split(\n",
+    "    dataset, target, treatment, test_size=0.5, random_state=42\n",
+    ")"
    ]
   },
   {
@@ -299,11 +255,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-05-30T22:38:42.330862Z",
-     "start_time": "2020-05-30T22:38:42.310277Z"
+     "end_time": "2021-02-07T01:01:42.600915Z",
+     "start_time": "2021-02-07T01:01:42.585066Z"
     }
    },
    "outputs": [
@@ -329,11 +285,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-05-30T22:38:42.430704Z",
-     "start_time": "2020-05-30T22:38:42.333721Z"
+     "end_time": "2021-02-07T01:01:42.703537Z",
+     "start_time": "2021-02-07T01:01:42.603875Z"
     }
    },
    "outputs": [],
@@ -363,11 +319,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-05-30T22:38:43.630594Z",
-     "start_time": "2020-05-30T22:38:42.433041Z"
+     "end_time": "2021-02-07T01:01:44.020040Z",
+     "start_time": "2021-02-07T01:01:42.707311Z"
     }
    },
    "outputs": [
@@ -402,11 +358,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-05-30T22:38:43.777122Z",
-     "start_time": "2020-05-30T22:38:43.632881Z"
+     "end_time": "2021-02-07T01:01:44.184968Z",
+     "start_time": "2021-02-07T01:01:44.047865Z"
     }
    },
    "outputs": [