maks-sh
diff --git a/‎notebooks/RetailHero.ipynb‎
Lines changed: 38 additions & 33 deletions b/‎notebooks/RetailHero.ipynb‎
Lines changed: 38 additions & 33 deletions
diff --git a/‎notebooks/RetailHero_EN.ipynb‎
Lines changed: 37 additions & 32 deletions b/‎notebooks/RetailHero_EN.ipynb‎
Lines changed: 37 additions & 32 deletions
@@ -107,8 +107,8 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:37.492036Z",
-     "start_time": "2020-04-28T09:24:37.488584Z"
+     "end_time": "2020-05-02T17:07:22.372375Z",
+     "start_time": "2020-05-02T17:07:22.368436Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -126,7 +126,7 @@
     "urllib.request.urlretrieve(url, '/content/retail_hero.zip')\n",
     "\n",
     "!unzip /content/retail_hero.zip\n",
-    "!pip install scikit-uplift==0.1.1 catboost=0.22"
+    "!pip install scikit-uplift==0.1.2 catboost==0.22"
    ]
   },
   {
@@ -144,8 +144,8 @@
    "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:40.783897Z",
-     "start_time": "2020-04-28T09:24:37.503470Z"
+     "end_time": "2020-05-02T17:07:25.384054Z",
+     "start_time": "2020-05-02T17:07:22.383222Z"
     },
     "colab": {},
     "colab_type": "code",
@@ -196,8 +196,8 @@
    "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:42.100096Z",
-     "start_time": "2020-04-28T09:24:40.786498Z"
+     "end_time": "2020-05-02T17:07:26.483716Z",
+     "start_time": "2020-05-02T17:07:25.386480Z"
     },
     "colab": {},
     "colab_type": "code",
@@ -251,8 +251,8 @@
    "execution_count": 4,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:44.483576Z",
-     "start_time": "2020-04-28T09:24:42.102707Z"
+     "end_time": "2020-05-02T17:07:28.491581Z",
+     "start_time": "2020-05-02T17:07:26.486312Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -313,8 +313,8 @@
    "execution_count": 5,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:44.511016Z",
-     "start_time": "2020-04-28T09:24:44.486035Z"
+     "end_time": "2020-05-02T17:07:28.514717Z",
+     "start_time": "2020-05-02T17:07:28.494500Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -484,8 +484,8 @@
    "execution_count": 6,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:45.715602Z",
-     "start_time": "2020-04-28T09:24:44.514353Z"
+     "end_time": "2020-05-02T17:07:29.570605Z",
+     "start_time": "2020-05-02T17:07:28.518362Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -545,8 +545,8 @@
    "execution_count": 7,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:47.941480Z",
-     "start_time": "2020-04-28T09:24:45.719641Z"
+     "end_time": "2020-05-02T17:07:31.489869Z",
+     "start_time": "2020-05-02T17:07:29.572733Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -632,8 +632,8 @@
    "execution_count": 8,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:50.571779Z",
-     "start_time": "2020-04-28T09:24:47.944822Z"
+     "end_time": "2020-05-02T17:07:33.865281Z",
+     "start_time": "2020-05-02T17:07:31.494251Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -694,8 +694,8 @@
    "execution_count": 9,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:52.942803Z",
-     "start_time": "2020-04-28T09:24:50.576741Z"
+     "end_time": "2020-05-02T17:07:36.709646Z",
+     "start_time": "2020-05-02T17:07:33.871512Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -758,8 +758,8 @@
    "execution_count": 10,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:52.964396Z",
-     "start_time": "2020-04-28T09:24:52.945544Z"
+     "end_time": "2020-05-02T17:07:36.726223Z",
+     "start_time": "2020-05-02T17:07:36.712564Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -858,8 +858,8 @@
    "execution_count": 11,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:56.505700Z",
-     "start_time": "2020-04-28T09:24:53.019392Z"
+     "end_time": "2020-05-02T17:07:39.436995Z",
+     "start_time": "2020-05-02T17:07:36.729508Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -874,7 +874,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/Users/Maksim/Library/Python/3.6/lib/python/site-packages/ipykernel_launcher.py:2: UserWarning: It is recommended to use this approach on treatment balanced data. Current sample size is unbalanced.\n",
+      "/Users/Maksim/Library/Python/3.6/lib/python/site-packages/ipykernel_launcher.py:6: UserWarning: It is recommended to use this approach on treatment balanced data. Current sample size is unbalanced.\n",
       "  \n"
      ]
     },
@@ -891,10 +891,15 @@
     }
    ],
    "source": [
-    "cm_full = ClassTransformation(CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))\n",
-    "cm_full = cm_full.fit(X_train_full, y_train_full, treat_train_full, estimator_fit_params={'cat_features': [1]})\n",
+    "ct_full = ClassTransformation(CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))\n",
+    "ct_full = ct_full.fit(\n",
+    "    X_train_full, \n",
+    "    y_train_full, \n",
+    "    treat_train_full, \n",
+    "    estimator_fit_params={'cat_features': cat_features}\n",
+    ")\n",
     "\n",
-    "X_test.loc[:, 'uplift'] = cm_full.predict(X_test.values)\n",
+    "X_test.loc[:, 'uplift'] = ct_full.predict(X_test.values)\n",
     "\n",
     "sub = X_test[['uplift']].to_csv('sub1.csv')\n",
     "\n",
@@ -906,8 +911,8 @@
    "execution_count": 12,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:24:56.560018Z",
-     "start_time": "2020-04-28T09:24:56.508541Z"
+     "end_time": "2020-05-02T17:07:39.478855Z",
+     "start_time": "2020-05-02T17:07:39.440546Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -988,12 +993,12 @@
     }
    ],
    "source": [
-    "cm_full_fi = pd.DataFrame({\n",
-    "    'feature_name': cm_full.estimator.feature_names_,\n",
-    "    'feature_score': cm_full.estimator.feature_importances_\n",
+    "ct_full_fi = pd.DataFrame({\n",
+    "    'feature_name': ct_full.estimator.feature_names_,\n",
+    "    'feature_score': ct_full.estimator.feature_importances_\n",
     "}).sort_values('feature_score', ascending=False).reset_index(drop=True)\n",
     "\n",
-    "cm_full_fi"
+    "ct_full_fi"
    ]
   },
   {
 
@@ -98,8 +98,8 @@
    "execution_count": 1,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:26:59.973637Z",
-     "start_time": "2020-04-28T09:26:59.969856Z"
+     "end_time": "2020-05-02T17:03:42.686542Z",
+     "start_time": "2020-05-02T17:03:42.682766Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -117,7 +117,7 @@
     "urllib.request.urlretrieve(url, '/content/retail_hero.zip')\n",
     "\n",
     "!unzip /content/retail_hero.zip\n",
-    "!pip install scikit-uplift==0.1.1 catboost=0.22"
+    "!pip install scikit-uplift==0.1.2 catboost==0.22"
    ]
   },
   {
@@ -135,8 +135,8 @@
    "execution_count": 2,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:03.353098Z",
-     "start_time": "2020-04-28T09:26:59.984369Z"
+     "end_time": "2020-05-02T17:03:45.696618Z",
+     "start_time": "2020-05-02T17:03:42.697098Z"
     },
     "colab": {},
     "colab_type": "code",
@@ -187,8 +187,8 @@
    "execution_count": 3,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:04.563554Z",
-     "start_time": "2020-04-28T09:27:03.355432Z"
+     "end_time": "2020-05-02T17:03:46.792933Z",
+     "start_time": "2020-05-02T17:03:45.698939Z"
     },
     "colab": {},
     "colab_type": "code",
@@ -241,8 +241,8 @@
    "execution_count": 4,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:06.789462Z",
-     "start_time": "2020-04-28T09:27:04.570306Z"
+     "end_time": "2020-05-02T17:03:48.848922Z",
+     "start_time": "2020-05-02T17:03:46.795574Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -305,8 +305,8 @@
    "execution_count": 5,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:06.813310Z",
-     "start_time": "2020-04-28T09:27:06.792837Z"
+     "end_time": "2020-05-02T17:03:48.871869Z",
+     "start_time": "2020-05-02T17:03:48.851856Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -476,8 +476,8 @@
    "execution_count": 6,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:07.960855Z",
-     "start_time": "2020-04-28T09:27:06.816440Z"
+     "end_time": "2020-05-02T17:03:49.905064Z",
+     "start_time": "2020-05-02T17:03:48.875206Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -538,8 +538,8 @@
    "execution_count": 7,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:10.062915Z",
-     "start_time": "2020-04-28T09:27:07.972200Z"
+     "end_time": "2020-05-02T17:03:51.782971Z",
+     "start_time": "2020-05-02T17:03:49.907481Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -627,8 +627,8 @@
    "execution_count": 8,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:12.177386Z",
-     "start_time": "2020-04-28T09:27:10.065653Z"
+     "end_time": "2020-05-02T17:03:53.820694Z",
+     "start_time": "2020-05-02T17:03:51.785856Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -689,8 +689,8 @@
    "execution_count": 9,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:14.459477Z",
-     "start_time": "2020-04-28T09:27:12.181749Z"
+     "end_time": "2020-05-02T17:03:56.085589Z",
+     "start_time": "2020-05-02T17:03:53.824592Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -753,8 +753,8 @@
    "execution_count": 10,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:14.480794Z",
-     "start_time": "2020-04-28T09:27:14.463302Z"
+     "end_time": "2020-05-02T17:03:56.140144Z",
+     "start_time": "2020-05-02T17:03:56.091196Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -853,8 +853,8 @@
    "execution_count": 11,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:17.097949Z",
-     "start_time": "2020-04-28T09:27:14.483691Z"
+     "end_time": "2020-05-02T17:03:59.864286Z",
+     "start_time": "2020-05-02T17:03:56.151623Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -886,10 +886,15 @@
     }
    ],
    "source": [
-    "cm_full = ClassTransformation(CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))\n",
-    "cm_full = cm_full.fit(X_train_full, y_train_full, treat_train_full, estimator_fit_params={'cat_features': [1]})\n",
+    "ct_full = ClassTransformation(CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))\n",
+    "ct_full = ct_full.fit(\n",
+    "    X_train_full, \n",
+    "    y_train_full, \n",
+    "    treat_train_full, \n",
+    "    estimator_fit_params={'cat_features': cat_features}\n",
+    ")\n",
     "\n",
-    "X_test.loc[:, 'uplift'] = cm_full.predict(X_test.values)\n",
+    "X_test.loc[:, 'uplift'] = ct_full.predict(X_test.values)\n",
     "\n",
     "sub = X_test[['uplift']].to_csv('sub1.csv')\n",
     "\n",
@@ -901,8 +906,8 @@
    "execution_count": 12,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2020-04-28T09:27:17.138755Z",
-     "start_time": "2020-04-28T09:27:17.101433Z"
+     "end_time": "2020-05-02T17:03:59.898275Z",
+     "start_time": "2020-05-02T17:03:59.868331Z"
     },
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -983,12 +988,12 @@
     }
    ],
    "source": [
-    "cm_full_fi = pd.DataFrame({\n",
-    "    'feature_name': cm_full.estimator.feature_names_,\n",
-    "    'feature_score': cm_full.estimator.feature_importances_\n",
+    "ct_full_fi = pd.DataFrame({\n",
+    "    'feature_name': ct_full.estimator.feature_names_,\n",
+    "    'feature_score': ct_full.estimator.feature_importances_\n",
     "}).sort_values('feature_score', ascending=False).reset_index(drop=True)\n",
     "\n",
-    "cm_full_fi"
+    "ct_full_fi"
    ]
   },
   {