|
107 | 107 | "execution_count": 1, |
108 | 108 | "metadata": { |
109 | 109 | "ExecuteTime": { |
110 | | - "end_time": "2020-04-28T09:24:37.492036Z", |
111 | | - "start_time": "2020-04-28T09:24:37.488584Z" |
| 110 | + "end_time": "2020-05-02T17:07:22.372375Z", |
| 111 | + "start_time": "2020-05-02T17:07:22.368436Z" |
112 | 112 | }, |
113 | 113 | "colab": { |
114 | 114 | "base_uri": "https://localhost:8080/", |
|
126 | 126 | "urllib.request.urlretrieve(url, '/content/retail_hero.zip')\n", |
127 | 127 | "\n", |
128 | 128 | "!unzip /content/retail_hero.zip\n", |
129 | | - "!pip install scikit-uplift==0.1.1 catboost=0.22" |
| 129 | + "!pip install scikit-uplift==0.1.2 catboost==0.22" |
130 | 130 | ] |
131 | 131 | }, |
132 | 132 | { |
|
144 | 144 | "execution_count": 2, |
145 | 145 | "metadata": { |
146 | 146 | "ExecuteTime": { |
147 | | - "end_time": "2020-04-28T09:24:40.783897Z", |
148 | | - "start_time": "2020-04-28T09:24:37.503470Z" |
| 147 | + "end_time": "2020-05-02T17:07:25.384054Z", |
| 148 | + "start_time": "2020-05-02T17:07:22.383222Z" |
149 | 149 | }, |
150 | 150 | "colab": {}, |
151 | 151 | "colab_type": "code", |
|
196 | 196 | "execution_count": 3, |
197 | 197 | "metadata": { |
198 | 198 | "ExecuteTime": { |
199 | | - "end_time": "2020-04-28T09:24:42.100096Z", |
200 | | - "start_time": "2020-04-28T09:24:40.786498Z" |
| 199 | + "end_time": "2020-05-02T17:07:26.483716Z", |
| 200 | + "start_time": "2020-05-02T17:07:25.386480Z" |
201 | 201 | }, |
202 | 202 | "colab": {}, |
203 | 203 | "colab_type": "code", |
|
251 | 251 | "execution_count": 4, |
252 | 252 | "metadata": { |
253 | 253 | "ExecuteTime": { |
254 | | - "end_time": "2020-04-28T09:24:44.483576Z", |
255 | | - "start_time": "2020-04-28T09:24:42.102707Z" |
| 254 | + "end_time": "2020-05-02T17:07:28.491581Z", |
| 255 | + "start_time": "2020-05-02T17:07:26.486312Z" |
256 | 256 | }, |
257 | 257 | "colab": { |
258 | 258 | "base_uri": "https://localhost:8080/", |
|
313 | 313 | "execution_count": 5, |
314 | 314 | "metadata": { |
315 | 315 | "ExecuteTime": { |
316 | | - "end_time": "2020-04-28T09:24:44.511016Z", |
317 | | - "start_time": "2020-04-28T09:24:44.486035Z" |
| 316 | + "end_time": "2020-05-02T17:07:28.514717Z", |
| 317 | + "start_time": "2020-05-02T17:07:28.494500Z" |
318 | 318 | }, |
319 | 319 | "colab": { |
320 | 320 | "base_uri": "https://localhost:8080/", |
|
484 | 484 | "execution_count": 6, |
485 | 485 | "metadata": { |
486 | 486 | "ExecuteTime": { |
487 | | - "end_time": "2020-04-28T09:24:45.715602Z", |
488 | | - "start_time": "2020-04-28T09:24:44.514353Z" |
| 487 | + "end_time": "2020-05-02T17:07:29.570605Z", |
| 488 | + "start_time": "2020-05-02T17:07:28.518362Z" |
489 | 489 | }, |
490 | 490 | "colab": { |
491 | 491 | "base_uri": "https://localhost:8080/", |
|
545 | 545 | "execution_count": 7, |
546 | 546 | "metadata": { |
547 | 547 | "ExecuteTime": { |
548 | | - "end_time": "2020-04-28T09:24:47.941480Z", |
549 | | - "start_time": "2020-04-28T09:24:45.719641Z" |
| 548 | + "end_time": "2020-05-02T17:07:31.489869Z", |
| 549 | + "start_time": "2020-05-02T17:07:29.572733Z" |
550 | 550 | }, |
551 | 551 | "colab": { |
552 | 552 | "base_uri": "https://localhost:8080/", |
|
632 | 632 | "execution_count": 8, |
633 | 633 | "metadata": { |
634 | 634 | "ExecuteTime": { |
635 | | - "end_time": "2020-04-28T09:24:50.571779Z", |
636 | | - "start_time": "2020-04-28T09:24:47.944822Z" |
| 635 | + "end_time": "2020-05-02T17:07:33.865281Z", |
| 636 | + "start_time": "2020-05-02T17:07:31.494251Z" |
637 | 637 | }, |
638 | 638 | "colab": { |
639 | 639 | "base_uri": "https://localhost:8080/", |
|
694 | 694 | "execution_count": 9, |
695 | 695 | "metadata": { |
696 | 696 | "ExecuteTime": { |
697 | | - "end_time": "2020-04-28T09:24:52.942803Z", |
698 | | - "start_time": "2020-04-28T09:24:50.576741Z" |
| 697 | + "end_time": "2020-05-02T17:07:36.709646Z", |
| 698 | + "start_time": "2020-05-02T17:07:33.871512Z" |
699 | 699 | }, |
700 | 700 | "colab": { |
701 | 701 | "base_uri": "https://localhost:8080/", |
|
758 | 758 | "execution_count": 10, |
759 | 759 | "metadata": { |
760 | 760 | "ExecuteTime": { |
761 | | - "end_time": "2020-04-28T09:24:52.964396Z", |
762 | | - "start_time": "2020-04-28T09:24:52.945544Z" |
| 761 | + "end_time": "2020-05-02T17:07:36.726223Z", |
| 762 | + "start_time": "2020-05-02T17:07:36.712564Z" |
763 | 763 | }, |
764 | 764 | "colab": { |
765 | 765 | "base_uri": "https://localhost:8080/", |
|
858 | 858 | "execution_count": 11, |
859 | 859 | "metadata": { |
860 | 860 | "ExecuteTime": { |
861 | | - "end_time": "2020-04-28T09:24:56.505700Z", |
862 | | - "start_time": "2020-04-28T09:24:53.019392Z" |
| 861 | + "end_time": "2020-05-02T17:07:39.436995Z", |
| 862 | + "start_time": "2020-05-02T17:07:36.729508Z" |
863 | 863 | }, |
864 | 864 | "colab": { |
865 | 865 | "base_uri": "https://localhost:8080/", |
|
874 | 874 | "name": "stderr", |
875 | 875 | "output_type": "stream", |
876 | 876 | "text": [ |
877 | | - "/Users/Maksim/Library/Python/3.6/lib/python/site-packages/ipykernel_launcher.py:2: UserWarning: It is recommended to use this approach on treatment balanced data. Current sample size is unbalanced.\n", |
| 877 | + "/Users/Maksim/Library/Python/3.6/lib/python/site-packages/ipykernel_launcher.py:6: UserWarning: It is recommended to use this approach on treatment balanced data. Current sample size is unbalanced.\n", |
878 | 878 | " \n" |
879 | 879 | ] |
880 | 880 | }, |
|
891 | 891 | } |
892 | 892 | ], |
893 | 893 | "source": [ |
894 | | - "cm_full = ClassTransformation(CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))\n", |
895 | | - "cm_full = cm_full.fit(X_train_full, y_train_full, treat_train_full, estimator_fit_params={'cat_features': [1]})\n", |
| 894 | + "ct_full = ClassTransformation(CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))\n", |
| 895 | + "ct_full = ct_full.fit(\n", |
| 896 | + " X_train_full, \n", |
| 897 | + " y_train_full, \n", |
| 898 | + " treat_train_full, \n", |
| 899 | + " estimator_fit_params={'cat_features': cat_features}\n", |
| 900 | + ")\n", |
896 | 901 | "\n", |
897 | | - "X_test.loc[:, 'uplift'] = cm_full.predict(X_test.values)\n", |
| 902 | + "X_test.loc[:, 'uplift'] = ct_full.predict(X_test.values)\n", |
898 | 903 | "\n", |
899 | 904 | "sub = X_test[['uplift']].to_csv('sub1.csv')\n", |
900 | 905 | "\n", |
|
906 | 911 | "execution_count": 12, |
907 | 912 | "metadata": { |
908 | 913 | "ExecuteTime": { |
909 | | - "end_time": "2020-04-28T09:24:56.560018Z", |
910 | | - "start_time": "2020-04-28T09:24:56.508541Z" |
| 914 | + "end_time": "2020-05-02T17:07:39.478855Z", |
| 915 | + "start_time": "2020-05-02T17:07:39.440546Z" |
911 | 916 | }, |
912 | 917 | "colab": { |
913 | 918 | "base_uri": "https://localhost:8080/", |
|
988 | 993 | } |
989 | 994 | ], |
990 | 995 | "source": [ |
991 | | - "cm_full_fi = pd.DataFrame({\n", |
992 | | - " 'feature_name': cm_full.estimator.feature_names_,\n", |
993 | | - " 'feature_score': cm_full.estimator.feature_importances_\n", |
| 996 | + "ct_full_fi = pd.DataFrame({\n", |
| 997 | + " 'feature_name': ct_full.estimator.feature_names_,\n", |
| 998 | + " 'feature_score': ct_full.estimator.feature_importances_\n", |
994 | 999 | "}).sort_values('feature_score', ascending=False).reset_index(drop=True)\n", |
995 | 1000 | "\n", |
996 | | - "cm_full_fi" |
| 1001 | + "ct_full_fi" |
997 | 1002 | ] |
998 | 1003 | }, |
999 | 1004 | { |
|
0 commit comments