1
+ {
2
+ "nbformat" : 4 ,
3
+ "nbformat_minor" : 0 ,
4
+ "metadata" : {
5
+ "colab" : {
6
+ "provenance" : []
7
+ },
8
+ "kernelspec" : {
9
+ "name" : " python3" ,
10
+ "display_name" : " Python 3"
11
+ },
12
+ "language_info" : {
13
+ "name" : " python"
14
+ }
15
+ },
16
+ "cells" : [
17
+ {
18
+ "cell_type" : " markdown" ,
19
+ "source" : [
20
+ " # Libraries"
21
+ ],
22
+ "metadata" : {
23
+ "id" : " XmjSOfm5C7Y3"
24
+ }
25
+ },
26
+ {
27
+ "cell_type" : " code" ,
28
+ "execution_count" : null ,
29
+ "metadata" : {
30
+ "colab" : {
31
+ "base_uri" : " https://localhost:8080/"
32
+ },
33
+ "id" : " 3syypoOe4SZ0" ,
34
+ "outputId" : " b319cd48-1f8c-46aa-8e76-721f90fb13b9"
35
+ },
36
+ "outputs" : [
37
+ {
38
+ "output_type" : " stream" ,
39
+ "name" : " stdout" ,
40
+ "text" : [
41
+ " Requirement already satisfied: ktrain in /usr/local/lib/python3.10/dist-packages (0.37.6)\n " ,
42
+ " Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.2.2)\n " ,
43
+ " Requirement already satisfied: matplotlib>=3.0.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (3.7.1)\n " ,
44
+ " Requirement already satisfied: pandas>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.5.3)\n " ,
45
+ " Requirement already satisfied: fastprogress>=0.1.21 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.0.3)\n " ,
46
+ " Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.27.1)\n " ,
47
+ " Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.3.1)\n " ,
48
+ " Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from ktrain) (23.1)\n " ,
49
+ " Requirement already satisfied: langdetect in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.0.9)\n " ,
50
+ " Requirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.42.1)\n " ,
51
+ " Requirement already satisfied: cchardet in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.1.7)\n " ,
52
+ " Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (from ktrain) (4.0.0)\n " ,
53
+ " Requirement already satisfied: syntok>1.3.3 in /usr/local/lib/python3.10/dist-packages (from ktrain) (1.4.4)\n " ,
54
+ " Requirement already satisfied: tika in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.6.0)\n " ,
55
+ " Requirement already satisfied: transformers>=4.17.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (4.31.0)\n " ,
56
+ " Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.1.99)\n " ,
57
+ " Requirement already satisfied: keras-bert>=0.86.0 in /usr/local/lib/python3.10/dist-packages (from ktrain) (0.89.0)\n " ,
58
+ " Requirement already satisfied: whoosh in /usr/local/lib/python3.10/dist-packages (from ktrain) (2.7.4)\n " ,
59
+ " Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from keras-bert>=0.86.0->ktrain) (1.22.4)\n " ,
60
+ " Requirement already satisfied: keras-transformer==0.40.0 in /usr/local/lib/python3.10/dist-packages (from keras-bert>=0.86.0->ktrain) (0.40.0)\n " ,
61
+ " Requirement already satisfied: keras-pos-embd==0.13.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.13.0)\n " ,
62
+ " Requirement already satisfied: keras-multi-head==0.29.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.29.0)\n " ,
63
+ " Requirement already satisfied: keras-layer-normalization==0.16.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.16.0)\n " ,
64
+ " Requirement already satisfied: keras-position-wise-feed-forward==0.8.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.8.0)\n " ,
65
+ " Requirement already satisfied: keras-embed-sim==0.10.0 in /usr/local/lib/python3.10/dist-packages (from keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.10.0)\n " ,
66
+ " Requirement already satisfied: keras-self-attention==0.51.0 in /usr/local/lib/python3.10/dist-packages (from keras-multi-head==0.29.0->keras-transformer==0.40.0->keras-bert>=0.86.0->ktrain) (0.51.0)\n " ,
67
+ " Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (1.1.0)\n " ,
68
+ " Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (0.11.0)\n " ,
69
+ " Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (4.41.0)\n " ,
70
+ " Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (1.4.4)\n " ,
71
+ " Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (8.4.0)\n " ,
72
+ " Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (3.1.0)\n " ,
73
+ " Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.0.0->ktrain) (2.8.2)\n " ,
74
+ " Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.0.1->ktrain) (2022.7.1)\n " ,
75
+ " Requirement already satisfied: regex>2016 in /usr/local/lib/python3.10/dist-packages (from syntok>1.3.3->ktrain) (2022.10.31)\n " ,
76
+ " Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (3.12.2)\n " ,
77
+ " Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.16.4)\n " ,
78
+ " Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (6.0.1)\n " ,
79
+ " Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.13.3)\n " ,
80
+ " Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (0.3.1)\n " ,
81
+ " Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.17.0->ktrain) (4.65.0)\n " ,
82
+ " Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from langdetect->ktrain) (1.16.0)\n " ,
83
+ " Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (1.26.16)\n " ,
84
+ " Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (2023.5.7)\n " ,
85
+ " Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (2.0.12)\n " ,
86
+ " Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->ktrain) (3.4)\n " ,
87
+ " Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->ktrain) (1.10.1)\n " ,
88
+ " Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->ktrain) (3.2.0)\n " ,
89
+ " Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tika->ktrain) (67.7.2)\n " ,
90
+ " Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.17.0->ktrain) (2023.6.0)\n " ,
91
+ " Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers>=4.17.0->ktrain) (4.7.1)\n "
92
+ ]
93
+ }
94
+ ],
95
+ "source" : [
96
+ " !pip3 install ktrain"
97
+ ]
98
+ },
99
+ {
100
+ "cell_type" : " code" ,
101
+ "source" : [
102
+ " import os.path\n " ,
103
+ " import numpy as np\n " ,
104
+ " import ktrain\n " ,
105
+ " from ktrain import text\n " ,
106
+ " import tensorflow"
107
+ ],
108
+ "metadata" : {
109
+ "id" : " 0ZejN0MU6dnb"
110
+ },
111
+ "execution_count" : null ,
112
+ "outputs" : []
113
+ },
114
+ {
115
+ "cell_type" : " markdown" ,
116
+ "source" : [
117
+ " # Dataset"
118
+ ],
119
+ "metadata" : {
120
+ "id" : " oSJh43dYC_I4"
121
+ }
122
+ },
123
+ {
124
+ "cell_type" : " code" ,
125
+ "source" : [
126
+ " data=tensorflow.keras.utils.get_file(fname=\" aclImdb_v1.tar.gz\" ,origin=\" http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\" ,extract=True)"
127
+ ],
128
+ "metadata" : {
129
+ "id" : " navTD1Nu7NMH"
130
+ },
131
+ "execution_count" : null ,
132
+ "outputs" : []
133
+ },
134
+ {
135
+ "cell_type" : " code" ,
136
+ "source" : [
137
+ " dir=os.path.join(os.path.dirname(data),\" aclImdb\" )"
138
+ ],
139
+ "metadata" : {
140
+ "id" : " DJD9_h829wMX"
141
+ },
142
+ "execution_count" : null ,
143
+ "outputs" : []
144
+ },
145
+ {
146
+ "cell_type" : " code" ,
147
+ "source" : [
148
+ " (x_train,y_train),(x_test,y_test),preproc=text.texts_from_folder(datadir=dir,classes=[\" pos\" ,\" neg\" ],train_test_names=[\" train\" ,\" test\" ],preprocess_mode=\" bert\" )"
149
+ ],
150
+ "metadata" : {
151
+ "colab" : {
152
+ "base_uri" : " https://localhost:8080/" ,
153
+ "height" : 161
154
+ },
155
+ "id" : " M84oU3gM-1zZ" ,
156
+ "outputId" : " 4cfe9061-cd3f-4d21-8826-c78853d4e090"
157
+ },
158
+ "execution_count" : null ,
159
+ "outputs" : [
160
+ {
161
+ "output_type" : " stream" ,
162
+ "name" : " stdout" ,
163
+ "text" : [
164
+ " detected encoding: utf-8\n " ,
165
+ " preprocessing train...\n " ,
166
+ " language: en\n "
167
+ ]
168
+ },
169
+ {
170
+ "output_type" : " display_data" ,
171
+ "data" : {
172
+ "text/plain" : [
173
+ " <IPython.core.display.HTML object>"
174
+ ],
175
+ "text/html" : [
176
+ " \n " ,
177
+ " <style>\n " ,
178
+ " /* Turns off some styling */\n " ,
179
+ " progress {\n " ,
180
+ " /* gets rid of default border in Firefox and Opera. */\n " ,
181
+ " border: none;\n " ,
182
+ " /* Needs to be in here for Safari polyfill so background images work as expected. */\n " ,
183
+ " background-size: auto;\n " ,
184
+ " }\n " ,
185
+ " progress:not([value]), progress:not([value])::-webkit-progress-bar {\n " ,
186
+ " background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n " ,
187
+ " }\n " ,
188
+ " .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n " ,
189
+ " background: #F44336;\n " ,
190
+ " }\n " ,
191
+ " </style>\n "
192
+ ]
193
+ },
194
+ "metadata" : {}
195
+ },
196
+ {
197
+ "output_type" : " display_data" ,
198
+ "data" : {
199
+ "text/plain" : [
200
+ " <IPython.core.display.HTML object>"
201
+ ],
202
+ "text/html" : [
203
+ " done."
204
+ ]
205
+ },
206
+ "metadata" : {}
207
+ },
208
+ {
209
+ "output_type" : " stream" ,
210
+ "name" : " stdout" ,
211
+ "text" : [
212
+ " Is Multi-Label? False\n " ,
213
+ " preprocessing test...\n " ,
214
+ " language: en\n "
215
+ ]
216
+ },
217
+ {
218
+ "output_type" : " display_data" ,
219
+ "data" : {
220
+ "text/plain" : [
221
+ " <IPython.core.display.HTML object>"
222
+ ],
223
+ "text/html" : [
224
+ " \n " ,
225
+ " <style>\n " ,
226
+ " /* Turns off some styling */\n " ,
227
+ " progress {\n " ,
228
+ " /* gets rid of default border in Firefox and Opera. */\n " ,
229
+ " border: none;\n " ,
230
+ " /* Needs to be in here for Safari polyfill so background images work as expected. */\n " ,
231
+ " background-size: auto;\n " ,
232
+ " }\n " ,
233
+ " progress:not([value]), progress:not([value])::-webkit-progress-bar {\n " ,
234
+ " background: repeating-linear-gradient(45deg, #7e7e7e, #7e7e7e 10px, #5c5c5c 10px, #5c5c5c 20px);\n " ,
235
+ " }\n " ,
236
+ " .progress-bar-interrupted, .progress-bar-interrupted::-webkit-progress-bar {\n " ,
237
+ " background: #F44336;\n " ,
238
+ " }\n " ,
239
+ " </style>\n "
240
+ ]
241
+ },
242
+ "metadata" : {}
243
+ },
244
+ {
245
+ "output_type" : " display_data" ,
246
+ "data" : {
247
+ "text/plain" : [
248
+ " <IPython.core.display.HTML object>"
249
+ ],
250
+ "text/html" : [
251
+ " done."
252
+ ]
253
+ },
254
+ "metadata" : {}
255
+ }
256
+ ]
257
+ },
258
+ {
259
+ "cell_type" : " markdown" ,
260
+ "source" : [
261
+ " # BERT Model (Bidirectional Encoder Representations from Transformers)"
262
+ ],
263
+ "metadata" : {
264
+ "id" : " HsD1RIeyDDHi"
265
+ }
266
+ },
267
+ {
268
+ "cell_type" : " code" ,
269
+ "source" : [
270
+ " model=text.text_classifier(name=\" bert\" ,train_data=(x_train,y_train),preproc=preproc)"
271
+ ],
272
+ "metadata" : {
273
+ "id" : " egXY63ExDBG9" ,
274
+ "colab" : {
275
+ "base_uri" : " https://localhost:8080/"
276
+ },
277
+ "outputId" : " 9fec6679-1aeb-4098-e9d4-57cb869765cd"
278
+ },
279
+ "execution_count" : null ,
280
+ "outputs" : [
281
+ {
282
+ "output_type" : " stream" ,
283
+ "name" : " stdout" ,
284
+ "text" : [
285
+ " Is Multi-Label? False\n " ,
286
+ " maxlen is 400\n "
287
+ ]
288
+ },
289
+ {
290
+ "output_type" : " stream" ,
291
+ "name" : " stderr" ,
292
+ "text" : [
293
+ " /usr/local/lib/python3.10/dist-packages/keras/initializers/initializers.py:120: UserWarning: The initializer GlorotNormal is unseeded and being called multiple times, which will return identical values each time (even if the initializer is unseeded). Please update your code to provide a seed to the initializer, or avoid using the same initalizer instance more than once.\n " ,
294
+ " warnings.warn(\n "
295
+ ]
296
+ },
297
+ {
298
+ "output_type" : " stream" ,
299
+ "name" : " stdout" ,
300
+ "text" : [
301
+ " done.\n "
302
+ ]
303
+ }
304
+ ]
305
+ },
306
+ {
307
+ "cell_type" : " code" ,
308
+ "source" : [
309
+ " a=ktrain.get_learner(model=model,train_data=(x_train,y_train),val_data=(x_test,y_test),batch_size=32)"
310
+ ],
311
+ "metadata" : {
312
+ "id" : " ICtxz7LHaB1I" ,
313
+ "colab" : {
314
+ "base_uri" : " https://localhost:8080/"
315
+ },
316
+ "outputId" : " c3b1c676-3fab-4445-e227-975f6a015e16"
317
+ },
318
+ "execution_count" : null ,
319
+ "outputs" : [
320
+ {
321
+ "output_type" : " stream" ,
322
+ "name" : " stderr" ,
323
+ "text" : [
324
+ " /usr/local/lib/python3.10/dist-packages/ktrain/__init__.py:100: UserWarning: For a GPU with 12GB of RAM, the following maxima apply:\n " ,
325
+ " sequence len=64, max_batch_size=64\n " ,
326
+ " sequence len=128, max_batch_size=32\n " ,
327
+ " sequence len=256, max_batch_size=16\n " ,
328
+ " sequence len=320, max_batch_size=14\n " ,
329
+ " sequence len=384, max_batch_size=12\n " ,
330
+ " sequence len=512, max_batch_size=6\n " ,
331
+ " \n " ,
332
+ " You've exceeded these limits.\n " ,
333
+ " If using a GPU with <=12GB of memory, you may run out of memory during training.\n " ,
334
+ " If necessary, adjust sequence length or batch size based on above.\n " ,
335
+ " I.warnings.warn(msg)\n "
336
+ ]
337
+ }
338
+ ]
339
+ },
340
+ {
341
+ "cell_type" : " code" ,
342
+ "source" : [
343
+ " a.fit_onecycle(lr=2e-5,epochs=1)"
344
+ ],
345
+ "metadata" : {
346
+ "id" : " mAjZxMowbr_R" ,
347
+ "colab" : {
348
+ "base_uri" : " https://localhost:8080/" ,
349
+ "height" : 171
350
+ },
351
+ "outputId" : " 47cc0abe-4083-4cd5-cc8d-6d1ee3e5cb31"
352
+ },
353
+ "execution_count" : null ,
354
+ "outputs" : [
355
+ {
356
+ "output_type" : " error" ,
357
+ "ename" : " NameError" ,
358
+ "evalue" : " ignored" ,
359
+ "traceback" : [
360
+ " \u001b [0;31m---------------------------------------------------------------------------\u001b [0m" ,
361
+ " \u001b [0;31mNameError\u001b [0m Traceback (most recent call last)" ,
362
+ " \u001b [0;32m<ipython-input-1-3c959640d8b7>\u001b [0m in \u001b [0;36m<cell line: 1>\u001b [0;34m()\u001b [0m\n \u001b [0;32m----> 1\u001b [0;31m \u001b [0ma\u001b [0m\u001b [0;34m.\u001b [0m\u001b [0mfit_onecycle\u001b [0m\u001b [0;34m(\u001b [0m\u001b [0mlr\u001b [0m\u001b [0;34m=\u001b [0m\u001b [0;36m2e-5\u001b [0m\u001b [0;34m,\u001b [0m\u001b [0mepochs\u001b [0m\u001b [0;34m=\u001b [0m\u001b [0;36m1\u001b [0m\u001b [0;34m)\u001b [0m\u001b [0;34m\u001b [0m\u001b [0;34m\u001b [0m\u001b [0m\n \u001b [0m" ,
363
+ " \u001b [0;31mNameError\u001b [0m: name 'a' is not defined"
364
+ ]
365
+ }
366
+ ]
367
+ }
368
+ ]
369
+ }
0 commit comments