|
186 | 186 | }, |
187 | 187 | { |
188 | 188 | "cell_type": "code", |
189 | | - "execution_count": 2, |
| 189 | + "execution_count": null, |
190 | 190 | "metadata": { |
191 | 191 | "ExecuteTime": { |
192 | 192 | "end_time": "2020-09-08T11:27:34.270631Z", |
|
195 | 195 | }, |
196 | 196 | "outputs": [], |
197 | 197 | "source": [ |
| 198 | + "import json\n", |
| 199 | + "\n", |
198 | 200 | "from medcat.cat import CAT\n", |
199 | 201 | "from medcat.cdb import CDB\n", |
200 | | - "from medcat.utils.vocab import Vocab" |
| 202 | + "from medcat.vocab import Vocab" |
201 | 203 | ] |
202 | 204 | }, |
203 | 205 | { |
|
310 | 312 | }, |
311 | 313 | { |
312 | 314 | "cell_type": "code", |
313 | | - "execution_count": 5, |
| 315 | + "execution_count": null, |
314 | 316 | "metadata": { |
315 | 317 | "ExecuteTime": { |
316 | 318 | "end_time": "2020-09-08T11:27:59.782731Z", |
|
319 | 321 | }, |
320 | 322 | "outputs": [], |
321 | 323 | "source": [ |
322 | | - "cdb = CDB()\n", |
323 | | - "cdb.load_dict(cdb_path)\n", |
324 | | - "vocab = Vocab()\n", |
325 | | - "vocab.load_dict(vocab_path)\n", |
| 324 | + "cdb = CDB.load(cdb_path)\n", |
| 325 | + "vocab = Vocab.load(vocab_path)\n", |
326 | 326 | "cat = CAT(cdb, vocab)" |
327 | 327 | ] |
328 | 328 | }, |
329 | 329 | { |
330 | 330 | "cell_type": "code", |
331 | | - "execution_count": 10, |
| 331 | + "execution_count": null, |
332 | 332 | "metadata": { |
333 | 333 | "ExecuteTime": { |
334 | 334 | "end_time": "2020-09-08T11:37:38.546552Z", |
|
1383 | 1383 | } |
1384 | 1384 | ], |
1385 | 1385 | "source": [ |
1386 | | - "cat.train_supervised(data_path=\"example_data/MedCAT_Export_With_Text_2020-05-22_10_34_09.json\",\n", |
1387 | | - " nepochs=1,\n", |
1388 | | - " lr=0.1,\n", |
1389 | | - " anneal=False, # Unless we are reseting the CDB or cui_count this is False\n", |
1390 | | - " print_stats=True,\n", |
1391 | | - " use_filters=True)" |
| 1386 | + "with open(\"example_data/MedCAT_Export_With_Text_2020-05-22_10_34_09.json\") as f:\n", |
| 1387 | + " data = json.load(f)\n", |
| 1388 | + "cat.trainer.train_supervised_raw(\n", |
| 1389 | + " data=data,\n", |
| 1390 | + " nepochs=1,\n", |
| 1391 | + " print_stats=True,\n", |
| 1392 | + " use_filters=True)" |
1392 | 1393 | ] |
1393 | 1394 | }, |
1394 | 1395 | { |
|
1402 | 1403 | }, |
1403 | 1404 | { |
1404 | 1405 | "cell_type": "code", |
1405 | | - "execution_count": 50, |
| 1406 | + "execution_count": null, |
1406 | 1407 | "metadata": { |
1407 | 1408 | "ExecuteTime": { |
1408 | 1409 | "end_time": "2020-09-08T15:04:02.394607Z", |
|
1411 | 1412 | }, |
1412 | 1413 | "outputs": [], |
1413 | 1414 | "source": [ |
1414 | | - "from medcat.meta_cat import MetaCAT\n", |
| 1415 | + "from medcat.components.addons.meta_cat import MetaCAT\n", |
1415 | 1416 | "from tokenizers import ByteLevelBPETokenizer\n", |
1416 | 1417 | "from itertools import chain" |
1417 | 1418 | ] |
1418 | 1419 | }, |
1419 | 1420 | { |
1420 | 1421 | "cell_type": "code", |
1421 | | - "execution_count": 18, |
| 1422 | + "execution_count": null, |
1422 | 1423 | "metadata": { |
1423 | 1424 | "ExecuteTime": { |
1424 | 1425 | "end_time": "2020-09-08T14:46:39.070589Z", |
|
1427 | 1428 | }, |
1428 | 1429 | "outputs": [], |
1429 | 1430 | "source": [ |
| 1431 | + "import numpy as np\n", |
1430 | 1432 | "# Tokenizer instantiation\n", |
1431 | 1433 | "tokenizer = ByteLevelBPETokenizer(vocab_file='data/medmen-vocab.json', merges_file='data/medmen-merges.txt')\n", |
1432 | 1434 | "embeddings = np.load(open('data/embeddings.npy', 'rb'))" |
|
0 commit comments