Commit 6587f43

Update (very old) notebook to v2
1 parent 9ea7a46

File tree

1 file changed (+19, -17 lines)


medcat-trainer/notebook_docs/Train_MedCAT_Models.ipynb

Lines changed: 19 additions & 17 deletions
@@ -186,7 +186,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 2,
+"execution_count": null,
 "metadata": {
 "ExecuteTime": {
 "end_time": "2020-09-08T11:27:34.270631Z",
@@ -195,9 +195,11 @@
 },
 "outputs": [],
 "source": [
+"import json\n",
+"\n",
 "from medcat.cat import CAT\n",
 "from medcat.cdb import CDB\n",
-"from medcat.utils.vocab import Vocab"
+"from medcat.vocab import Vocab"
 ]
 },
 {
@@ -310,7 +312,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 5,
+"execution_count": null,
 "metadata": {
 "ExecuteTime": {
 "end_time": "2020-09-08T11:27:59.782731Z",
@@ -319,16 +321,14 @@
 },
 "outputs": [],
 "source": [
-"cdb = CDB()\n",
-"cdb.load_dict(cdb_path)\n",
-"vocab = Vocab()\n",
-"vocab.load_dict(vocab_path)\n",
+"cdb = CDB.load(cdb_path)\n",
+"vocab = Vocab.load(vocab_path)\n",
 "cat = CAT(cdb, vocab)"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 10,
+"execution_count": null,
 "metadata": {
 "ExecuteTime": {
 "end_time": "2020-09-08T11:37:38.546552Z",
@@ -1383,12 +1383,13 @@
 }
 ],
 "source": [
-"cat.train_supervised(data_path=\"example_data/MedCAT_Export_With_Text_2020-05-22_10_34_09.json\",\n",
-"                     nepochs=1,\n",
-"                     lr=0.1,\n",
-"                     anneal=False, # Unless we are reseting the CDB or cui_count this is False\n",
-"                     print_stats=True,\n",
-"                     use_filters=True)"
+"with open(\"example_data/MedCAT_Export_With_Text_2020-05-22_10_34_09.json\") as f:\n",
+"    data = json.load(f)\n",
+"cat.trainer.train_supervised_raw(\n",
+"    data=data,\n",
+"    nepochs=1,\n",
+"    print_stats=True,\n",
+"    use_filters=True)"
 ]
 },
 {
@@ -1402,7 +1403,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 50,
+"execution_count": null,
 "metadata": {
 "ExecuteTime": {
 "end_time": "2020-09-08T15:04:02.394607Z",
@@ -1411,14 +1412,14 @@
 },
 "outputs": [],
 "source": [
-"from medcat.meta_cat import MetaCAT\n",
+"from medcat.components.addons.meta_cat import MetaCAT\n",
 "from tokenizers import ByteLevelBPETokenizer\n",
 "from itertools import chain"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 18,
+"execution_count": null,
 "metadata": {
 "ExecuteTime": {
 "end_time": "2020-09-08T14:46:39.070589Z",
@@ -1427,6 +1428,7 @@
 },
 "outputs": [],
 "source": [
+"import numpy as np\n",
 "# Tokenizer instantiation\n",
 "tokenizer = ByteLevelBPETokenizer(vocab_file='data/medmen-vocab.json', merges_file='data/medmen-merges.txt')\n",
 "embeddings = np.load(open('data/embeddings.npy', 'rb'))"
