|
9 | 9 | print() |
10 | 10 | print("1. Preprocessing documents...", end = '', flush=True) |
11 | 11 | # Preprocessing full texts |
12 | | -from preprocessdocs import preprocess |
13 | | -preprocess() |
| 12 | +from preprocessdocs import preprocess, preprocessingdone |
| 13 | +if not preprocessingdone(): |
| 14 | + preprocess() |
14 | 15 | print("Done!") |
15 | 16 | print() |
16 | 17 | print("2. Executing semantic measures") |
17 | 18 | print("a) Training corpus models...", end = '', flush=True) |
18 | 19 | # Train Doc2Vec and Word2Vec models on full texts of documents |
19 | 20 | from trainedmodels import getdoc2vecmodel,getword2vecmodel |
20 | 21 | doc2vecmodel = getdoc2vecmodel() |
21 | | -word2vecmodel = getword2vecmodel() |
| 22 | +# word2vecmodel = getword2vecmodel() |
22 | 23 | print("Done!") |
23 | 24 | print("b) Similarity checks: corpus models...", end = '', flush=True) |
| 25 | +print() |
24 | 26 | # # Trained models: check document similarity |
25 | 27 | from trainedmodelssimilarity import dosimilaritychecks |
26 | 28 | dosimilaritychecks("doc2vec",doc2vecmodel,"cosine") |
27 | | -dosimilaritychecks("word2vec",word2vecmodel,"wmd") |
| 29 | +# dosimilaritychecks("word2vec",word2vecmodel,"wmd") |
| 30 | +print() |
28 | 31 | print("Done!") |
29 | 32 |
|
30 | | -# --------------------------------------------- # |
31 | | -# --- ADD CUSTOM PRETRAINED MODEL CODE HERE --- # |
32 | | -# --------------------------------------------- # |
33 | | - |
| 33 | +# # --------------------------------------------- # |
| 34 | +# # --- ADD CUSTOM PRETRAINED MODEL CODE HERE --- # |
| 35 | +# # --------------------------------------------- # |
34 | 36 | # print("c) Adapting GoogleNews pretrained model...", end = '', flush=True) |
35 | 37 | # # # GoogleNews pretrained load / train |
36 | 38 | # from pretrainedmodels import getdoc2vecmodel,getword2vecmodel |
|
53 | 55 | # dosimilaritychecks("doc2vec", "law2vec", law2vecdoc2vecmodel, "cosine") |
54 | 56 | # dosimilaritychecks("word2vec", "law2vec", law2vecword2vecmodel, "wmd") |
55 | 57 | # print("Done!") |
56 | | -print() |
| 58 | +# print() |
| 59 | + |
57 | 60 | print("3. Executing syntactic measures") |
58 | 61 | print("a) Training TFIDF and Ngram models...", end = '', flush=True) |
59 | 62 | # # TFIDF, Ngram models load / train |
|
68 | 71 | dosyntacticsimilaritychecks("jaccard",model=None) |
69 | 72 | print("Done!") |
70 | 73 | print() |
| 74 | +print("4. Analysing results") |
| 75 | +from analyseresults import analyse |
| 76 | +analyse() |
| 77 | +print("Done!") |
| 78 | +print() |
71 | 79 | print("-- FINISHED --") |