Skip to content

Commit da6379b

Browse files
nastyachizhikova, vaskonov, IgnatovFedor, dmitrijeuseew
authored
Refactor faq models (#1608)
Co-authored-by: vasily <[email protected]> Co-authored-by: Fedor Ignatov <[email protected]> Co-authored-by: Дмитрий Евсеев <[email protected]>
1 parent 3ee1b85 commit da6379b

28 files changed

+246 -1191 lines changed

README.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c
3434

3535
[Automatic Spelling Correction](http://docs.deeppavlov.ai/en/master/features/models/spelling_correction.html) | [Entity Linking](http://docs.deeppavlov.ai/en/master/features/models/entity_linking.html)
3636

37-
[Open Domain Questions Answering](http://docs.deeppavlov.ai/en/master/features/models/odqa.html) | [Frequently Asked Questions Answering](http://docs.deeppavlov.ai/en/master/features/models/faq.html)
38-
39-
[Russian SuperGLUE](http://docs.deeppavlov.ai/en/master/features/models/superglue.html)
37+
[Open Domain Questions Answering](http://docs.deeppavlov.ai/en/master/features/models/odqa.html) | [Russian SuperGLUE](http://docs.deeppavlov.ai/en/master/features/models/superglue.html)
4038

4139
**Embeddings**
4240

deeppavlov/configs/classifiers/glue/glue_mnli_roberta.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@
137137
},
138138
"download": [
139139
{
140-
"url": "https://files.deeppavlov.ai/0.16/classifiers/glue_mnli.tar.gz",
140+
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_mnli.tar.gz",
141141
"subdir": "{MODELS_PATH}"
142142
}
143143
]

deeppavlov/configs/classifiers/glue/glue_rte_roberta_mnli.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@
137137
},
138138
"download": [
139139
{
140-
"url": "https://files.deeppavlov.ai/0.16/classifiers/glue_rte.tar.gz",
140+
"url": "http://files.deeppavlov.ai/0.16/classifiers/glue_rte.tar.gz",
141141
"subdir": "{MODELS_PATH}"
142142
}
143143
]

deeppavlov/configs/cv/cv_tfidf_autofaq.json

Lines changed: 0 additions & 90 deletions
This file was deleted.

deeppavlov/configs/doc_retrieval/ru_ranker_tfidf_wiki.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,11 @@
3333
"save_path": "{MODELS_PATH}/odqa/ruwiki_tfidf_matrix.npz",
3434
"load_path": "{MODELS_PATH}/odqa/ruwiki_tfidf_matrix.npz",
3535
"tokenizer": {
36-
"class_name": "ru_tokenizer",
36+
"class_name": "stream_spacy_tokenizer",
37+
"spacy_model": "ru_core_news_sm",
3738
"lemmas": true,
39+
"lowercase": true,
40+
"filter_stopwords": true,
3841
"ngram_range": [
3942
1,
4043
2

deeppavlov/configs/embedder/tfidf_vectorizer.json

Lines changed: 0 additions & 61 deletions
This file was deleted.

deeppavlov/configs/faq/fasttext_avg_autofaq.json

Lines changed: 0 additions & 69 deletions
This file was deleted.
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
{
2+
"dataset_reader": {
3+
"class_name": "basic_classification_reader",
4+
"format": "json",
5+
"orient": "split",
6+
"x": "text",
7+
"y": "category",
8+
"data_path": "{DOWNLOADS_PATH}/massive/{LANGUAGE}",
9+
"train": "train.json",
10+
"valid": "dev.json",
11+
"test": "test.json"
12+
},
13+
"dataset_iterator": {
14+
"class_name": "basic_classification_iterator",
15+
"seed": 42,
16+
"shuffle": true,
17+
"shot": 5
18+
},
19+
"chainer": {
20+
"in": ["text"],
21+
"in_y": ["category"],
22+
"pipe": [
23+
{
24+
"class_name": "stream_spacy_tokenizer",
25+
"in": ["text"],
26+
"id": "my_tokenizer",
27+
"lemmas": false,
28+
"out": "token_lemmas",
29+
"spacy_model": "{SPACY_MODEL}"
30+
},
31+
{
32+
"ref": "my_tokenizer",
33+
"in": ["token_lemmas"],
34+
"out": ["text_lem"]
35+
},
36+
{
37+
"class_name": "fasttext",
38+
"in": ["token_lemmas"],
39+
"load_path": "{DOWNLOADS_PATH}/embeddings/fasttext/{LANGUAGE}.bin",
40+
"mean": true,
41+
"out": ["text_vector"]
42+
},
43+
{
44+
"id": "answers_vocab",
45+
"class_name": "simple_vocab",
46+
"fit_on": "category",
47+
"save_path": "{MODEL_PATH}/cat_answers.dict",
48+
"load_path": "{MODEL_PATH}/cat_answers.dict",
49+
"in": ["category"],
50+
"out": ["y_ids"]
51+
},
52+
{
53+
"in": ["text_vector"],
54+
"fit_on": ["text_vector", "y_ids"],
55+
"out": ["y_pred_proba"],
56+
"class_name": "sklearn_component",
57+
"main": true,
58+
"save_path": "{MODEL_PATH}/model.pkl",
59+
"load_path": "{MODEL_PATH}/model.pkl",
60+
"model_class": "sklearn.linear_model:LogisticRegression",
61+
"infer_method": "predict_proba",
62+
"C": 10,
63+
"penalty": "l2"
64+
},
65+
{
66+
"in": ["y_pred_proba"],
67+
"out": ["y_pred_ids"],
68+
"class_name": "proba2labels",
69+
"max_proba": true
70+
},
71+
{
72+
"in": ["y_pred_ids"],
73+
"out": ["y_pred_category"],
74+
"ref": "answers_vocab"
75+
}
76+
],
77+
"out": ["y_pred_category"]
78+
},
79+
"train": {
80+
"evaluation_targets": ["train", "valid", "test"],
81+
"class_name": "fit_trainer",
82+
"metrics": [
83+
{
84+
"name": "accuracy",
85+
"inputs": ["category", "y_pred_category"]
86+
}
87+
]
88+
},
89+
"metadata": {
90+
"variables": {
91+
"LANGUAGE": "en",
92+
"ROOT_PATH": "~/.deeppavlov",
93+
"SPACY_MODEL": "en_core_web_sm",
94+
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
95+
"MODEL_PATH": "{ROOT_PATH}/models/faq/{LANGUAGE}/fasttext_logreg"
96+
},
97+
"download": [
98+
{
99+
"url": "http://files.deeppavlov.ai/embeddings/fasttext/{LANGUAGE}.bin",
100+
"subdir": "{DOWNLOADS_PATH}/embeddings/fasttext"
101+
},
102+
{
103+
"url": "http://files.deeppavlov.ai/datasets/massive-{LANGUAGE}.tar.gz",
104+
"subdir": "{DOWNLOADS_PATH}/massive/{LANGUAGE}"
105+
},
106+
{
107+
"url": "https://files.deeppavlov.ai/faq/fasttext_logreg_{LANGUAGE}.tar.gz",
108+
"subdir": "{MODEL_PATH}"
109+
}
110+
]
111+
}
112+
}

0 commit comments

Comments
 (0)