Skip to content

Commit bfd5cd0

Browse files
authored
Feat/rsg (#1577)
1 parent caa1f63 commit bfd5cd0

20 files changed

+1366
-41
lines changed

README.md

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,15 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c
2626

2727
**Models**
2828

29-
[Named Entity Recognition](http://docs.deeppavlov.ai/en/master/features/models/ner.html)
29+
[Named Entity Recognition](http://docs.deeppavlov.ai/en/master/features/models/ner.html) | [Intent/Sentence Classification](http://docs.deeppavlov.ai/en/master/features/models/classifiers.html) |
3030

31-
[Intent/Sentence Classification](http://docs.deeppavlov.ai/en/master/features/models/classifiers.html) | [Question Answering over Text (SQuAD)](http://docs.deeppavlov.ai/en/master/features/models/squad.html)
32-
33-
[Knowledge Base Question Answering](http://docs.deeppavlov.ai/en/master/features/models/kbqa.html)
31+
[Question Answering over Text (SQuAD)](http://docs.deeppavlov.ai/en/master/features/models/squad.html) | [Knowledge Base Question Answering](http://docs.deeppavlov.ai/en/master/features/models/kbqa.html)
3432

3533
[Sentence Similarity/Ranking](http://docs.deeppavlov.ai/en/master/features/models/neural_ranking.html) | [TF-IDF Ranking](http://docs.deeppavlov.ai/en/master/features/models/tfidf_ranking.html)
3634

37-
[Automatic Spelling Correction](http://docs.deeppavlov.ai/en/master/features/models/spelling_correction.html)
35+
[Automatic Spelling Correction](http://docs.deeppavlov.ai/en/master/features/models/spelling_correction.html) | [Entity Linking](http://docs.deeppavlov.ai/en/master/features/models/entity_linking.html)
3836

39-
[Entity Linking](http://docs.deeppavlov.ai/en/master/features/models/entity_linking.html)
37+
[Russian SuperGLUE](http://docs.deeppavlov.ai/en/master/features/models/superglue.html)
4038

4139
**Skills**
4240

@@ -66,19 +64,13 @@ Please leave us [your feedback](https://forms.gle/i64fowQmiVhMMC7f9) on how we c
6664

6765
0. We support `Linux` platform, `Python 3.6`, `3.7`, `3.8` and `3.9`
6866
* **`Python 3.5` is not supported!**
69-
* **installation for `Windows` requires `Git`(for example, [git](https://git-scm.com/download/win)) and `Visual Studio 2015/2017` with `C++` build tools installed!**
7067

7168
1. Create and activate a virtual environment:
7269
* `Linux`
7370
```
7471
python -m venv env
7572
source ./env/bin/activate
7673
```
77-
* `Windows`
78-
```
79-
python -m venv env
80-
.\env\Scripts\activate.bat
81-
```
8274
2. Install the package inside the environment:
8375
```
8476
pip install deeppavlov

deeppavlov/configs/ner/ner_case_agnostic_mdistilbert.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"dataset_reader": {
33
"class_name": "conll2003_reader",
4+
"data_path": "{DOWNLOADS_PATH}/conll2003/",
45
"dataset_name": "conll2003",
56
"provide_pos": false
67
},
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
{
2+
"dataset_reader": {
3+
"class_name": "huggingface_dataset_reader",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
6+
"train": "train",
7+
"valid": "validation",
8+
"test": "test"
9+
},
10+
"dataset_iterator": {
11+
"class_name": "huggingface_dataset_iterator",
12+
"features": ["question", "passage"],
13+
"label": "label",
14+
"seed": 42
15+
},
16+
"chainer": {
17+
"in": ["question", "passage"],
18+
"in_y": ["y"],
19+
"pipe": [
20+
{
21+
"class_name": "torch_transformers_preprocessor",
22+
"vocab_file": "{BASE_MODEL}",
23+
"do_lower_case": false,
24+
"max_seq_length": 512,
25+
"in": ["question", "passage"],
26+
"out": ["bert_features"]
27+
},
28+
{
29+
"id": "classes_vocab",
30+
"class_name": "simple_vocab",
31+
"fit_on": ["y"],
32+
"save_path": "{MODEL_PATH}/classes.dict",
33+
"load_path": "{MODEL_PATH}/classes.dict",
34+
"in": ["y"],
35+
"out": ["y_ids"]
36+
},
37+
{
38+
"in": ["y_ids"],
39+
"out": ["y_onehot"],
40+
"class_name": "one_hotter",
41+
"depth": "#classes_vocab.len",
42+
"single_vector": true
43+
},
44+
{
45+
"class_name": "torch_transformers_classifier",
46+
"n_classes": "#classes_vocab.len",
47+
"return_probas": true,
48+
"pretrained_bert": "{BASE_MODEL}",
49+
"is_binary": "{BINARY_CLASSIFICATION}",
50+
"save_path": "{MODEL_PATH}/model",
51+
"load_path": "{MODEL_PATH}/model",
52+
"optimizer": "AdamW",
53+
"optimizer_parameters": {"lr": 2e-05},
54+
"in": ["bert_features"],
55+
"in_y": ["y_ids"],
56+
"out": ["y_pred_probas"]
57+
},
58+
{
59+
"in": ["y_pred_probas"],
60+
"out": ["y_pred_ids"],
61+
"class_name": "proba2labels",
62+
"is_binary": "{BINARY_CLASSIFICATION}",
63+
"max_proba": true
64+
},
65+
{
66+
"in": ["y_pred_ids"],
67+
"out": ["y_pred_labels"],
68+
"ref": "classes_vocab"
69+
}
70+
],
71+
"out": ["y_pred_labels"]
72+
},
73+
"train": {
74+
"batch_size": 4,
75+
"metrics": ["accuracy"],
76+
"epochs": 10,
77+
"validation_patience": 10,
78+
"val_every_n_epochs": 1,
79+
"log_every_n_epochs": 1,
80+
"show_examples": false,
81+
"evaluation_targets": ["train", "valid"],
82+
"class_name": "torch_trainer",
83+
"tensorboard_log_dir": "{MODEL_PATH}/",
84+
"pytest_max_batches": 2,
85+
"pytest_batch_size": 2
86+
},
87+
"metadata": {
88+
"variables": {
89+
"BASE_MODEL": "DeepPavlov/rubert-base-cased",
90+
"ROOT_PATH": "~/.deeppavlov",
91+
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
92+
"MODELS_PATH": "{ROOT_PATH}/models",
93+
"COMPETITION": "russian_super_glue",
94+
"BINARY_CLASSIFICATION": false,
95+
"TASK": "danetqa",
96+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
97+
},
98+
"download": [
99+
{
100+
"url": "http://files.deeppavlov.ai/v1/russian_super_glue/russian_superglue_danetqa_rubert.tar.gz",
101+
"subdir": "{MODEL_PATH}"
102+
}
103+
]
104+
}
105+
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
{
2+
"dataset_reader": {
3+
"class_name": "huggingface_dataset_reader",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
6+
"test": "test"
7+
},
8+
"dataset_iterator": {
9+
"class_name": "huggingface_dataset_iterator",
10+
"features": ["sentence1", "sentence2"],
11+
"label": "label",
12+
"seed": 42
13+
},
14+
"chainer": {
15+
"in": ["sentence1", "sentence2"],
16+
"in_y": ["y"],
17+
"pipe": [
18+
{
19+
"class_name": "torch_transformers_preprocessor",
20+
"vocab_file": "{BASE_MODEL}",
21+
"do_lower_case": false,
22+
"max_seq_length": 256,
23+
"in": ["sentence1", "sentence2"],
24+
"out": ["bert_features"]
25+
},
26+
{
27+
"id": "classes_vocab",
28+
"class_name": "simple_vocab",
29+
"fit_on": ["y"],
30+
"save_path": "{MODEL_PATH}/classes.dict",
31+
"load_path": "{MODEL_PATH}/classes.dict",
32+
"in": ["y"],
33+
"out": ["y_ids"]
34+
},
35+
{
36+
"in": ["y_ids"],
37+
"out": ["y_onehot"],
38+
"class_name": "one_hotter",
39+
"depth": "#classes_vocab.len",
40+
"single_vector": true
41+
},
42+
{
43+
"class_name": "torch_transformers_classifier",
44+
"n_classes": "#classes_vocab.len",
45+
"return_probas": true,
46+
"pretrained_bert": "{BASE_MODEL}",
47+
"is_binary": "{BINARY_CLASSIFICATION}",
48+
"save_path": "{MODEL_PATH}/model",
49+
"load_path": "{MODEL_PATH}/model",
50+
"optimizer": "AdamW",
51+
"optimizer_parameters": {"lr": 2e-05, "weight_decay": 0.1},
52+
"learning_rate_drop_patience": 3,
53+
"learning_rate_drop_div": 2.0,
54+
"in": ["bert_features"],
55+
"in_y": ["y_ids"],
56+
"out": ["y_pred_probas"]
57+
},
58+
{
59+
"in": ["y_pred_probas"],
60+
"out": ["y_pred_ids"],
61+
"class_name": "proba2labels",
62+
"is_binary": "{BINARY_CLASSIFICATION}",
63+
"max_proba": true
64+
},
65+
{
66+
"in": ["y_pred_ids"],
67+
"out": ["y_pred_labels"],
68+
"ref": "classes_vocab"
69+
}
70+
],
71+
"out": ["y_pred_labels"]
72+
},
73+
"train": {
74+
"batch_size": 16,
75+
"metrics": ["matthews_correlation"],
76+
"validation_patience": 10,
77+
"val_every_n_epochs": 1,
78+
"log_every_n_epochs": 1,
79+
"show_examples": false,
80+
"evaluation_targets": ["test"],
81+
"class_name": "torch_trainer",
82+
"tensorboard_log_dir": "{MODEL_PATH}/",
83+
"pytest_max_batches": 2,
84+
"pytest_batch_size": 2
85+
},
86+
"metadata": {
87+
"variables": {
88+
"BASE_MODEL": "DeepPavlov/rubert-base-cased",
89+
"ROOT_PATH": "~/.deeppavlov",
90+
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
91+
"MODELS_PATH": "{ROOT_PATH}/models",
92+
"COMPETITION": "russian_super_glue",
93+
"BINARY_CLASSIFICATION": false,
94+
"TASK": "lidirus",
95+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/terra/{BASE_MODEL}"
96+
},
97+
"download": [
98+
{
99+
"url": "http://files.deeppavlov.ai/v1/russian_super_glue/russian_superglue_terra_rubert.tar.gz",
100+
"subdir": "{MODEL_PATH}"
101+
}
102+
]
103+
}
104+
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
{
2+
"dataset_reader": {
3+
"class_name": "huggingface_dataset_reader",
4+
"path": "{COMPETITION}",
5+
"name": "{TASK}",
6+
"train": "train",
7+
"valid": "validation",
8+
"test": "test"
9+
},
10+
"dataset_iterator": {
11+
"class_name": "huggingface_dataset_iterator",
12+
"features": ["context", "answer", "idx"],
13+
"label": "label",
14+
"seed": 42
15+
},
16+
"chainer": {
17+
"in": ["context", "answer", "idx"],
18+
"in_y": ["y"],
19+
"pipe": [
20+
{
21+
"class_name": "torch_transformers_preprocessor",
22+
"vocab_file": "{BASE_MODEL}",
23+
"do_lower_case": false,
24+
"max_seq_length": 512,
25+
"in": ["context", "answer"],
26+
"out": ["bert_features"]
27+
},
28+
{
29+
"id": "classes_vocab",
30+
"class_name": "simple_vocab",
31+
"fit_on": ["y"],
32+
"save_path": "{MODEL_PATH}/classes.dict",
33+
"load_path": "{MODEL_PATH}/classes.dict",
34+
"in": ["y"],
35+
"out": ["y_ids"]
36+
},
37+
{
38+
"in": ["y_ids"],
39+
"out": ["y_onehot"],
40+
"class_name": "one_hotter",
41+
"depth": "#classes_vocab.len",
42+
"single_vector": true
43+
},
44+
{
45+
"class_name": "torch_transformers_classifier",
46+
"n_classes": "#classes_vocab.len",
47+
"return_probas": true,
48+
"is_binary": "{BINARY_CLASSIFICATION}",
49+
"pretrained_bert": "{BASE_MODEL}",
50+
"save_path": "{MODEL_PATH}/model",
51+
"load_path": "{MODEL_PATH}/model",
52+
"optimizer": "AdamW",
53+
"optimizer_parameters": {"lr": 2e-05},
54+
"in": ["bert_features"],
55+
"in_y": ["y_ids"],
56+
"out": ["y_pred_probas"]
57+
},
58+
{
59+
"in": ["y_pred_probas"],
60+
"out": ["y_pred_ids"],
61+
"class_name": "proba2labels",
62+
"is_binary": "{BINARY_CLASSIFICATION}",
63+
"max_proba": true
64+
},
65+
{
66+
"in": ["y_pred_ids"],
67+
"out": ["y_pred_labels"],
68+
"ref": "classes_vocab"
69+
}
70+
],
71+
"out": ["y_pred_labels"]
72+
},
73+
"train": {
74+
"batch_size": 8,
75+
"metrics": ["roc_auc","f1"],
76+
"epochs": 10,
77+
"validation_patience": 10,
78+
"val_every_n_epochs": 1,
79+
"log_every_n_epochs": 1,
80+
"show_examples": false,
81+
"evaluation_targets": ["train", "valid"],
82+
"class_name": "torch_trainer",
83+
"tensorboard_log_dir": "{MODEL_PATH}/",
84+
"pytest_max_batches": 2,
85+
"pytest_batch_size": 2
86+
},
87+
"metadata": {
88+
"variables": {
89+
"BASE_MODEL": "DeepPavlov/rubert-base-cased",
90+
"ROOT_PATH": "~/.deeppavlov",
91+
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
92+
"MODELS_PATH": "{ROOT_PATH}/models",
93+
"COMPETITION": "russian_super_glue",
94+
"BINARY_CLASSIFICATION": false,
95+
"TASK": "muserc",
96+
"MODEL_PATH": "{MODELS_PATH}/{COMPETITION}/{TASK}/{BASE_MODEL}"
97+
},
98+
"download": [
99+
{
100+
"url": "http://files.deeppavlov.ai/v1/russian_super_glue/russian_superglue_muserc_rubert.tar.gz",
101+
"subdir": "{MODEL_PATH}"
102+
}
103+
]
104+
}
105+
}

0 commit comments

Comments
 (0)