Skip to content

Commit a41cf69

Browse files
committed
example 4 and 5
1 parent ebefeaf commit a41cf69

File tree

9 files changed

+325249
-0
lines changed

9 files changed

+325249
-0
lines changed

examples/ner_pos_tagging/coNLL_data/coNLL_testa.txt

Lines changed: 55045 additions & 0 deletions
Large diffs are not rendered by default.

examples/ner_pos_tagging/coNLL_data/coNLL_testb.txt

Lines changed: 50351 additions & 0 deletions
Large diffs are not rendered by default.

examples/ner_pos_tagging/coNLL_data/coNLL_train.txt

Lines changed: 219554 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"# Step - 1: Transforming data"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": null,
13+
"metadata": {},
14+
"outputs": [],
15+
"source": [
16+
"!python ../../data_transformations.py \\\n",
17+
" --transform_file 'transform_file_conll.yml'"
18+
]
19+
},
20+
{
21+
"cell_type": "markdown",
22+
"metadata": {},
23+
"source": [
24+
"# Step - 2: Data Preparation"
25+
]
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": null,
30+
"metadata": {},
31+
"outputs": [],
32+
"source": [
33+
"!python ../../data_preparation.py \\\n",
34+
" --task_file 'tasks_file_conll.yml' \\\n",
35+
" --data_dir '../../data' \\\n",
36+
" --max_seq_len 50"
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"metadata": {},
42+
"source": [
43+
"# Step - 3: Running Training"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": null,
49+
"metadata": {},
50+
"outputs": [],
51+
"source": [
52+
"!python ../../train.py \\\n",
53+
" --data_dir '../../data/bert-base-uncased_prepared_data' \\\n",
54+
" --task_file 'tasks_file_conll.yml' \\\n",
55+
" --out_dir 'conll_ner_pos_bert_base' \\\n",
56+
" --epochs 5 \\\n",
57+
" --train_batch_size 32 \\\n",
58+
" --eval_batch_size 32 \\\n",
59+
" --grad_accumulation_steps 1 \\\n",
60+
" --log_per_updates 50 \\\n",
61+
" --eval_while_train True \\\n",
62+
" --test_while_train True \\\n",
63+
" --max_seq_len 50 \\\n",
64+
" --silent True "
65+
]
66+
},
67+
{
68+
"cell_type": "markdown",
69+
"metadata": {},
70+
"source": [
71+
"# Step - 4: Inferring"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": null,
77+
"metadata": {},
78+
"outputs": [],
79+
"source": [
80+
"import sys\n",
81+
"sys.path.insert(1, '../../')\n",
82+
"from infer_pipeline import inferPipeline"
83+
]
84+
}
85+
],
86+
"metadata": {
87+
"kernelspec": {
88+
"display_name": "Python 3",
89+
"language": "python",
90+
"name": "python3"
91+
},
92+
"language_info": {
93+
"codemirror_mode": {
94+
"name": "ipython",
95+
"version": 3
96+
},
97+
"file_extension": ".py",
98+
"mimetype": "text/x-python",
99+
"name": "python",
100+
"nbconvert_exporter": "python",
101+
"pygments_lexer": "ipython3",
102+
"version": "3.7.3"
103+
}
104+
},
105+
"nbformat": 4,
106+
"nbformat_minor": 4
107+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
conllner:
2+
model_type: BERT
3+
config_name: bert-base-uncased
4+
dropout_prob: 0.2
5+
label_map_or_file: ../../data/ner_coNLL_train_label_map.joblib
6+
metrics:
7+
- seqeval_f1_score
8+
- seqeval_precision
9+
- seqeval_recall
10+
loss_type: NERLoss
11+
task_type: NER
12+
file_names:
13+
- ner_coNLL_train.tsv
14+
- ner_coNLL_testa.tsv
15+
- ner_coNLL_testb.tsv
16+
17+
conllpos:
18+
model_type: BERT
19+
config_name: bert-base-uncased
20+
dropout_prob: 0.2
21+
label_map_or_file: ../../data/pos_coNLL_train_label_map.joblib
22+
metrics:
23+
- seqeval_f1_score
24+
- seqeval_precision
25+
- seqeval_recall
26+
loss_type: NERLoss
27+
task_type: NER
28+
file_names:
29+
- pos_coNLL_train.tsv
30+
- pos_coNLL_testa.tsv
31+
- pos_coNLL_testb.tsv
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
transform1:
2+
transform_func: coNLL_ner_pos_to_tsv
3+
read_file_names:
4+
- coNLL_train.txt
5+
- coNLL_testa.txt
6+
- coNLL_testb.txt
7+
read_dir: coNLL_data
8+
save_dir: ../../data
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"!wget qim.fs.quoracdn.net/quora_duplicate_questions.tsv -P qqp_data/"
10+
]
11+
},
12+
{
13+
"cell_type": "markdown",
14+
"metadata": {},
15+
"source": [
16+
"# Step - 1: Data Transformations\n",
17+
"\n",
18+
"Defining transform file\n",
19+
"\n",
20+
"```\n",
21+
"sample_transform:\n",
22+
" transform_func: qqp_query_similarity_to_tsv\n",
23+
" read_file_names:\n",
24+
" - quora_duplicate_questions.tsv\n",
25+
" read_dir : qqp_data\n",
26+
" save_dir: ../../data\n",
27+
"```"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": null,
33+
"metadata": {},
34+
"outputs": [],
35+
"source": [
36+
"!python ../../data_transformations.py \\\n",
37+
" --transform_file 'transform_file_qqp.yml'"
38+
]
39+
},
40+
{
41+
"cell_type": "markdown",
42+
"metadata": {},
43+
"source": [
44+
"# Step - 2: Data Preparation\n",
45+
"\n",
46+
"Defining task file for query similarity detection with QQP data\n",
47+
"\n",
48+
"```\n",
49+
"querysimilarity:\n",
50+
" model_type: BERT\n",
51+
" config_name: bert-base-uncased\n",
52+
" dropout_prob: 0.2\n",
53+
" metrics:\n",
54+
" - classification_accuracy\n",
55+
" loss_type: CrossEntropyLoss\n",
56+
" class_num: 2\n",
57+
" task_type: SentencePairClassification\n",
58+
" file_names:\n",
59+
" - qqp_query_similarity_train.tsv\n",
60+
" - qqp_query_similarity_dev.tsv\n",
61+
" - qqp_query_similarity_test.tsv\n",
62+
"```"
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": null,
68+
"metadata": {},
69+
"outputs": [],
70+
"source": [
71+
"!python ../../data_preparation.py \\\n",
72+
" --task_file 'tasks_file_qqp.yml' \\\n",
73+
" --data_dir '../../data' \\\n",
74+
" --max_seq_len 200"
75+
]
76+
},
77+
{
78+
"cell_type": "markdown",
79+
"metadata": {},
80+
"source": [
81+
"# Step - 3: Running Training"
82+
]
83+
},
84+
{
85+
"cell_type": "code",
86+
"execution_count": null,
87+
"metadata": {},
88+
"outputs": [],
89+
"source": [
90+
"!python ../../train.py \\\n",
91+
" --data_dir '../../data/bert-base-uncased_prepared_data' \\\n",
92+
" --task_file 'tasks_file_qqp.yml' \\\n",
93+
" --out_dir 'qqp_query_similarity_bert_base' \\\n",
94+
" --epochs 3 \\\n",
95+
" --train_batch_size 8 \\\n",
96+
" --eval_batch_size 16 \\\n",
97+
" --grad_accumulation_steps 2 \\\n",
98+
" --log_per_updates 50 \\\n",
99+
" --eval_while_train True \\\n",
100+
" --test_while_train True \\\n",
101+
" --max_seq_len 200 \\\n",
102+
" --silent True "
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": null,
108+
"metadata": {},
109+
"outputs": [],
110+
"source": []
111+
}
112+
],
113+
"metadata": {
114+
"kernelspec": {
115+
"display_name": "Python 3",
116+
"language": "python",
117+
"name": "python3"
118+
},
119+
"language_info": {
120+
"codemirror_mode": {
121+
"name": "ipython",
122+
"version": 3
123+
},
124+
"file_extension": ".py",
125+
"mimetype": "text/x-python",
126+
"name": "python",
127+
"nbconvert_exporter": "python",
128+
"pygments_lexer": "ipython3",
129+
"version": "3.7.3"
130+
}
131+
},
132+
"nbformat": 4,
133+
"nbformat_minor": 4
134+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
querysimilarity:
2+
model_type: BERT
3+
config_name: bert-base-uncased
4+
dropout_prob: 0.2
5+
metrics:
6+
- classification_accuracy
7+
loss_type: CrossEntropyLoss
8+
class_num: 2
9+
task_type: SentencePairClassification
10+
file_names:
11+
- qqp_query_similarity_train.tsv
12+
- qqp_query_similarity_dev.tsv
13+
- qqp_query_similarity_test.tsv
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
sample_transform:
2+
transform_func: qqp_query_similarity_to_tsv
3+
read_file_names:
4+
- quora_duplicate_questions.tsv
5+
read_dir : qqp_data
6+
save_dir: ../../data

0 commit comments

Comments
 (0)