Skip to content
This repository was archived by the owner on Nov 8, 2022. It is now read-only.

Commit c1b287e

Browse files
authored
Merge pull request #212 from NervanaSystems/peter/missing_docstrings
Fixed docstrings
2 parents e3741d3 + f6be5dd commit c1b287e

File tree

5 files changed

+37
-16
lines changed

5 files changed

+37
-16
lines changed

doc/source/api.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,6 @@ these will be placed into a central repository.
9292
nlp_architect.data.intent_datasets.IntentDataset
9393
nlp_architect.data.intent_datasets.TabularIntentDataset
9494
nlp_architect.data.intent_datasets.SNIPS
95-
nlp_architect.data.sequential_tagging.NamedEntityDataset
95+
nlp_architect.data.sequential_tagging.SequentialTaggingDataset
9696
nlp_architect.data.babi_dialog.BABI_Dialog
9797
nlp_architect.data.wikimovies.WIKIMOVIES

doc/source/ner_crf.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ In the above format each sentence is separated by an empty line. Each line consi
4949
Data loader
5050
-----------
5151

52-
Loading data into the model can be done using the ``NamedEntityDataset`` data loader from ``nlp_architect.data.sequential_tagging`` package, which can be used with the prepared train and test data sets described above.
52+
Loading data into the model can be done using the ``SequentialTaggingDataset`` data loader from ``nlp_architect.data.sequential_tagging`` package, which can be used with the prepared train and test data sets described above.
5353

5454
The data loader returns 2 numpy matrices:
5555
1. sparse word representation of the sentence words

examples/ner/train.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
from keras.utils import to_categorical
2525
from nlp_architect.contrib.keras.callbacks import ConllCallback
26-
from nlp_architect.data.sequential_tagging import NamedEntityDataset
26+
from nlp_architect.data.sequential_tagging import SequentialTaggingDataset
2727
from nlp_architect.models.ner_crf import NERCRF
2828
from nlp_architect.utils.io import validate_existing_filepath, validate_parent_exists, validate
2929
from nlp_architect.utils.metrics import get_conll_scores
@@ -88,10 +88,10 @@ def validate_input_args(args):
8888
args = read_input_args()
8989

9090
# load dataset and parameters
91-
dataset = NamedEntityDataset(args.train_file, args.test_file,
92-
max_sentence_length=args.sentence_length,
93-
max_word_length=args.word_length,
94-
tag_field_no=args.tag_num)
91+
dataset = SequentialTaggingDataset(args.train_file, args.test_file,
92+
max_sentence_length=args.sentence_length,
93+
max_word_length=args.word_length,
94+
tag_field_no=args.tag_num)
9595

9696
# get the train and test data sets
9797
x_train, x_char_train, y_train = dataset.train

nlp_architect/contrib/neon/layers.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,9 @@ def bprop(self, error, alpha=1.0, beta=0.0):
234234

235235

236236
class TimeDistBiLSTM(BiLSTM):
237+
"""
238+
A Bi-directional LSTM layer that supports time step output of the LSTM layer.
239+
"""
237240
def __init__(self, output_size, init, init_inner=None, activation=None,
238241
gate_activation=None, reset_cells=False, reset_freq=0,
239242
split_inputs=False, name=None):

nlp_architect/data/sequential_tagging.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,18 @@
2525
from nlp_architect.utils.text import Vocabulary
2626

2727

28-
class NamedEntityDataset(object):
28+
class SequentialTaggingDataset(object):
29+
"""
30+
Sequential tagging dataset loader.
31+
Loads train/test files with tabular separation.
32+
33+
Args:
34+
train_file (str): path to train file
35+
test_file (str): path to test file
36+
max_sentence_length (int, optional): max sentence length
37+
max_word_length (int, optional): max word length
38+
tag_field_no (int, optional): index of column to use as y-samples
39+
"""
2940
def __init__(self,
3041
train_file,
3142
test_file,
@@ -39,12 +50,12 @@ def __init__(self,
3950
self.tf = tag_field_no
4051

4152
self.vocabs = {'token': Vocabulary(2), # 0=pad, 1=unk
42-
'char': Vocabulary(2), # 0=pad, 1=unk
43-
'tag': Vocabulary(1)} # 0=pad
53+
'char': Vocabulary(2), # 0=pad, 1=unk
54+
'tag': Vocabulary(1)} # 0=pad
4455

4556
self.data = {}
4657
for f in self.files:
47-
raw_sentences = self.read_file(self.files[f])
58+
raw_sentences = self._read_file(self.files[f])
4859
word_vecs = []
4960
char_vecs = []
5061
tag_vecs = []
@@ -69,42 +80,49 @@ def __init__(self,
6980

7081
@property
7182
def y_labels(self):
83+
"""return y labels"""
7284
return self.vocabs['tag'].vocab
7385

7486
@property
7587
def word_vocab(self):
88+
"""words vocabulary"""
7689
return self.vocabs['token'].vocab
7790

7891
@property
7992
def char_vocab(self):
93+
"""characters vocabulary"""
8094
return self.vocabs['char'].vocab
8195

8296
@property
8397
def word_vocab_size(self):
98+
"""word vocabulary size"""
8499
return len(self.vocabs['token']) + 2
85100

86101
@property
87102
def char_vocab_size(self):
103+
"""character vocabulary size"""
88104
return len(self.vocabs['char']) + 2
89105

90106
@property
91107
def train(self):
108+
"""Get the train set"""
92109
return self.data['train']
93110

94111
@property
95112
def test(self):
113+
"""Get the test set"""
96114
return self.data['test']
97115

98-
def read_file(self, path):
116+
def _read_file(self, path):
99117
with open(path, encoding='utf-8') as fp:
100118
data = fp.readlines()
101119
data = [d.strip() for d in data]
102120
data = [d for d in data if 'DOCSTART' not in d]
103-
sentences = self.split_into_sentences(data)
104-
parsed_sentences = [self.parse_sentence(s) for s in sentences if len(s) > 0]
121+
sentences = self._split_into_sentences(data)
122+
parsed_sentences = [self._parse_sentence(s) for s in sentences if len(s) > 0]
105123
return parsed_sentences
106124

107-
def parse_sentence(self, sentence):
125+
def _parse_sentence(self, sentence):
108126
tokens = []
109127
tags = []
110128
for line in sentence:
@@ -118,7 +136,7 @@ def parse_sentence(self, sentence):
118136
return tokens, tags
119137

120138
@staticmethod
121-
def split_into_sentences(file_lines):
139+
def _split_into_sentences(file_lines):
122140
sents = []
123141
s = []
124142
for line in file_lines:

0 commit comments

Comments
 (0)