 from nlp_architect.utils.io import validate_existing_directory
 from nlp_architect.utils.text import SpacyInstance

+sep = os.sep
 PAD = "<pad>"
 SOS = "<sos>"
 UNK = "<unk>"
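For context on this hunk: `os.sep` is the platform's path separator (`/` on POSIX, `\` on Windows). Below is a minimal sketch of the two equivalent ways to build the paths touched by this PR; the literal value of `download_path` is illustrative, not from the PR:

```python
import os

download_path = "data"  # illustrative value, not from the PR

# Manual concatenation, as the PR does:
p1 = download_path + os.sep + "glove.6B.300d.txt"
# Equivalent and separator-agnostic:
p2 = os.path.join(download_path, "glove.6B.300d.txt")
assert p1 == p2  # holds when download_path has no trailing separator
```

Note that in the second changed line of the next hunk, `os.path.join` receives a single already-concatenated string, so the call is a pass-through; passing the components separately, as in `p2` above, would make the manual `sep` unnecessary.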
@@ -72,10 +73,10 @@ def get_glove_matrix(vocabulary_list, download_path):
     """
     Function to obtain preprocessed glove embeddings matrix
     """
-    save_file_name = download_path + "glove.trimmed.300"
+    save_file_name = download_path + sep + "glove.trimmed.300"
     if not os.path.exists(save_file_name + ".npz"):
         vocab_len = len(vocabulary_list)
-        glove_path = os.path.join(download_path + "glove.6B.300d.txt")
+        glove_path = os.path.join(download_path + sep + "glove.6B.300d.txt")
         glove_matrix = np.zeros((vocab_len, 300))
         count = 0
         with open(glove_path) as f:
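For readers without the full file, this is roughly what `get_glove_matrix` does: scan the full GloVe file once, keep only vectors for in-vocabulary words, and cache the result so the check on `save_file_name + ".npz"` above can skip the scan on later runs. A minimal self-contained sketch; only the file names come from the diff, the function body is an assumption:

```python
import os
import numpy as np

def trim_glove_sketch(vocabulary_list, download_path, dim=300):
    """Build a (len(vocabulary_list), dim) matrix of GloVe vectors for known words."""
    word_to_id = {word: i for i, word in enumerate(vocabulary_list)}
    matrix = np.zeros((len(vocabulary_list), dim))
    glove_path = os.path.join(download_path, "glove.6B.300d.txt")
    with open(glove_path, encoding="utf-8") as f:
        for line in f:
            # Each GloVe line is "<word> <v1> <v2> ... <v300>"
            parts = line.rstrip().split(" ")
            if parts[0] in word_to_id:
                matrix[word_to_id[parts[0]]] = np.asarray(parts[1:], dtype=np.float64)
    # savez_compressed appends ".npz", matching the existence check in the diff
    np.savez_compressed(os.path.join(download_path, "glove.trimmed.300"), glove=matrix)
    return matrix
```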
@@ -253,13 +254,13 @@ def get_ids_list(data_list, vocab):
     dev_para_ids = get_ids_list(dev_para, vocab_dict)
     dev_question_ids = get_ids_list(dev_question, vocab_dict)

-    final_data_dict = {"train.ids.context": train_para_ids,
-                       "train.ids.question": train_question_ids,
-                       "dev.ids.context": dev_para_ids,
-                       "dev.ids.question": dev_question_ids,
-                       "vocab.dat": vocab_list,
-                       "train.span": train_ans,
-                       "dev.span": dev_ans}
+    final_data_dict = {sep + "train.ids.context": train_para_ids,
+                       sep + "train.ids.question": train_question_ids,
+                       sep + "dev.ids.context": dev_para_ids,
+                       sep + "dev.ids.question": dev_question_ids,
+                       sep + "vocab.dat": vocab_list,
+                       sep + "train.span": train_ans,
+                       sep + "dev.span": dev_ans}

     print("writing data to files")
     write_to_file(final_data_dict, data_path)
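The leading `sep` now prepended to each key suggests `write_to_file` builds every output path as `data_path + key`, so the separator must live in the key once `data_path` no longer ends with one. Assuming that (the helper's body is not shown in this hunk), a sketch of the pattern:

```python
def write_to_file_sketch(data_dict, data_path):
    """Hypothetical stand-in for write_to_file: dump each value to data_path + key."""
    for file_suffix, lines in data_dict.items():
        # file_suffix starts with os.sep, so plain concatenation yields a full path
        with open(data_path + file_suffix, "w", encoding="utf-8") as fp:
            for line in lines:
                # Token-id lists are flattened to space-separated strings
                if isinstance(line, (list, tuple)):
                    line = " ".join(str(tok) for tok in line)
                fp.write(str(line) + "\n")
```

Under that assumption, a caller passing `data_path="out"` would produce files such as `out/train.ids.context`, which is why each key carries the separator.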