import paddle
import paddle.nn as nn
import paddle.nn.functional as F
- from ..datasets import MapDataset
+ from ..datasets import load_dataset, MapDataset
from ..data import Stack, Pad, Tuple, Vocab, JiebaTokenizer
- from .utils import download_file
+ from .utils import download_file, add_docstrings
from .model import BoWModel, LSTMModel
from .task import Task

    ]
}
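+ # The usage examples below are attached to the task instance (see
+ # SentaTask.__init__), presumably so TaskFlow can surface them as help text.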
+ usage = r"""
+ from paddlenlp.taskflow import TaskFlow
+
+ task = TaskFlow("sentiment_analysis")
+ task("怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片")
+ '''
+ [{'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative'}]
+ '''
+
+ task = TaskFlow("sentiment_analysis", network="lstm")
+ task("作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。")
+ '''
+ [{'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。', 'label': 'positive'}]
+ '''
+
+ task = TaskFlow("sentiment_analysis", lazy_load=True)
+ task("作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。")
+ '''
+ [{'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。', 'label': 'positive'}]
+ '''
+
+ task = TaskFlow("sentiment_analysis", batch_size=2)
+ task(["作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。",
+       "怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片",
+       "这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般",
+       "2001年来福州就住在这里,这次感觉房间就了点,温泉水还是有的.总的来说很满意.早餐简单了些."])
+ '''
+ [{'text': '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。', 'label': 'positive'}, {'text': '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片', 'label': 'negative'}, {'text': '这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般', 'label': 'negative'}, {'text': '2001年来福州就住在这里,这次感觉房间就了点,温泉水还是有的.总的来说很满意.早餐简单了些.', 'label': 'positive'}]
+ '''
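+
+ # Assumed, not shown in the original examples: num_workers is read from the
+ # task kwargs the same way batch_size is, enabling parallel data loading.
+ task = TaskFlow("sentiment_analysis", batch_size=2, num_workers=2)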
+ """
+

class SentaTask(Task):
-     """The one task of sentiment_analysis which use the RNN or Bow model to analysis the input text.
+     """
+     Sentiment analysis task that uses an RNN or BoW model to predict the
+     sentiment polarity of Chinese text.
+     Args:
+         task (string): The name of the task.
+         model (string): The name of the model within the task.
+         kwargs (dict, optional): Additional keyword arguments passed along to the specific task.
    """

    def __init__(self, task, model, **kwargs):
        super().__init__(task=task, model=model, **kwargs)
        self._tokenizer = self._construct_tokenizer(model)
        self._model_instance = self._construct_model(model)
        self._label_map = {0: 'negative', 1: 'positive'}
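+         # Expose the module-level usage examples on the instance; presumably
+         # consumed by the Task base class when TaskFlow prints help text.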
+         self._usage = usage

    def _construct_model(self, model):
-         """Construct the inference model for the predictor.
+         """
+         Construct the inference model for the predictor.
        """
        vocab_size = self.kwargs['vocab_size']
        pad_token_id = self.kwargs['pad_token_id']
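+         # Assumption: vocab_size and pad_token_id are filled into self.kwargs
+         # by _construct_tokenizer, which __init__ runs before this method.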
@@ -90,7 +128,8 @@ def _construct_model(self, model):
        return model

    def _construct_tokenizer(self, model):
-         """Construct the tokenizer for the predictor.
+         """
+         Construct the tokenizer for the predictor.
        """
        full_name = download_file(self.model, "senta_word_dict.txt",
                                  URLS['senta_vocab'][0],
@@ -119,21 +158,26 @@ def _preprocess(self, inputs, padding=True, add_special_tokens=True):
            raise TypeError(
                f"Invalid inputs, input text should be str or list of str, {type(inputs)} found!"
            )
+         # Get the config from the kwargs.
+         batch_size = self.kwargs['batch_size'] if 'batch_size' in self.kwargs else 1
+         num_workers = self.kwargs['num_workers'] if 'num_workers' in self.kwargs else 0
+         lazy_load = self.kwargs['lazy_load'] if 'lazy_load' in self.kwargs else False
-         infer_data = []
-         for i in range(0, len(inputs)):
-             ids = self._tokenizer.encode(inputs[i])
-             lens = len(ids)
-             infer_data.append([ids, lens])
-         infer_ds = MapDataset(infer_data)
+
+         def read(inputs):
+             # Tokenize one sample at a time so the dataset can be consumed lazily.
+             for input_data in inputs:
+                 ids = self._tokenizer.encode(input_data)
+                 lens = len(ids)
+                 yield ids, lens
+
+         infer_ds = load_dataset(read, inputs=inputs, lazy=lazy_load)
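+         # NOTE: with lazy=True, load_dataset wraps `read` in a streaming
+         # dataset that tokenizes on demand; with lazy=False it materializes
+         # all samples up front, matching the previous MapDataset behavior.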
        batchify_fn = lambda samples, fn=Tuple(
            Pad(axis=0, pad_val=self._tokenizer.vocab.token_to_idx.get('[PAD]', 0)),  # input_ids
            Stack(dtype='int64'),  # seq_len
        ): fn(samples)
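+         # batchify_fn above pads input_ids to the longest sequence in each
+         # mini-batch (using the vocab's [PAD] index) and stacks the sequence
+         # lengths into an int64 tensor.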
-
-         batch_size = self.kwargs[
-             'batch_size'] if 'batch_size' in self.kwargs else 1
-         num_workers = self.kwargs[
-             'num_workers'] if 'num_workers' in self.kwargs else 0
        infer_data_loader = paddle.io.DataLoader(
            infer_ds,
            collate_fn=batchify_fn,
@@ -147,7 +191,8 @@ def _preprocess(self, inputs, padding=True, add_special_tokens=True):
        return outputs

    def _run_model(self, inputs):
-         """Run the task model from the outputs of the `_tokenize` function.
+         """
+         Run the task model from the outputs of the `_tokenize` function.
        """
        results = []
        with paddle.no_grad():
@@ -163,7 +208,8 @@ def _run_model(self, inputs):
        return inputs

    def _postprocess(self, inputs):
-         """The model output is allways the logits and pros, this function will convert the model output to raw text.
+         """
+         The model output is always logits and probabilities; this function
+         converts it into the final text-and-label results.
        """
        final_results = []
        for text, label in zip(inputs['text'], inputs['result']):