Skip to content

Commit ec2333e

Browse files
authored
Add the knowledge_mining task and update the document for the Taskflow (PaddlePaddle#1130)
* update the document for the taskflow and add the taskflow of knowledge_mining * update the document for the taskflow in main document
1 parent 522f279 commit ec2333e

File tree

9 files changed

+622
-442
lines changed

9 files changed

+622
-442
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ tag("第十四届全运会在西安举办")
7070
# 命名实体识别
7171
ner = Taskflow("ner")
7272
ner("《孤女》是2010年九州出版社出版的小说,作者是余兼羽")
73-
>>> [{'text': '《孤女》是2010年九州出版社出版的小说,作者是余兼羽', 'items': [{'item': '《', 'offset': 0, 'wordtag_label': 'w', 'length': 1}, {'item': '孤女', 'offset': 1, 'wordtag_label': '作品类_实体', 'length': 2}, {'item': '》', 'offset': 3, 'wordtag_label': 'w', 'length': 1}, {'item': '是', 'offset': 4, 'wordtag_label': '肯定词', 'length': 1}, {'item': '2010年', 'offset': 5, 'wordtag_label': '时间类', 'length': 5}, {'item': '九州出版社', 'offset': 10, 'wordtag_label': '组织机构类', 'length': 5}, {'item': '出版', 'offset': 15, 'wordtag_label': '场景事件', 'length': 2}, {'item': '的', 'offset': 17, 'wordtag_label': '助词', 'length': 1}, {'item': '小说', 'offset': 18, 'wordtag_label': '作品类_概念', 'length': 2}, {'item': ',', 'offset': 20, 'wordtag_label': 'w', 'length': 1}, {'item': '作者', 'offset': 21, 'wordtag_label': '人物类_概念', 'length': 2}, {'item': '是', 'offset': 23, 'wordtag_label': '肯定词', 'length': 1}, {'item': '余兼羽', 'offset': 24, 'wordtag_label': '人物类_实体', 'length': 3}]}]
73+
>>> [('《', 'w'), ('孤女', '作品类_实体'), ('》', 'w'), ('是', '肯定词'), ('2010年', '时间类'), ('九州出版社', '组织机构类'), ('出版', '场景事件'), ('的', '助词'), ('小说', '作品类_概念'), (',', 'w'), ('作者', '人物类_概念'), ('是', '肯定词'), ('余兼羽', '人物类_实体')]
7474

7575
# 句法分析
7676
ddp = Taskflow("dependency_parsing")

docs/model_zoo/taskflow.md

Lines changed: 55 additions & 24 deletions
Large diffs are not rendered by default.

examples/text_to_knowledge/wordtag/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@ Term-Linking示例程序可以对无标签数据可以启动模型预测, 例如
5656

5757
```python
5858
from paddlenlp import Taskflow
59-
ner = Taskflow("ner", model="wordtag", linking=True)
60-
ner(["热梅茶是一道以梅子为主要原料制作的茶饮",
59+
wordtag = Taskflow("knowledge_mining", model="wordtag", linking=True)
60+
wordtag(["热梅茶是一道以梅子为主要原料制作的茶饮",
6161
"《孤女》是2010年九州出版社出版的小说,作者是余兼羽"])
6262
# Support the input text directly
63-
ner("热梅茶是一道以梅子为主要原料制作的茶饮")
63+
wordtag("热梅茶是一道以梅子为主要原料制作的茶饮")
6464

6565
```
6666
下面是运行WordTag工具后的知识链接的预测结果

examples/text_to_knowledge/wordtag/predict.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def parse_args():
3535
def do_predict(args):
3636
paddle.set_device(args.device)
3737
wordtag = Taskflow(
38-
"ner",
38+
"knowledge_mining",
3939
model="wordtag",
4040
batch_size=args.batch_size,
4141
max_seq_length=args.max_seq_len,

paddlenlp/taskflow/dependency_parsing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
'''
6464
[{'word': ['百度', '是', '一家', '高科技', '公司'], 'head': ['2', '0', '5', '5', '2'], 'deprel': ['SBV', 'HED', 'ATT', 'ATT', 'VOB']}]
6565
'''
66+
6667
ddp = Taskflow("dependency_parsing", model="ddparser-ernie-gram-zh")
6768
ddp("百度是一家高科技公司")
6869
'''

paddlenlp/taskflow/knowledge_mining.py

Lines changed: 501 additions & 0 deletions
Large diffs are not rendered by default.

paddlenlp/taskflow/lexical_analysis.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
'''
4646
[{'text': 'LAC是个优秀的分词工具', 'segs': ['LAC', '是', '个', '优秀', '的', '分词', '工具'], 'tags': ['nz', 'v', 'q', 'a', 'u', 'n', 'n']}]
4747
'''
48+
4849
lac(["LAC是个优秀的分词工具", "三亚是一个美丽的城市"])
4950
'''
5051
[{'text': 'LAC是个优秀的分词工具', 'segs': ['LAC', '是', '个', '优秀', '的', '分词', '工具'], 'tags': ['nz', 'v', 'q', 'a', 'u', 'n', 'n']},

paddlenlp/taskflow/named_entity_recognition.py

Lines changed: 42 additions & 411 deletions
Large diffs are not rendered by default.

paddlenlp/taskflow/taskflow.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
import paddle
1818
from ..utils.tools import get_env_device
1919
from ..transformers import ErnieCtmWordtagModel, ErnieCtmTokenizer
20-
from .named_entity_recognition import WordTagTask
20+
from .knowledge_mining import WordTagTask
21+
from .named_entity_recognition import NERTask
2122
from .sentiment_analysis import SentaTask, SkepTask
2223
from .lexical_analysis import LacTask
2324
from .word_segmentation import WordSegmentationTask
@@ -31,10 +32,24 @@
3132
warnings.simplefilter(action='ignore', category=Warning, lineno=0, append=False)
3233

3334
TASKS = {
34-
"ner": {
35+
"knowledge_mining": {
3536
"models": {
3637
"wordtag": {
3738
"task_class": WordTagTask,
39+
"log_name": 'knowledge_mining_wordtag',
40+
"linking": True,
41+
}
42+
},
43+
"default": {
44+
"model": "wordtag"
45+
}
46+
},
47+
"ner": {
48+
"models": {
49+
"wordtag": {
50+
"task_class": NERTask,
51+
"log_name": 'ner_wordtag',
52+
"linking": False,
3853
}
3954
},
4055
"default": {

0 commit comments

Comments
 (0)