Skip to content

Commit ac84075

Browse files
authored
remove the pandas dependency in the taskflow (#803)
1 parent 5855111 commit ac84075

File tree

1 file changed: +15 -9 lines changed

paddlenlp/taskflow/text2knowledge.py

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,11 +18,11 @@
1818
import math
1919
import os
2020
import copy
21+
import csv
2122
import itertools
2223

2324
import paddle
2425
import paddle.nn as nn
25-
import pandas as pd
2626
from ..datasets import MapDataset, load_dataset
2727
from ..data import Stack, Pad, Tuple
2828
from ..transformers import ErnieCtmWordtagModel, ErnieCtmTokenizer
@@ -207,15 +207,21 @@ def _load_labels(tag_path):
207207

208208
@staticmethod
209209
def _load_schema(schema_path):
210-
schema_df = pd.read_csv(schema_path, sep="\t", encoding="utf8")
211210
schema = {}
212-
for idx in range(schema_df.shape[0]):
213-
if not isinstance(schema_df["type-1"][idx], float):
214-
schema[schema_df["type-1"][idx]] = "root"
215-
if not isinstance(schema_df["type-2"][idx], float):
216-
schema[schema_df["type-2"][idx]] = schema_df["type-1"][idx]
217-
if not isinstance(schema_df["type-3"][idx], float):
218-
schema[schema_df["type-3"][idx]] = schema_df["type-2"][idx]
211+
with open(schema_path, encoding="utf8") as f:
212+
reader = csv.reader(f)
213+
first_line = True
214+
for line in reader:
215+
if first_line:
216+
first_line = False
217+
continue
218+
items = line[0].split("\t")
219+
if len(items[0]):
220+
schema[items[0]] = "root"
221+
if len(items[1]):
222+
schema[items[1]] = items[0]
223+
if len(items[2]):
224+
schema[items[2]] = items[1]
219225
return schema
220226

221227
@staticmethod

0 commit comments

Comments (0)