12
12
# See the License for the specific language governing permissions and
13
13
# limitations under the License.
14
14
15
+ import importlib
16
+
15
17
import paddle
16
18
import paddle .fluid .core as core
17
19
import paddle .nn as nn
18
20
from paddle .fluid .layer_helper import LayerHelper
19
21
from paddle .fluid .framework import in_dygraph_mode
20
22
from paddlenlp .utils .downloader import get_path_from_url
21
23
from paddlenlp .transformers import BertTokenizer , ErnieTokenizer , RobertaTokenizer
22
- from paddle import _C_ops
24
+ from paddlenlp . utils . log import logger
23
25
24
26
__all__ = ["to_tensor" , "to_vocab_buffer" , "FasterTokenizer" ]
25
27
@@ -77,6 +79,15 @@ class FasterTokenizer(nn.Layer):
77
79
78
80
def __init__ (self , vocab , do_lower_case = False , is_split_into_words = False ):
79
81
super (FasterTokenizer , self ).__init__ ()
82
+
83
+ try :
84
+ self .mod = importlib .import_module ("paddle._C_ops" )
85
+ except Exception as e :
86
+ logger .warning (
87
+ f"The paddlepaddle version is { paddle .__version__ } , not the latest. "
88
+ "Please upgrade the paddlepaddle package (>= 2.2.1)." )
89
+ self .mod = importlib .import_module ("paddle.fluid.core.ops" )
90
+
80
91
vocab_buffer = to_vocab_buffer (vocab , "vocab" )
81
92
self .register_buffer ("vocab" , vocab_buffer , persistable = True )
82
93
@@ -94,11 +105,12 @@ def forward(self,
94
105
if text_pair is not None :
95
106
if isinstance (text_pair , list ) or isinstance (text_pair , tuple ):
96
107
text_pair = to_tensor (list (text_pair ))
97
- input_ids , seg_ids = _C_ops .faster_tokenizer (
108
+ input_ids , seg_ids = self . mod .faster_tokenizer (
98
109
self .vocab , text , text_pair , "do_lower_case" ,
99
110
self .do_lower_case , "max_seq_len" , max_seq_len ,
100
111
"pad_to_max_seq_len" , pad_to_max_seq_len , "is_split_into_words" ,
101
112
self .is_split_into_words )
113
+
102
114
return input_ids , seg_ids
103
115
104
116
attrs = {
0 commit comments