@@ -242,7 +242,7 @@ def __init__(
         self.replacement_char = replacement_char
         self.input_encoding = input_encoding
         self.output_encoding = output_encoding
-        self.vocabulary_size = vocabulary_size
+        self._vocabulary_size = vocabulary_size
 
     def get_config(self):
         config = super().get_config()
@@ -255,15 +255,15 @@ def get_config(self):
                 "replacement_char": self.replacement_char,
                 "input_encoding": self.input_encoding,
                 "output_encoding": self.output_encoding,
-                "vocabulary_size": self.vocabulary_size,
+                "vocabulary_size": self._vocabulary_size,
             }
         )
         return config
 
     def vocabulary_size(self) -> int:
         """Get the size of the tokenizer vocabulary. None implies no vocabulary
         size was provided"""
-        return self.vocabulary_size
+        return self._vocabulary_size
 
     def tokenize(self, inputs):
         if not isinstance(inputs, (tf.Tensor, tf.RaggedTensor)):
@@ -299,8 +299,8 @@ def tokenize(self, inputs):
 
         # Optionally clamps the output code point values to be in the
         # range [0, vocabulary_size)
-        if self.vocabulary_size:
-            tokens = tf.clip_by_value(tokens, 0, self.vocabulary_size - 1)
+        if self._vocabulary_size:
+            tokens = tf.clip_by_value(tokens, 0, self._vocabulary_size - 1)
 
         return tokens
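
The rename is more than cosmetic: the attribute `self.vocabulary_size` assigned in `__init__` shadows the `vocabulary_size()` method on the instance, so before this fix `tokenizer.vocabulary_size()` raised TypeError: 'int' object is not callable. A minimal standalone sketch of the collision and the fix (hypothetical classes, not the library's code):

class BrokenTokenizer:
    """Before the fix: the __init__ assignment hides the method."""

    def __init__(self, vocabulary_size=None):
        # Instance attributes win the lookup over plain methods,
        # so this line makes vocabulary_size() unreachable.
        self.vocabulary_size = vocabulary_size

    def vocabulary_size(self):
        return self.vocabulary_size


class FixedTokenizer:
    """After the fix: a private attribute leaves the method callable."""

    def __init__(self, vocabulary_size=None):
        self._vocabulary_size = vocabulary_size

    def vocabulary_size(self):
        return self._vocabulary_size


broken = BrokenTokenizer(vocabulary_size=256)
try:
    broken.vocabulary_size()
except TypeError as err:
    print(err)  # 'int' object is not callable

print(FixedTokenizer(vocabulary_size=256).vocabulary_size())  # 256

Note that `get_config()` still serializes the value under the public key "vocabulary_size", so saved configs round-trip unchanged. The clamping hunk in `tokenize()` reads the same renamed attribute; a short sketch of what `tf.clip_by_value` does there, assuming TensorFlow is available (the values are illustrative, not from the library's tests):

import tensorflow as tf

vocabulary_size = 256
tokens = tf.constant([72, 101, 128512])  # 'H', 'e', and an emoji code point
print(tf.clip_by_value(tokens, 0, vocabulary_size - 1).numpy())  # [ 72 101 255]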