
Commit 69dfee1

itazaprgtjf authored and committed
refactor can_save_slow_tokenizer (huggingface#37722)
* refactor to rm property can_save_slow_tokenizer, it can be done within the if of save_vocab
* move property to fast
* revert if
* check if vocab_file is attr
* fix check for sp
* fix if condition
* fix if condition
* fix if condition
1 parent 10bcd76 commit 69dfee1

25 files changed: +8 −98 lines changed
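The net effect of the diff below: every SentencePiece-backed fast tokenizer carried an identical can_save_slow_tokenizer property, and this commit deletes those copies so the check lives once on the fast-tokenizer base class. A minimal sketch of what that consolidated property might look like, going only by the commit message ("move property to fast", "check if vocab_file is attr"); the class name here is a stand-in for the real base class in tokenization_utils_fast.py, and the exact upstream code may differ.

# Hedged sketch, not the verbatim PR code; only vocab_file / can_save_slow_tokenizer
# come from the diff itself, the rest is illustrative.
import os


class FastTokenizerBaseSketch:
    """Stand-in for the fast-tokenizer base class; the property below is the point."""

    @property
    def can_save_slow_tokenizer(self) -> bool:
        # Not every fast tokenizer has a vocab_file attribute, hence the getattr
        # ("check if vocab_file is attr"); when it exists, the file must still be on disk.
        vocab_file = getattr(self, "vocab_file", None)
        return os.path.isfile(vocab_file) if vocab_file else False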

src/transformers/models/albert/tokenization_albert_fast.py

Lines changed: 0 additions & 4 deletions
@@ -130,10 +130,6 @@ def __init__(
         self.keep_accents = keep_accents
         self.vocab_file = vocab_file
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
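For context, the removed property is still what guards slow-vocabulary export; per the commit message the check "can be done within the if of save_vocab", with the property itself now inherited from the fast base class. A hedged sketch of that pattern for a SentencePiece-backed fast tokenizer follows; the class, the "spiece.model" filename, and the error text are illustrative, not copied from this commit.

import os
import shutil
from typing import Optional, Tuple


class SentencePieceFastTokenizerSketch:
    def __init__(self, vocab_file: Optional[str] = None):
        self.vocab_file = vocab_file

    @property
    def can_save_slow_tokenizer(self) -> bool:
        # In the refactored code this property is inherited from the fast base class.
        return os.path.isfile(self.vocab_file) if self.vocab_file else False

    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
        # Refuse to export a slow-tokenizer vocabulary when the original file is unavailable.
        if not self.can_save_slow_tokenizer:
            raise ValueError("No sentencepiece file available, cannot save a slow tokenizer vocabulary.")
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + "spiece.model"
        )
        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file):
            shutil.copyfile(self.vocab_file, out_vocab_file)
        return (out_vocab_file,)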

src/transformers/models/barthez/tokenization_barthez_fast.py

Lines changed: 0 additions & 4 deletions
@@ -122,10 +122,6 @@ def __init__(
 
         self.vocab_file = vocab_file
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:

src/transformers/models/big_bird/tokenization_big_bird_fast.py

Lines changed: 0 additions & 4 deletions
@@ -119,10 +119,6 @@ def __init__(
 
         self.vocab_file = vocab_file
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:

src/transformers/models/camembert/tokenization_camembert_fast.py

Lines changed: 0 additions & 4 deletions
@@ -125,10 +125,6 @@ def __init__(
 
         self.vocab_file = vocab_file
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:

src/transformers/models/code_llama/tokenization_code_llama_fast.py

Lines changed: 0 additions & 4 deletions
@@ -168,10 +168,6 @@ def __init__(
         self._eot_token = eot_token
         self.fill_token = fill_token
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     # Copied from transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.update_post_processor
     def update_post_processor(self):
         """

src/transformers/models/cpm/tokenization_cpm_fast.py

Lines changed: 0 additions & 4 deletions
@@ -144,10 +144,6 @@ def __init__(
         self.jieba = jieba
         self.translator = str.maketrans(" \n", "\u2582\u2583")
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     # Copied from transformers.models.xlnet.tokenization_xlnet_fast.XLNetTokenizerFast.build_inputs_with_special_tokens
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None

src/transformers/models/deberta_v2/tokenization_deberta_v2_fast.py

Lines changed: 0 additions & 4 deletions
@@ -119,10 +119,6 @@ def __init__(
         self.split_by_punct = split_by_punct
         self.vocab_file = vocab_file
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
         """
         Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and

src/transformers/models/deprecated/xlm_prophetnet/tokenization_xlm_prophetnet.py

Lines changed: 0 additions & 4 deletions
@@ -169,10 +169,6 @@ def __init__(
             **kwargs,
         )
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     def __getstate__(self):
         state = self.__dict__.copy()
         state["sp_model"] = None

src/transformers/models/fnet/tokenization_fnet_fast.py

Lines changed: 0 additions & 4 deletions
@@ -113,10 +113,6 @@ def __init__(
         self.keep_accents = keep_accents
         self.vocab_file = vocab_file
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     def build_inputs_with_special_tokens(
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:

src/transformers/models/gemma/tokenization_gemma_fast.py

Lines changed: 0 additions & 4 deletions
@@ -114,10 +114,6 @@ def __init__(
         self.update_post_processor()
         self.vocab_file = vocab_file
 
-    @property
-    def can_save_slow_tokenizer(self) -> bool:
-        return os.path.isfile(self.vocab_file) if self.vocab_file else False
-
     # Copied from transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast.update_post_processor
     def update_post_processor(self):
         """

0 commit comments
