Skip to content

Commit af8ba05

Browse files
authored
feat: add trust-remote-code option (#173)
1 parent 44b0dcf commit af8ba05

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

model2vec/distill/distillation.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -204,6 +204,7 @@ def distill(
204204
apply_zipf: bool = True,
205205
use_subword: bool = True,
206206
token_remove_pattern: str | None = r"\[unused\d+\]",
207+
trust_remote_code: bool = False,
207208
) -> StaticModel:
208209
"""
209210
Distill a staticmodel from a sentence transformer.
@@ -223,11 +224,12 @@ def distill(
223224
:param apply_zipf: Whether to apply Zipf weighting to the embeddings.
224225
:param use_subword: Whether to keep subword tokens in the vocabulary. If this is False, you must pass a vocabulary, and the returned tokenizer will only detect full words.
225226
:param token_remove_pattern: If this is set to a string, we compile this into a regex. Any tokens that conform to this regex pattern will be removed from the vocabulary.
227+
:param trust_remote_code: Whether to trust the remote code. If this is False, we will only load components coming from `transformers`. If this is True, we will load all components.
226228
:return: A StaticModel
227229
228230
"""
229-
model: PreTrainedModel = AutoModel.from_pretrained(model_name)
230-
tokenizer: PreTrainedTokenizerFast = AutoTokenizer.from_pretrained(model_name)
231+
model: PreTrainedModel = AutoModel.from_pretrained(model_name, trust_remote_code=trust_remote_code)
232+
tokenizer: PreTrainedTokenizerFast = AutoTokenizer.from_pretrained(model_name, trust_remote_code=trust_remote_code)
231233

232234
return distill_from_model(
233235
model=model,

0 commit comments

Comments
 (0)