 from gguf.vocab import MistralTokenizerType, MistralVocab

 if importlib.util.find_spec("mistral_common") is not None:
-    from mistral_common.tokens.tokenizers.base import TokenizerVersion
-    from mistral_common.tokens.tokenizers.multimodal import DATASET_MEAN as _MISTRAL_COMMON_DATASET_MEAN, DATASET_STD as _MISTRAL_COMMON_DATASET_STD
-    from mistral_common.tokens.tokenizers.tekken import Tekkenizer
-    from mistral_common.tokens.tokenizers.sentencepiece import (
-        SentencePieceTokenizer,
-    )
-
     _mistral_common_installed = True
     _mistral_import_error_msg = ""
 else:
-    _MISTRAL_COMMON_DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073)
-    _MISTRAL_COMMON_DATASET_STD = (0.26862954, 0.26130258, 0.27577711)
-
     _mistral_common_installed = False
-    TokenizerVersion = None
-    Tekkenizer = None
-    SentencePieceTokenizer = None
     _mistral_import_error_msg = (
         "Mistral format requires `mistral-common` to be installed. Please run "
         "`pip install mistral-common[image,audio]` to install it."
@@ -1384,8 +1371,14 @@ def set_gguf_parameters(self): |
         self.gguf_writer.add_vision_head_count(self.find_vparam(["num_attention_heads"]))

         # preprocessor config
-        image_mean = _MISTRAL_COMMON_DATASET_MEAN if self.is_mistral_format else self.preprocessor_config["image_mean"]
-        image_std = _MISTRAL_COMMON_DATASET_STD if self.is_mistral_format else self.preprocessor_config["image_std"]
+        if self.is_mistral_format:
+            from mistral_common.tokens.tokenizers.multimodal import DATASET_MEAN, DATASET_STD
+
+            image_mean = DATASET_MEAN
+            image_std = DATASET_STD
+        else:
+            image_mean = self.preprocessor_config["image_mean"]
+            image_std = self.preprocessor_config["image_std"]

         self.gguf_writer.add_vision_image_mean(image_mean)
         self.gguf_writer.add_vision_image_std(image_std)
@@ -9236,6 +9229,11 @@ class MistralModel(LlamaModel): |

     @staticmethod
     def get_community_chat_template(vocab: MistralVocab, templates_dir: Path, is_mistral_format: bool):
+        from mistral_common.tokens.tokenizers.base import TokenizerVersion
+        from mistral_common.tokens.tokenizers.tekken import Tekkenizer
+        from mistral_common.tokens.tokenizers.sentencepiece import (
+            SentencePieceTokenizer,
+        )
         assert TokenizerVersion is not None and Tekkenizer is not None and SentencePieceTokenizer is not None, _mistral_import_error_msg
         assert isinstance(vocab.tokenizer, (Tekkenizer, SentencePieceTokenizer)), (
             f"Expected Tekkenizer or SentencePieceTokenizer, got {type(vocab.tokenizer)}"
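Taken together, these hunks keep the module-level availability check but defer the actual `mistral_common` imports to the call sites that need them, so the converter module stays importable when the optional dependency is missing. A minimal standalone sketch of that lazy-import pattern, assuming a hypothetical helper name (`load_dataset_stats` is not part of the patch):

```python
import importlib.util

# Detect the optional dependency once at module load, without importing it.
_mistral_common_installed = importlib.util.find_spec("mistral_common") is not None
_mistral_import_error_msg = (
    "Mistral format requires `mistral-common` to be installed. Please run "
    "`pip install mistral-common[image,audio]` to install it."
)


def load_dataset_stats():
    # Hypothetical helper: the heavy import happens only when the Mistral
    # path is actually exercised, mirroring the change in set_gguf_parameters.
    if not _mistral_common_installed:
        raise ImportError(_mistral_import_error_msg)
    from mistral_common.tokens.tokenizers.multimodal import DATASET_MEAN, DATASET_STD
    return DATASET_MEAN, DATASET_STD
```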