# Pixtral:
# https://huggingface.co/mistral-community/pixtral-12b/
# https://huggingface.co/turboderp/pixtral-12b-exl2
+# Mistral-Small 3.1:
+# https://huggingface.co/prince-canuma/Mistral-Small-3.1-24B-Instruct-2503
# Qwen2-VL:
# https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct
# https://huggingface.co/turboderp/Qwen2-VL-7B-Instruct-exl2
+# Gemma3:
+# https://huggingface.co/google/gemma-3-27b-it
+# https://huggingface.co/turboderp/gemma-3-27b-it-exl2

# mode = "pixtral"
-mode = "qwen2"
+mode = "mistral3"
+# mode = "qwen2"
+# mode = "gemma3"

streaming = True
greedy = True

if mode == "pixtral":
    model_directory = "/mnt/str/models/pixtral-12b-exl2/6.0bpw"
elif mode == "qwen2":
-    model_directory = "/mnt/str/models/qwen2-vl-7b-instruct-exl2/6.0bpw"
+    model_directory = "/mnt/str/models/qwen2.5-vl-7b-instruct-exl2/5.0bpw"
+elif mode == "gemma3":
+    model_directory = "/mnt/str/models/gemma3-12b-it-exl2/6.0bpw"
+elif mode == "mistral3":
+    model_directory = "/mnt/str/models/mistral-small-3.1-24b-instruct/exl2/4.5bpw"

images = [
-    {"file": "media/test_image_1.jpg"},
-    {"file": "media/test_image_2.jpg"},
-    # {"url": "https://media.istockphoto.com/id/1212540739/photo/mom-cat-with-kitten.jpg?s=612x612&w=0&k=20&c=RwoWm5-6iY0np7FuKWn8FTSieWxIoO917FF47LfcBKE="},
+    # {"file": "media/test_image_1.jpg"},
+    # {"file": "media/test_image_2.jpg"},
+    {"url": "https://media.istockphoto.com/id/1212540739/photo/mom-cat-with-kitten.jpg?s=612x612&w=0&k=20&c=RwoWm5-6iY0np7FuKWn8FTSieWxIoO917FF47LfcBKE="},
    # {"url": "https://i.dailymail.co.uk/1s/2023/07/10/21/73050285-12283411-Which_way_should_I_go_One_lady_from_the_US_shared_this_incredibl-a-4_1689019614007.jpg"},
    # {"url": "https://images.fineartamerica.com/images-medium-large-5/metal-household-objects-trevor-clifford-photography.jpg"}
]

-instruction = "Compare and contrast the two experiments."
-# instruction = "Describe the image."
+# instruction = "Compare and contrast the two experiments."
+instruction = "Describe the image."
# instruction = "Find the alarm clock."  # Qwen2 seems to support this but unsure of how to prompt correctly

# Initialize model

config = ExLlamaV2Config(model_directory)
-config.max_seq_len = 16384  # Pixtral default is 1M
+config.max_seq_len = 8192  # Pixtral default is 1M

# Load vision model and multimodal projector and initialize preprocessor

# Load EXL2 model

model = ExLlamaV2(config)
-cache = ExLlamaV2Cache(model, lazy = True, max_seq_len = 16384)
-model.load_autosplit(cache, progress = True)
+cache = ExLlamaV2Cache(model, max_seq_len = 8192, lazy = True)
+model.load_autosplit(progress = True, cache = cache)
tokenizer = ExLlamaV2Tokenizer(config)

# Create generator
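Not part of the diff, for context only: the code following this comment (elided between the hunks) is where the loaded model, cache, and tokenizer are wrapped in the generator that is called further down. A minimal sketch, assuming the dynamic generator API; the file's actual elided lines may differ:

    from exllamav2.generator import ExLlamaV2DynamicGenerator

    # Sketch (assumed, not taken from this commit): bundle the loaded model,
    # cache, and tokenizer into a dynamic generator for the generate() call below.
    generator = ExLlamaV2DynamicGenerator(
        model = model,
        cache = cache,
        tokenizer = tokenizer,
    )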
@@ -115,13 +126,14 @@ def get_image(file = None, url = None):
# Image token IDs are assigned sequentially, however, so two ExLlamaV2Embedding objects created from the same
# source image will not be recognized as the same image for purposes of prompt caching etc.

-if mode == "pixtral":
+if mode in ["pixtral", "mistral3"]:
    prompt = (
        "[INST]" +
        placeholders +
        instruction +
        "[/INST]"
    )
+    stop_conditions = [tokenizer.eos_token_id]

elif mode == "qwen2":
    prompt = (
@@ -133,6 +145,18 @@ def get_image(file = None, url = None):
133145 "<|im_end|>\n " +
134146 "<|im_start|>assistant\n "
135147 )
148+ stop_conditions = [tokenizer .eos_token_id ]
149+
150+ elif mode == "gemma3" :
151+ prompt = (
152+ "<start_of_turn>user\n You are a helpful assistant.\n \n \n \n " +
153+ placeholders +
154+ "\n " +
155+ instruction +
156+ "<end_of_turn>\n " +
157+ "<start_of_turn>model\n "
158+ )
159+ stop_conditions = [tokenizer .single_id ("<end_of_turn>" )]
136160
137161# Generate
138162
@@ -149,7 +173,7 @@ def get_image(file = None, url = None):
    input_ids = input_ids,
    max_new_tokens = 500,
    decode_special_tokens = True,
-    stop_conditions = [tokenizer.eos_token_id],
+    stop_conditions = stop_conditions,
    gen_settings = ExLlamaV2Sampler.Settings.greedy() if greedy else None,
    embeddings = image_embeddings,
)
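For context beyond these hunks: the `placeholders` and `image_embeddings` names used in the prompt templates and the generate call above are produced by the vision tower earlier in the example. A rough sketch only; `vision_model`, `get_image`, and the `text_alias` attribute are assumptions about the surrounding script, not content of this diff:

    # Sketch (assumed API): one embedding per input image; each embedding exposes
    # a text alias that stands in for its image tokens inside the prompt string.
    image_embeddings = [
        vision_model.get_image_embeddings(model = model, tokenizer = tokenizer, image = get_image(**spec))
        for spec in images
    ]
    placeholders = "\n".join(ie.text_alias for ie in image_embeddings) + "\n"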