Skip to content

Commit 4bbbde7

Browse files
yeonsily and Chris-Sigopt
authored and committed
Ovis 2 5 (#1993)
## Essential Elements of an Effective PR Description Checklist - [ ] The purpose of the PR, such as "Fix some issue (link existing issues this PR will resolve)". - [ ] The test plan, such as providing test command. - [ ] The test results, such as pasting the results comparison before and after, or e2e results ## Purpose ## Test Plan ## Test Result <!--- pyml disable-next-line no-emphasis-as-heading --> --------- Co-authored-by: Christopher Manteuffel <[email protected]>
1 parent 6a1d7ad commit 4bbbde7

File tree

15 files changed

+1841
-50
lines changed

15 files changed

+1841
-50
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ovis2_5-9b.yaml
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# lm-eval-harness gsm8k accuracy config for Ovis2.5-9B (bfloat16, remote code).
model_name: "/mnt/weka/data/llm/aidc-ai/ovis2.5-9b"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.700
  - name: "exact_match,flexible-extract"
    value: 0.700
limit: 256
num_fewshot: 8
dtype: "bfloat16"
trust_remote_code: True

.jenkins/test_config.yaml

Lines changed: 59 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -5,54 +5,59 @@ stages:
55
- name: v0_gsm8k_small_g3_tp1_part1
66
flavor: g3
77
command: >-
8-
export PT_HPU_LAZY_MODE=1 &&
8+
export PT_HPU_LAZY_MODE=1 &&
99
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1
1010
- name: v0_gsm8k_small_g3_tp1_part2
1111
flavor: g3
1212
command: >-
13-
export PT_HPU_LAZY_MODE=1 &&
13+
export PT_HPU_LAZY_MODE=1 &&
1414
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small-2.txt -t 1
1515
- name: v0_gsm8k_small_g3_tp1_part3
1616
flavor: g3
1717
command: >-
18-
export PT_HPU_LAZY_MODE=1 &&
18+
export PT_HPU_LAZY_MODE=1 &&
1919
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small-3.txt -t 1
2020
- name: v0_gsm8k_small_g3_tp2
2121
flavor: g3.s
2222
command: >-
23-
export PT_HPU_LAZY_MODE=1 &&
23+
export PT_HPU_LAZY_MODE=1 &&
2424
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2
2525
- name: v0_gsm8k_small_g2_tp1
2626
flavor: g2
2727
command: >-
28-
export PT_HPU_LAZY_MODE=1 &&
28+
export PT_HPU_LAZY_MODE=1 &&
2929
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1
3030
- name: v0_gsm8k_small_g2_tp2
3131
flavor: g2.s
3232
command: >-
33-
export PT_HPU_LAZY_MODE=1 &&
33+
export PT_HPU_LAZY_MODE=1 &&
3434
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 2
3535
- name: v0_gsm8k_g2_deepseek-v2-lite_tp1
3636
flavor: g3
3737
command: >-
38-
export PT_HPU_LAZY_MODE=1 &&
38+
export PT_HPU_LAZY_MODE=1 &&
3939
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-deepseek.txt -t 1
4040
- name: v0_gsm8k_g3_gemma3_tp1
4141
flavor: g3.s
4242
command: >-
4343
export PT_HPU_LAZY_MODE=1 &&
4444
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-gemma.txt -t 1
45+
- name: v0_gsm8k_g3_ovis2_5_tp1
46+
flavor: g3.s
47+
command: >-
48+
export PT_HPU_LAZY_MODE=1 && export VLLM_SKIP_WARMUP=true &&
49+
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-ovis.txt -t 1
4550
- name: test_gsm8k_small_models_apc
4651
steps:
4752
- name: gsm8k_small_g3_tp1_apc
4853
flavor: g3
4954
command: >-
50-
export VLLM_CONTIGUOUS_PA=false &&
55+
export VLLM_CONTIGUOUS_PA=false &&
5156
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
5257
- name: gsm8k_small_g2_tp1_apc
5358
flavor: g2
5459
command: >-
55-
export VLLM_CONTIGUOUS_PA=false &&
60+
export VLLM_CONTIGUOUS_PA=false &&
5661
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-small.txt -t 1 -a
5762
- name: test_gsm8k_small_models_merged_prefill
5863
steps:
@@ -66,139 +71,139 @@ stages:
6671
- name: v0_gsm8k_large_g3_tp2_part1
6772
flavor: g3.s
6873
command: >-
69-
export PT_HPU_LAZY_MODE=1 &&
74+
export PT_HPU_LAZY_MODE=1 &&
7075
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 2
7176
- name: v0_gsm8k_large_g3_tp2_part2
7277
flavor: g3.s
7378
command: >-
74-
export PT_HPU_LAZY_MODE=1 &&
79+
export PT_HPU_LAZY_MODE=1 &&
7580
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large-2.txt -t 2
7681
- name: v0_gsm8k_large_g2_tp4
7782
flavor: g2.m
7883
command: >-
79-
export PT_HPU_LAZY_MODE=1 &&
84+
export PT_HPU_LAZY_MODE=1 &&
8085
cd .jenkins/lm-eval-harness && bash run-tests.sh -c configs/models-large.txt -t 4
8186
- name: test_gsm8k_fp8
8287
steps:
8388
- name: gsm8k_small_g3_tp1_fp8
8489
flavor: g3
8590
command: >-
86-
cd .jenkins/lm-eval-harness &&
87-
PT_HPU_LAZY_MODE=1
91+
cd .jenkins/lm-eval-harness &&
92+
PT_HPU_LAZY_MODE=1
8893
bash run-tests.sh -c configs/models-fp8-g3-tp1.txt -t 1
8994
# - name: gsm8k_small_g3_tp2_fp8
9095
# flavor: g3.s
9196
# command: >-
92-
# cd .jenkins/lm-eval-harness &&
93-
# PT_HPU_LAZY_MODE=1
97+
# cd .jenkins/lm-eval-harness &&
98+
# PT_HPU_LAZY_MODE=1
9499
# bash run-tests.sh -c configs/models-fp8.txt -t 2
95100
- name: test_gsm8k_fp8_bypass_inc
96101
steps:
97102
- name: gsm8k_fp8_llama4_scout_g3_tp2_compressed_tensor
98103
flavor: g3.s
99104
command: >-
100-
cd .jenkins/lm-eval-harness &&
101-
PT_HPU_LAZY_MODE=1
105+
cd .jenkins/lm-eval-harness &&
106+
PT_HPU_LAZY_MODE=1
102107
bash run-tests.sh -c configs/models-fp8-compressedtensor.txt -t 2
103108
- name: gsm8k_fp8_qwen3_30B_g3_tp1_block_scale_dynamic
104109
flavor: g3
105110
command: >-
106-
cd .jenkins/lm-eval-harness &&
107-
PT_HPU_LAZY_MODE=1
111+
cd .jenkins/lm-eval-harness &&
112+
PT_HPU_LAZY_MODE=1
108113
bash run-tests.sh -c configs/models-fp8-blockfp8.txt -t 1
109114
- name: gsm8k_fp8_qwen3_30B_g3_tp1_block_scale_dequant
110115
flavor: g3
111116
command: >-
112-
cd .jenkins/lm-eval-harness &&
113-
PT_HPU_LAZY_MODE=1 VLLM_HPU_FORCE_CHANNEL_FP8=0
117+
cd .jenkins/lm-eval-harness &&
118+
PT_HPU_LAZY_MODE=1 VLLM_HPU_FORCE_CHANNEL_FP8=0
114119
bash run-tests.sh -c configs/models-fp8-blockfp8.txt -t 1
115120
- name: test_gsm8k_mss
116121
steps:
117122
- name: gsm8k_small_g3_tp1_mss
118123
flavor: g3
119124
command: >-
120-
cd .jenkins/lm-eval-harness &&
121-
PT_HPU_LAZY_MODE=1
125+
cd .jenkins/lm-eval-harness &&
126+
PT_HPU_LAZY_MODE=1
122127
bash run-tests.sh -c configs/models-mss.txt -t 1
123128
- name: gsm8k_small_g2_tp1_mss
124129
flavor: g2
125130
command: >-
126-
cd .jenkins/lm-eval-harness &&
127-
PT_HPU_LAZY_MODE=1
131+
cd .jenkins/lm-eval-harness &&
132+
PT_HPU_LAZY_MODE=1
128133
bash run-tests.sh -c configs/models-mss.txt -t 1
129134
- name: gsm8k_small_g3_tp2_mss
130135
flavor: g3.s
131136
command: >-
132-
cd .jenkins/lm-eval-harness &&
133-
PT_HPU_LAZY_MODE=1
137+
cd .jenkins/lm-eval-harness &&
138+
PT_HPU_LAZY_MODE=1
134139
bash run-tests.sh -c configs/models-mss.txt -t 2
135140
- name: gsm8k_small_g2_tp2_mss
136141
flavor: g2.s
137142
command: >-
138-
cd .jenkins/lm-eval-harness &&
139-
PT_HPU_LAZY_MODE=1
143+
cd .jenkins/lm-eval-harness &&
144+
PT_HPU_LAZY_MODE=1
140145
bash run-tests.sh -c configs/models-mss.txt -t 2
141146
- name: gsm8k_small_g2_tp1_spec_decode
142147
flavor: g2
143148
command: >-
144-
cd .jenkins/lm-eval-harness &&
145-
PT_HPU_LAZY_MODE=1
149+
cd .jenkins/lm-eval-harness &&
150+
PT_HPU_LAZY_MODE=1
146151
bash run-tests.sh -c configs/models-mss.txt -t 1
147152
- name: test_gsm8k_spec_decode
148153
steps:
149154
# - name: gsm8k_small_g2_tp1_mlp_spec_decode
150155
# flavor: g2
151156
# command: >-
152-
# PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
157+
# PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
153158
# pytest -v tests/spec_decode/e2e/test_mlp_correctness.py::test_mlp_e2e_greedy_correctness
154159
- name: gsm8k_small_g2_tp1_medusa_spec_decode
155160
flavor: g2
156161
command: >-
157-
PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
162+
PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
158163
pytest -v tests/spec_decode/e2e/test_medusa_correctness.py::test_medusa_e2e_greedy_correctness
159164
- name: gsm8k_small_g2_tp1_eagle_spec_decode
160165
flavor: g2
161166
command: >-
162-
PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
167+
PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
163168
pytest -v tests/spec_decode/e2e/test_eagle_correctness.py::test_eagle_e2e_greedy_correctness
164169
#TODO(kwisniewski98) temporary disable test, until model specific for Gaudi2 is uploaded to test infrastructure
165170
# - name: test_deepseek_mtp
166171
# steps:
167172
# - name: test_deepseek_mtp_correctness
168173
# flavor: g3
169174
# command: >-
170-
# PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
175+
# PT_HPU_LAZY_MODE=1 VLLM_CONTIGUOUS_PA=false VLLM_SKIP_WARMUP=True
171176
# pytest -v tests/spec_decode/e2e/test_mtp_correctness.py::test_mtp_e2e_greedy_correctness
172177
- name: tests_lora
173178
steps:
174179
- name: test_llama_lora
175180
flavor: g2
176181
command: >-
177-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
182+
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
178183
pytest -v tests/lora/test_llama_hpu.py::test_llama_lora_1x
179184
- name: test_multilora
180185
flavor: g2
181186
command: >-
182-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
187+
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
183188
pytest -v tests/lora/test_multilora_hpu.py::test_llama_multilora_1x
184189
# - name: test_long_context
185190
# flavor: g2
186191
# command: >-
187-
# PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
192+
# PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
188193
# pytest -v tests/lora/test_long_context_hpu.py::test_quality
189194
- name: tests_multimodal
190195
steps:
191196
- name: multimodal_small_g3_tp1
192197
flavor: g3
193198
command: >-
194-
cd .jenkins/vision &&
195-
PT_HPU_LAZY_MODE=1
199+
cd .jenkins/vision &&
200+
PT_HPU_LAZY_MODE=1
196201
bash run-tests.sh -c configs/models-small.txt -t 1
197202
- name: multimodal_small_g3_tp2
198203
flavor: g3.s
199204
command: >-
200205
cd .jenkins/vision &&
201-
PT_HPU_LAZY_MODE=1
206+
PT_HPU_LAZY_MODE=1
202207
bash run-tests.sh -c configs/models-small.txt -t 2
203208
- name: multimodal_qwen_tp1
204209
flavor: g3.s
@@ -210,13 +215,13 @@ stages:
210215
flavor: g3
211216
command: >-
212217
cd .jenkins/vision &&
213-
PT_HPU_LAZY_MODE=1
218+
PT_HPU_LAZY_MODE=1
214219
bash run-tests.sh -c configs/models-mss.txt -t 1
215220
- name: multimodal_small_g3_tp2_mss
216221
flavor: g3.s
217222
command: >-
218223
cd .jenkins/vision &&
219-
PT_HPU_LAZY_MODE=1
224+
PT_HPU_LAZY_MODE=1
220225
bash run-tests.sh -c configs/models-mss.txt -t 2
221226
- name: multimodal_llama4_scout_g3_tp2_ep
222227
flavor: g3.s
@@ -230,26 +235,32 @@ stages:
230235
cd .jenkins/vision &&
231236
PT_HPU_LAZY_MODE=1
232237
bash run-tests.sh -c configs/models-gemma.txt -t 1
238+
- name: multimodal_ovis2_5_g3_tp1_ep
239+
flavor: g3.s
240+
command: >-
241+
cd .jenkins/vision &&
242+
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
243+
bash run-tests.sh -c configs/models-ovis.txt -t 1
233244
- name: tests_int4_quantization
234245
steps:
235246
- name: test_awq
236247
flavor: g2
237248
command: >-
238-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
249+
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
239250
pytest -v tests/quantization/test_awq.py::test_awq
240251
- name: test_gptq
241252
flavor: g2
242253
command: >-
243-
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
254+
PT_HPU_LAZY_MODE=1 VLLM_SKIP_WARMUP=true
244255
pytest -v tests/quantization/test_gptq.py::test_gptq
245256
- name: tests_guided_decode
246257
steps:
247258
- name: test_lazy_outlines
248259
flavor: g2
249260
command: >-
250261
pip install -e tests/vllm_test_utils &&
251-
export VLLM_SKIP_WARMUP=true && PT_HPU_LAZY_MODE=1
252-
pytest -v tests/entrypoints/llm/test_lazy_outlines.py -s -vvv --log-cli-level=INFO
262+
export VLLM_SKIP_WARMUP=true && PT_HPU_LAZY_MODE=1
263+
pytest -v tests/entrypoints/llm/test_lazy_outlines.py -s -vvv --log-cli-level=INFO
253264
# - name: test_guided_generate
254265
# flavor: g2
255266
# command: >-
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ovis2_5-9b.yaml
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Vision-test config for Ovis2.5-9B: up to 5 images per prompt, 32k context.
model_name: "/mnt/weka/data/llm/aidc-ai/ovis2.5-9b"
dtype: "bfloat16"
max_model_len: 32768
max_num_seqs: 32
num_prompts: 4
limit_mm_per_prompt_image: 5
trust_remote_code: True

.jenkins/vision/test_enc_dec_model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def fail_on_exit():
2424
def launch_enc_dec_model(config, question, images):
2525
model_name = config.get('model_name')
2626
dtype = config.get('dtype', 'bfloat16')
27+
trust_remote_code = config.get('trust_remote_code', False)
2728
max_num_seqs = config.get('max_num_seqs', 128)
2829
max_model_len = config.get('max_model_len', 4096)
2930
enforce_eager = config.get('enforce_eager', False)
@@ -41,6 +42,7 @@ def launch_enc_dec_model(config, question, images):
4142
enable_expert_parallel=enable_expert_parallel,
4243
enforce_eager=enforce_eager,
4344
limit_mm_per_prompt={"image": limit_mm_per_prompt_image},
45+
trust_remote_code=trust_remote_code,
4446
)
4547

4648
tokenizer = AutoTokenizer.from_pretrained(model_name)

examples/offline_inference/vision_language_multi_image.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,36 @@ def load_ovis(question: str, image_urls: list[str]) -> ModelRequestData:
460460
)
461461

462462

463+
# ovis2_5
def load_ovis2_5(question: str, image_urls: list[str]) -> ModelRequestData:
    """Prepare a multi-image request for AIDC-AI/Ovis2.5-2B.

    Builds one numbered ``Image-N: <image>`` placeholder per URL, renders
    the user message through the model's chat template, and returns the
    engine arguments together with the fetched images.
    """
    model_name = "AIDC-AI/Ovis2.5-2B"

    engine_args = EngineArgs(
        model=model_name,
        max_model_len=8192,
        max_num_seqs=2,
        trust_remote_code=True,
        dtype="half",
        limit_mm_per_prompt={"image": len(image_urls)},
    )

    # One placeholder tag per image, numbered from 1 in URL order.
    tags = [f"Image-{idx}: <image>\n" for idx, _ in enumerate(image_urls, start=1)]
    image_block = "\n".join(tags)
    chat = [{"role": "user", "content": f"{image_block}\n{question}"}]

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    prompt = tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )

    return ModelRequestData(
        engine_args=engine_args,
        prompt=prompt,
        image_data=[fetch_image(url) for url in image_urls],
    )
491+
492+
463493
def load_pixtral_hf(question: str, image_urls: list[str]) -> ModelRequestData:
464494
model_name = "mistral-community/pixtral-12b"
465495

@@ -742,6 +772,7 @@ def load_tarsier(question: str, image_urls: list[str]) -> ModelRequestData:
742772
"mllama": load_mllama,
743773
"NVLM_D": load_nvlm_d,
744774
"ovis": load_ovis,
775+
"ovis2_5": load_ovis2_5,
745776
"phi3_v": load_phi3v,
746777
"phi4_mm": load_phi4mm,
747778
"pixtral_hf": load_pixtral_hf,

0 commit comments

Comments
 (0)