
Commit 3e116c9
test: add random image test for llama-3.2-11b-vision (NVIDIA#3055)
* add random image test for llama-3.2-11b-vision

Signed-off-by: Ivy Zhang <[email protected]>

* rename case

Signed-off-by: Ivy Zhang <[email protected]>

---------

Signed-off-by: Ivy Zhang <[email protected]>
Co-authored-by: Larry <[email protected]>

CI passed: https://nv/trt-llm-cicd/job/helpers/job/PR_Github/522/
1 parent f70b439 commit 3e116c9

File tree

3 files changed (+20 −1 lines changed)

examples/multimodal/run.py

Lines changed: 4 additions & 0 deletions
@@ -52,6 +52,10 @@ def print_result(model, input_text, output_text, args):
         ref_1 = ", it would be:.\\nPeter Rabbit is a rabbit.\\nHe lives in a cozy little house.\\nHe's a very good rabbit.\\"
         ref_2 = "Here is a haiku for the image:\n\n"
 
+    elif "Answer:" in input_text:
+        ref_1 = "2,173. <OCR/> A 1 2 3 4 5 6 Date Income 2005-12-17"
+        ref_2 = "Answer: 2,173. <OCR/> 1 2 3 4 5 6 Date Income 2005-12-17"
+
     elif "The key to life is" in input_text:
         ref_1 = "to find your passion and pursue it with all your heart. For me, that passion is photography. I love capturing the beauty of the world around me"
         ref_2 = "not to be found in the external world,"

tests/integration/defs/examples/test_multimodal.py

Lines changed: 16 additions & 1 deletion
@@ -478,9 +478,24 @@ def _test_llm_multimodal_general(llm_venv,
         "If I had to write a haiku for this one"
     ])
 
-    print("Run mllama vision test...")
+    print("Run mllama vision test in with example image ...")
     _call_run_cmd(llm_venv, llm_root, run_cmd_vision, world_size)
 
+    print("multimodal_example_root: ", multimodal_example_root)
+    print("llm_root: ", llm_root)
+    run_cmd_vision = run_cmd.copy()
+    run_cmd_vision.extend([
+        "--cross_kv_cache_fraction=0.5",  # mllama uses cross attention
+        "--image_path",
+        os.path.join(
+            llm_root,
+            "tests/integration/test_input_files/excel_table_test.jpg"),
+        "--input_text",
+        "What is the total income? Answer:"
+    ])
+
+    print("Run mllama vision test with random image ...")
+
     run_cmd_text = run_cmd.copy()
     run_cmd_text.extend([
         "--cross_kv_cache_fraction=0.5",  # mllama uses cross attention
tests/integration/test_input_files/excel_table_test.jpg

Binary image file added (19.7 KB); preview not shown.
