Skip to content

Commit 63efd70

Browse files
authored
revamp vision models
1 parent 3adb8b8 commit 63efd70

File tree

3 files changed

+271
-369
lines changed

3 files changed

+271
-369
lines changed

src/chart_models_vision.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,16 @@ def create_vision_models_comparison_plot():
1616
}
1717

1818
data = [
19-
{"model": "THUDM glm4v - 9b", "cps": 200.96, "memory": 9802.12},
20-
{"model": "Molmo-D-0924 - 8b", "cps": 194.65, "memory": 7693.93},
19+
{"model": "GLM-4.1V-9B-Thinking", "cps": 200.96, "memory": 9802.12},
2120
{"model": "Qwen VL - 3b", "cps": 178.31, "memory": 6306.30},
2221
{"model": "Qwen VL - 7b", "cps": 173.67, "memory": 9559.89},
2322
{"model": "Granite Vision - 2b", "cps": 217.64, "memory": 4094.18},
2423
{"model": "InternVL3 - 1b", "cps": 274.25, "memory": 2318.05},
2524
{"model": "InternVL3 - 2b", "cps": 244.36, "memory": 3153.87},
2625
{"model": "InternVL3 - 8b", "cps": 255.95, "memory": 8153.30},
2726
{"model": "InternVL3 - 14b", "cps": 162.58, "memory": 12998.80},
28-
{"model": "Ovis2 - 1b", "cps": 286.39, "memory": 4071.93},
29-
{"model": "Ovis2 - 2b", "cps": 312.08, "memory": 5846.49},
27+
{"model": "Liquid-VL - 1.6B", "cps": 437.50, "memory": 1396.00},
28+
{"model": "Liquid-VL - 450m", "cps": 497.64, "memory": 628.00},
3029
]
3130

3231
df = pd.DataFrame(data)

src/constants.py

Lines changed: 36 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -759,8 +759,10 @@
759759
'requires_cuda': False,
760760
'vram': '628 MB',
761761
'speed': '497.64 char/s',
762-
'avg_length': 964,
762+
'avg_length': 855,
763763
'loader': 'loader_liquidvl',
764+
'vision_component': 'SigLIP2 NaFlex base (86M)',
765+
'chat_component': 'LFM2-350M',
764766
'license': 'lfm1.0',
765767
},
766768
'Liquid-VL - 1.6B': {
@@ -772,31 +774,24 @@
772774
'requires_cuda': False,
773775
'vram': '1.4 GB',
774776
'speed': '437.5 char/s',
775-
'avg_length': 856,
777+
'avg_length': 722,
776778
'loader': 'loader_liquidvl',
779+
'vision_component': 'SigLIP2 NaFlex shape-optimized (400M)',
780+
'chat_component': 'LFM2-1.2B',
777781
'license': 'lfm1.0',
778782
},
779-
'InternVL3 - 1b': {# transformers 4.48.3
783+
'InternVL3 - 1b': {
780784
'precision': 'bfloat16',
781785
'quant': '4-bit',
782786
'size': '1b',
783787
'repo_id': 'OpenGVLab/InternVL3-1B',
784788
'cache_dir': 'OpenGVLab--InternVL3-1B',
785789
'requires_cuda': False,
786790
'vram': '2.4 GB',
787-
'avg_length': 527,
791+
'avg_length': 560,
788792
'loader': 'loader_internvl',
789-
'license': 'apache-2.0',
790-
},
791-
'Ovis2 - 1b': {
792-
'precision': 'bfloat16',
793-
'quant': 'n/a',
794-
'size': '1b',
795-
'repo_id': 'AIDC-AI/Ovis2-1B',
796-
'cache_dir': 'AIDC-AI--Ovis2-1B',
797-
'requires_cuda': False,
798-
'vram': '2.4 GB',
799-
'loader': 'loader_ovis',
793+
'vision_component': 'InternViT-300M-448px-V2_5',
794+
'chat_component': 'Qwen2.5-0.5B',
800795
'license': 'apache-2.0',
801796
},
802797
'InternVL3 - 2b': {
@@ -807,30 +802,24 @@
807802
'cache_dir': 'OpenGVLab--InternVL3-2B',
808803
'requires_cuda': False,
809804
'vram': '3.2 GB',
810-
'avg_length': 560,
805+
'avg_length': 626,
811806
'loader': 'loader_internvl',
807+
'vision_component': 'InternViT-300M-448px-V2_5',
808+
'chat_component': 'Qwen2.5-1.5B',
812809
'license': 'apache-2.0',
813810
},
814-
'Granite Vision - 2b': {# transformers 4.46.0.dev0
811+
'Granite Vision - 2b': {
815812
'precision': 'bfloat16',
816813
'quant': '4-bit',
817814
'size': '2b',
818815
'repo_id': 'ibm-granite/granite-vision-3.2-2b',
819816
'cache_dir': 'ibm-granite--granite-vision-3.2-2b',
820817
'requires_cuda': False,
821-
'vram': '4.1 GB',
818+
'vram': '4.1 GB+',
819+
'avg_length': 966,
822820
'loader': 'loader_granite',
823-
'license': 'apache-2.0',
824-
},
825-
'Ovis2 - 2b': {
826-
'precision': 'bfloat16',
827-
'quant': 'n/a',
828-
'size': '2b',
829-
'repo_id': 'AIDC-AI/Ovis2-2B',
830-
'cache_dir': 'AIDC-AI--Ovis2-2B',
831-
'requires_cuda': False,
832-
'vram': '2.4 GB',
833-
'loader': 'loader_ovis',
821+
'vision_component': 'siglip-so400m-patch14-384',
822+
'chat_component': 'granite-3.1-2b-instruct',
834823
'license': 'apache-2.0',
835824
},
836825
'Qwen VL - 3b': {
@@ -841,8 +830,10 @@
841830
'cache_dir': 'Qwen--Qwen2.5-VL-3B-Instruct',
842831
'requires_cuda': True,
843832
'vram': '6.3 GB',
844-
'avg_length': 668,
833+
'avg_length': 975,
845834
'loader': 'loader_qwenvl',
835+
'vision_component': 'Custom ViT',
836+
'chat_component': 'Qwen2.5-VL-3B-Instruct',
846837
'license': 'Custom Non-Commercial',
847838
},
848839
'InternVL3 - 8b': {
@@ -853,56 +844,40 @@
853844
'cache_dir': 'OpenGVLab--InternVL3-8B',
854845
'requires_cuda': True,
855846
'vram': '8.2 GB',
856-
'avg_length': 743,
847+
'avg_length': 717,
857848
'loader': 'loader_internvl',
849+
'vision_component': 'InternViT-300M-448px-V2_5',
850+
'chat_component': 'Qwen2.5-7B',
858851
'license': 'apache-2.0',
859852
},
860-
'Qwen VL - 7b': {# transformers 4.41.2
853+
'Qwen VL - 7b': {
861854
'precision': 'bfloat16',
862855
'quant': '4-bit',
863856
'size': '7b',
864857
'repo_id': 'Qwen/Qwen2.5-VL-7B-Instruct',
865858
'cache_dir': 'Qwen--Qwen2.5-VL-7B-Instruct',
866859
'requires_cuda': True,
867860
'vram': '9.6 GB',
868-
'avg_length': 577,
861+
'avg_length': 918,
869862
'loader': 'loader_qwenvl',
863+
'vision_component': 'Custom ViT',
864+
'chat_component': 'Qwen2.5-VL-7B-Instruct',
870865
'license': 'Custom Non-Commercial',
871866
},
872-
'GLM-4.1V-9B-Thinking': {# transformers 4.53.2
867+
'GLM-4.1V-9B-Thinking': {
873868
'precision': 'bfloat16',
874869
'quant': '4-bit',
875870
'size': '9b',
876871
'repo_id': 'zai-org/GLM-4.1V-9B-Thinking',
877872
'cache_dir': 'zai-org--GLM-4.1V-9B-Thinking',
878873
'requires_cuda': True,
879874
'vram': '10 GB',
875+
'avg_length': 653,
880876
'loader': 'loader_glmv4_thinking',
881877
'vision_component': 'AIMv2-Huge-336',
882878
'chat_component': 'GLM-4-9B-0414',
883879
'license': 'mit',
884880
},
885-
'THUDM glm4v - 9b': {# transformers 4.44.0
886-
'precision': 'bfloat16',
887-
'quant': '4-bit',
888-
'size': '9b',
889-
'repo_id': 'ctranslate2-4you/glm-4v-9b',
890-
'cache_dir': 'ctranslate2-4you--glm-4v-9b',
891-
'requires_cuda': True,
892-
'vram': '10.5 GB',
893-
'loader': 'loader_glmv4'
894-
},
895-
'Molmo-D-0924 - 8b': {# transformers 4.43.3
896-
'precision': 'bfloat16',
897-
'quant': '4-bit',
898-
'size': '8b',
899-
'repo_id': 'ctranslate2-4you/molmo-7B-D-0924-bnb-4bit',
900-
'cache_dir': 'ctranslate2-4you--molmo-7B-D-0924-bnb-4bit',
901-
'requires_cuda': True,
902-
'vram': '10.5 GB',
903-
'loader': 'loader_molmo',
904-
'license': 'apache-2.0',
905-
},
906881
'InternVL3 - 14b': {
907882
'precision': 'bfloat16',
908883
'quant': '4-bit',
@@ -911,7 +886,10 @@
911886
'cache_dir': 'OpenGVLab--InternVL3-14B',
912887
'requires_cuda': True,
913888
'vram': '12.6 GB',
889+
'avg_length': 757,
914890
'loader': 'loader_internvl',
891+
'vision_component': 'InternViT-300M-448px-V2_5',
892+
'chat_component': 'Qwen2.5-14B',
915893
'license': 'apache-2.0',
916894
},
917895
}
@@ -1481,9 +1459,9 @@
14811459
"URL": "https://www.crummy.com/software/BeautifulSoup/bs4/doc/",
14821460
"folder": "beautiful_soup_4"
14831461
},
1484-
"bitsandbytes 0.46.0": {
1485-
"URL": "https://huggingface.co/docs/bitsandbytes/v0.46.0/en/",
1486-
"folder": "bitsandbytes_0460",
1462+
"bitsandbytes 0.47.0": {
1463+
"URL": "https://huggingface.co/docs/bitsandbytes/v0.47.0/en/",
1464+
"folder": "bitsandbytes_0470",
14871465
"scraper_class": "HuggingfaceScraper"
14881466
},
14891467
"Black": {

0 commit comments

Comments
 (0)