
Commit 3adb8b8

completely add liquid-vl models
1 parent 5794c79 commit 3adb8b8

File tree: 2 files changed (+25, -5 lines)

src/constants.py

Lines changed: 21 additions & 2 deletions

@@ -750,7 +750,20 @@
 }
 
 VISION_MODELS = {
-    'Liquid-VL - 1.6B': {# transformers 4.48.3
+    'Liquid-VL - 480M': {
+        'precision': 'bfloat16',
+        'quant': 'n/a',
+        'size': '480m',
+        'repo_id': 'LiquidAI/LFM2-VL-450M',
+        'cache_dir': 'LiquidAI--LFM2-VL-450M',
+        'requires_cuda': False,
+        'vram': '628 MB',
+        'speed': '497.64 char/s',
+        'avg_length': 964,
+        'loader': 'loader_liquidvl',
+        'license': 'lfm1.0',
+    },
+    'Liquid-VL - 1.6B': {
         'precision': 'bfloat16',
         'quant': 'n/a',
         'size': '1.6b',
@@ -759,7 +772,8 @@
         'requires_cuda': False,
         'vram': '1.4 GB',
         'speed': '437.5 char/s',
-        'loader': 'loader_internvl',
+        'avg_length': 856,
+        'loader': 'loader_liquidvl',
         'license': 'lfm1.0',
     },
     'InternVL3 - 1b': {# transformers 4.48.3
@@ -770,6 +784,7 @@
         'cache_dir': 'OpenGVLab--InternVL3-1B',
         'requires_cuda': False,
         'vram': '2.4 GB',
+        'avg_length': 527,
         'loader': 'loader_internvl',
         'license': 'apache-2.0',
     },
@@ -792,6 +807,7 @@
         'cache_dir': 'OpenGVLab--InternVL3-2B',
         'requires_cuda': False,
         'vram': '3.2 GB',
+        'avg_length': 560,
         'loader': 'loader_internvl',
         'license': 'apache-2.0',
     },
@@ -825,6 +841,7 @@
         'cache_dir': 'Qwen--Qwen2.5-VL-3B-Instruct',
         'requires_cuda': True,
         'vram': '6.3 GB',
+        'avg_length': 668,
        'loader': 'loader_qwenvl',
         'license': 'Custom Non-Commercial',
     },
@@ -836,6 +853,7 @@
         'cache_dir': 'OpenGVLab--InternVL3-8B',
         'requires_cuda': True,
         'vram': '8.2 GB',
+        'avg_length': 743,
         'loader': 'loader_internvl',
         'license': 'apache-2.0',
     },
@@ -847,6 +865,7 @@
         'cache_dir': 'Qwen--Qwen2.5-VL-7B-Instruct',
         'requires_cuda': True,
         'vram': '9.6 GB',
+        'avg_length': 577,
         'loader': 'loader_qwenvl',
         'license': 'Custom Non-Commercial',
     },

src/module_process_images.py

Lines changed: 4 additions & 3 deletions

@@ -63,6 +63,7 @@ def run_loader_in_process(loader_func):
         my_cprint(error_message, "red")
         return []
 
+
 def choose_image_loader():
     with open('config.yaml', 'r') as file:
         config = yaml.safe_load(file)
@@ -81,7 +82,7 @@ def choose_image_loader():
         loader_func = loader_qwenvl(config).process_images
     elif chosen_model == 'GLM-4.1V-9B-Thinking':
         loader_func = loader_glmv4_thinking(config).process_images
-    elif chosen_model in ['Liquid-VL - 1.6B']:
+    elif chosen_model in ['Liquid-VL - 1.6B', 'Liquid-VL - 480M']:
         loader_func = loader_liquidvl(config).process_images
     else:
         my_cprint("No valid image model specified in config.yaml", "red")
@@ -641,14 +642,14 @@ def initialize_model_and_tokenizer(self):
         model.eval()
 
         precision_str = "bfloat16" if use_bf16 else "float16"
-        device_str = device_str_from_model(model, fallback_device=self.device)
+        device_str = "CUDA" if self.device == "cuda" else "CPU"
         my_cprint(f"{chosen_model} loaded into memory on {device_str} ({precision_str})", "green")
 
         return model, None, processor
 
     @torch.inference_mode()
     def process_single_image(self, raw_image):
-        user_message = "Describe in as much detail as possible what this image depicts?"
+        user_message = "Explain everything you see in this picture but your response should be no more than one paragraph, but the paragraph can be as long as you want."
         prompt = (
             "<|im_start|>user\n"
             f"{user_message} <|vis_start|><|image_pad|><|vis_end|>\n"
