Update LLaVA model: load AutoProcessor once in build_model and remove single_process (#208)

helloyongyang · web-flow · commit ee61b0fe7fd3 · 2024-11-20T13:52:25.000+08:00
diff --git a/llmc/models/llava.py b/llmc/models/llava.py
@@ -31,10 +31,9 @@ def build_model(self):
         self.model = self.vlm_model.language_model
         self.model_config = self.vlm_model_config.text_config
 
+        self.processor = AutoProcessor.from_pretrained(self.model_path)
+
     def batch_process(self, img_qas):
-        if len(img_qas) == 1:
-            return self.single_process(img_qas[0])
-        processor = AutoProcessor.from_pretrained(self.model_path)
         messages = []
         images = []
         for idx in range(len(img_qas)):
@@ -52,38 +51,17 @@ def batch_process(self, img_qas):
             messages.append(message)
             images.append(image)
         texts = [
-            processor.apply_chat_template(msg, add_generation_prompt=True)
+            self.processor.apply_chat_template(msg, add_generation_prompt=True)
             for msg in messages
         ]
-        inputs = processor(
+        inputs = self.processor(
             text=texts,
             images=images,
             padding=True,
             return_tensors='pt'
         ).to(next(self.vlm_model.parameters()).dtype) # noqa
         return inputs
 
-    def single_process(self, img_qas):
-        processor = AutoProcessor.from_pretrained(self.model_path)
-        img_path = img_qas['img']
-        image = Image.open(img_path) if img_path is not None else None
-        message = [
-            {
-                'role': 'user',
-                'content': [{'type': 'text', 'text': img_qas['question']}]
-            }
-        ]
-        if img_path is not None:
-            message[0]['content'].insert(0, {'type': 'image'})
-        text = processor.apply_chat_template(message, add_generation_prompt=True)
-        inputs = processor(
-            text=text,
-            images=image,
-            padding=True,
-            return_tensors='pt'
-        ).to(next(self.vlm_model.parameters()).dtype) # noqa
-        return inputs
-
     def find_blocks(self, modality='language'):
         if modality == 'language':
             self.blocks = self.model.model.layers