diff --git a/llmc/compression/quantization/awq.py b/llmc/compression/quantization/awq.py
index 7fc82821a..d30b7e281 100644
--- a/llmc/compression/quantization/awq.py
+++ b/llmc/compression/quantization/awq.py
@@ -156,7 +156,7 @@ def search_scale_subset(self, layers_dict, input, inspect_module, subset_kwargs)
 
             # Identify the rank with the minimum loss
             global_best_rank = torch.tensor([dist.get_rank()
-                                             if best_error == global_best_error
+                                             if abs(best_error - global_best_error) < 1e-5
                                              else -1],
                                             device='cuda')
             dist.all_reduce(global_best_rank, op=dist.ReduceOp.MAX)
diff --git a/llmc/models/mllama.py b/llmc/models/mllama.py
index a3060c1ae..fbd31b77d 100644
--- a/llmc/models/mllama.py
+++ b/llmc/models/mllama.py
@@ -7,7 +7,7 @@
 except Exception:
     logger.warning(
         'Can not import MllamaForConditionalGeneration. '
-        'Please upgrade transformers.'
+        'If you need it, please upgrade transformers.'
     )
 
 from llmc.utils.registry_factory import MODEL_REGISTRY
diff --git a/llmc/models/qwen2vl.py b/llmc/models/qwen2vl.py
index 15c1ab24c..ca71f92c2 100644
--- a/llmc/models/qwen2vl.py
+++ b/llmc/models/qwen2vl.py
@@ -6,7 +6,15 @@
 except Exception:
     logger.warning(
         'Can not import Qwen2VLForConditionalGeneration. '
-        'Please upgrade transformers.'
+        'If you need it, please upgrade transformers.'
+    )
+
+try:
+    from qwen_vl_utils import process_vision_info
+except Exception:
+    logger.warning(
+        'Can not import qwen_vl_utils. '
+        'If you need it, please pip install qwen-vl-utils'
     )
 
 from llmc.utils.registry_factory import MODEL_REGISTRY
@@ -40,24 +48,33 @@ def build_model(self):
 
         self.model = self.vlm_model
         self.model_config = self.vlm_model_config
+        self.min_pixels = 256 * 28 * 28
+        self.max_pixels = 1280 * 28 * 28
+        logger.warning(f'min_pixels is set to: {self.min_pixels}')
+        logger.warning(f'max_pixels is set to: {self.max_pixels}')
+        logger.warning('You can refer to https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct '
+                       'for more information on image resolution and performance.')
+        self.processor = AutoProcessor.from_pretrained(
+            self.model_path,
+            min_pixels=self.min_pixels,
+            max_pixels=self.max_pixels
+        )
+
     def batch_process(self, img_qas):
-        from qwen_vl_utils import process_vision_info
-        processor = AutoProcessor.from_pretrained(self.model_path)
         messages = []
         for idx in range(len(img_qas)):
             img_path = img_qas[idx]['img']
             if img_path is not None:
+                content = []
+                if not isinstance(img_path, list):
+                    img_path = [img_path]
+                for img_idx in range(len(img_path)):
+                    content.append({'type': 'image', 'image': img_path[img_idx]})
+                content.append({'type': 'text', 'text': img_qas[idx]['question']})
                 message = [
                     {
                         'role': 'user',
-                        'content': [
-                            {
-                                'type': 'image', 'image': img_path,
-                                'resized_height': 280, 'resized_width': 420
-                                # default: original resolution
-                            },
-                            {'type': 'text', 'text': img_qas[idx]['question']}
-                        ]
+                        'content': content
                     }
                 ]
             else:
@@ -71,11 +88,11 @@
                 ]
             messages.append(message)
         texts = [
-            processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True)
+            self.processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True)
             for msg in messages
         ]
         image_inputs, video_inputs = process_vision_info(messages)
-        inputs = processor(
+        inputs = self.processor(
             text=texts,
             images=image_inputs,
             videos=video_inputs,
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index 447488315..4d53ebb4c 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -27,3 +27,4 @@ word2number
 more_itertools
 qtorch
 einops
+qwen-vl-utils