33import re
44
55import torch
6- from internvl .model . internvl_chat import InternVLChatModel
6+ from internvl .model import load_model_and_tokenizer
77from internvl .train .dataset import build_transform , dynamic_preprocess
88from PIL import Image
99from tqdm import tqdm
10- from transformers import AutoTokenizer
1110
1211
1312def load_image (image_file , input_size = 224 ):
@@ -47,16 +46,7 @@ def post_processing(response):
4746 parser .add_argument ('--auto' , action = 'store_true' )
4847 args = parser .parse_args ()
4948
50- if args .auto :
51- os .environ ['CUDA_LAUNCH_BLOCKING' ] = '1'
52- kwargs = {'device_map' : 'auto' } if args .auto else {}
53- prompt = 'Answer the question using a single word or phrase.'
54- tokenizer = AutoTokenizer .from_pretrained (args .checkpoint , trust_remote_code = True , use_fast = False )
55- model = InternVLChatModel .from_pretrained (
56- args .checkpoint , low_cpu_mem_usage = True , torch_dtype = torch .bfloat16 ,
57- load_in_8bit = args .load_in_8bit , load_in_4bit = args .load_in_4bit , ** kwargs ).eval ()
58- if not args .load_in_8bit and not args .load_in_4bit and not args .auto :
59- model = model .cuda ()
49+ model , tokenizer = load_model_and_tokenizer (args )
6050 image_size = model .config .force_image_size or model .config .vision_config .image_size
6151 use_thumbnail = model .config .use_thumbnail
6252
@@ -74,6 +64,7 @@ def post_processing(response):
7464
7565 output = os .path .basename (args .checkpoint )
7666 os .makedirs (output , exist_ok = True )
67+ prompt = 'Answer the question using a single word or phrase.'
7768
7869 for filename in os .listdir (args .root ):
7970 fin = open (os .path .join (args .root , filename ), 'r' , encoding = 'utf-8' )
0 commit comments