Commit 5d8f485

Fix device error in evaluation (#478)
1 parent c1d6c3f commit 5d8f485

18 files changed: +86 −188 lines
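
Every evaluation script below previously duplicated the same checkpoint-loading block (AutoTokenizer plus InternVLChatModel.from_pretrained, with ad-hoc .cuda()/device_map handling); this commit replaces that block with a single call to a shared helper, imported as `from internvl.model import load_model_and_tokenizer`. The helper itself is defined in one of the 18 changed files not shown in this excerpt (presumably internvl/model/__init__.py, given the import path). A minimal sketch of what it consolidates, reconstructed from the deleted lines below (only the function name and import path are taken from the diff; the body is an assumption), would be:

# Sketch only: reconstructed from the block each eval script used to duplicate.
# The real helper is defined elsewhere in this commit and may differ in detail.
import os

import torch
from internvl.model.internvl_chat import InternVLChatModel
from transformers import AutoTokenizer


def load_model_and_tokenizer(args):
    if args.auto:
        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    kwargs = {'device_map': 'auto'} if args.auto else {}
    tokenizer = AutoTokenizer.from_pretrained(
        args.checkpoint, trust_remote_code=True, use_fast=False)
    model = InternVLChatModel.from_pretrained(
        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit,
        **kwargs).eval()
    # 8-bit/4-bit and device_map='auto' loads are already placed on devices;
    # otherwise move the model onto the current process's CUDA device.
    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
        model = model.cuda()
    return model, tokenizer

With this in place, each script's setup reduces to `model, tokenizer = load_model_and_tokenizer(args)`, as shown in every hunk below.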

internvl_chat/eval/caption/evaluate_caption.py

Lines changed: 2 additions & 11 deletions
@@ -7,13 +7,12 @@
 from functools import partial

 import torch
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from PIL import Image
 from pycocoevalcap.eval import COCOEvalCap
 from pycocotools.coco import COCO
 from tqdm import tqdm
-from transformers import AutoTokenizer

 ds_collections = {
     'flickr30k': {
@@ -254,15 +253,7 @@ def evaluate_chat_model():

     torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

-    if args.auto:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-    kwargs = {'device_map': 'auto'} if args.auto else {}
-    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-    model = InternVLChatModel.from_pretrained(
-        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-        model = model.cuda()
+    model, tokenizer = load_model_and_tokenizer(args)
     image_size = model.config.force_image_size or model.config.vision_config.image_size
     use_thumbnail = model.config.use_thumbnail

internvl_chat/eval/cmmmu/evaluate_cmmmu.py

Lines changed: 2 additions & 11 deletions
@@ -4,11 +4,10 @@
 import random

 import torch
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from PIL import Image
 from tqdm import tqdm
-from transformers import AutoTokenizer

 ds_collections = {
     'art_and_design': {
@@ -166,15 +165,7 @@ def evaluate_chat_model():
     print('datasets:', args.datasets)
     assert args.batch_size == 1, 'Only batch size 1 is supported'

-    if args.auto:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-    kwargs = {'device_map': 'auto'} if args.auto else {}
-    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-    model = InternVLChatModel.from_pretrained(
-        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-        model = model.cuda()
+    model, tokenizer = load_model_and_tokenizer(args)
     image_size = model.config.force_image_size or model.config.vision_config.image_size
     use_thumbnail = model.config.use_thumbnail

internvl_chat/eval/llava_bench/evaluate_llava_bench.py

Lines changed: 2 additions & 12 deletions
@@ -4,11 +4,10 @@
 import random

 import torch
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from PIL import Image
 from tqdm import tqdm
-from transformers import AutoTokenizer

 ds_collections = {
     'llava_bench': {
@@ -104,7 +103,6 @@ def evaluate_chat_model():


 if __name__ == '__main__':
-
     parser = argparse.ArgumentParser()
     parser.add_argument('--checkpoint', type=str, default='')
     parser.add_argument('--datasets', type=str, default='llava_bench')
@@ -128,15 +126,7 @@ def evaluate_chat_model():
     print('datasets:', args.datasets)
     assert args.batch_size == 1, 'Only batch size 1 is supported'

-    if args.auto:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-    kwargs = {'device_map': 'auto'} if args.auto else {}
-    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-    model = InternVLChatModel.from_pretrained(
-        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-        model = model.cuda()
+    model, tokenizer = load_model_and_tokenizer(args)
     image_size = model.config.force_image_size or model.config.vision_config.image_size
     use_thumbnail = model.config.use_thumbnail

internvl_chat/eval/mathvista/evaluate_mathvista.py

Lines changed: 2 additions & 11 deletions
@@ -8,11 +8,10 @@

 import torch
 from datasets import concatenate_datasets, load_dataset
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from torch.utils.data import Dataset
 from tqdm import tqdm
-from transformers import AutoTokenizer

 ds_collections = {
     'MathVista_testmini': {
@@ -204,15 +203,7 @@ def evaluate_chat_model():

     torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

-    if args.auto:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-    kwargs = {'device_map': 'auto'} if args.auto else {}
-    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-    model = InternVLChatModel.from_pretrained(
-        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-        model = model.cuda()
+    model, tokenizer = load_model_and_tokenizer(args)
     image_size = model.config.force_image_size or model.config.vision_config.image_size
     use_thumbnail = model.config.use_thumbnail

internvl_chat/eval/mmbench/evaluate_mmbench.py

Lines changed: 2 additions & 11 deletions
@@ -10,12 +10,11 @@

 import pandas as pd
 import torch
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from PIL import Image
 from torch.utils.data import Dataset
 from tqdm import tqdm
-from transformers import AutoTokenizer

 ds_collections = {
     'mmbench_dev_20230712': {
@@ -291,15 +290,7 @@ def evaluate_chat_model():

     torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

-    if args.auto:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-    kwargs = {'device_map': 'auto'} if args.auto else {}
-    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-    model = InternVLChatModel.from_pretrained(
-        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-        model = model.cuda()
+    model, tokenizer = load_model_and_tokenizer(args)
     image_size = model.config.force_image_size or model.config.vision_config.image_size
     use_thumbnail = model.config.use_thumbnail

internvl_chat/eval/mme/eval.py

Lines changed: 3 additions & 12 deletions
@@ -3,11 +3,10 @@
 import re

 import torch
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from PIL import Image
 from tqdm import tqdm
-from transformers import AutoTokenizer


 def load_image(image_file, input_size=224):
@@ -47,16 +46,7 @@ def post_processing(response):
 parser.add_argument('--auto', action='store_true')
 args = parser.parse_args()

-if args.auto:
-    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-kwargs = {'device_map': 'auto'} if args.auto else {}
-prompt = 'Answer the question using a single word or phrase.'
-tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-model = InternVLChatModel.from_pretrained(
-    args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-    load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-    model = model.cuda()
+model, tokenizer = load_model_and_tokenizer(args)
 image_size = model.config.force_image_size or model.config.vision_config.image_size
 use_thumbnail = model.config.use_thumbnail

@@ -74,6 +64,7 @@ def post_processing(response):

 output = os.path.basename(args.checkpoint)
 os.makedirs(output, exist_ok=True)
+prompt = 'Answer the question using a single word or phrase.'

 for filename in os.listdir(args.root):
     fin = open(os.path.join(args.root, filename), 'r', encoding='utf-8')
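
The helper relies on the same CLI flags each script already declares: the hunk above shows `--auto`, and the deleted code also reads `args.checkpoint`, `args.load_in_8bit` and `args.load_in_4bit`. A minimal argparse stanza satisfying those expectations (the dash spellings of the 8-bit/4-bit flags are assumptions; only the attribute names appear in the diff) looks like:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', type=str, default='')
parser.add_argument('--load-in-8bit', action='store_true')   # read as args.load_in_8bit
parser.add_argument('--load-in-4bit', action='store_true')   # read as args.load_in_4bit
parser.add_argument('--auto', action='store_true')
args = parser.parse_args()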

internvl_chat/eval/mmmu/evaluate_mmmu.py

Lines changed: 2 additions & 11 deletions
@@ -9,12 +9,11 @@
 import torch
 from data_utils import CAT_SHORT2LONG, process_single_sample
 from datasets import concatenate_datasets, load_dataset
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from PIL import Image
 from torch.utils.data import Dataset
 from tqdm import tqdm
-from transformers import AutoTokenizer

 ds_collections = {
     'MMMU_validation': {
@@ -286,15 +285,7 @@ def evaluate_chat_model():

     torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

-    if args.auto:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-    kwargs = {'device_map': 'auto'} if args.auto else {}
-    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-    model = InternVLChatModel.from_pretrained(
-        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-        model = model.cuda()
+    model, tokenizer = load_model_and_tokenizer(args)
     image_size = model.config.force_image_size or model.config.vision_config.image_size
     use_thumbnail = model.config.use_thumbnail

internvl_chat/eval/mmvet/evaluate_mmvet.py

Lines changed: 2 additions & 12 deletions
@@ -5,11 +5,10 @@
 import time

 import torch
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from PIL import Image
 from tqdm import tqdm
-from transformers import AutoTokenizer

 ds_collections = {
     'mmvet': {
@@ -110,7 +109,6 @@ def evaluate_chat_model():


 if __name__ == '__main__':
-
     parser = argparse.ArgumentParser()
     parser.add_argument('--checkpoint', type=str, default='')
     parser.add_argument('--datasets', type=str, default='pope')
@@ -134,15 +132,7 @@ def evaluate_chat_model():
     print('datasets:', args.datasets)
     assert args.batch_size == 1, 'Only batch size 1 is supported'

-    if args.auto:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-    kwargs = {'device_map': 'auto'} if args.auto else {}
-    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-    model = InternVLChatModel.from_pretrained(
-        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-        model = model.cuda()
+    model, tokenizer = load_model_and_tokenizer(args)
     image_size = model.config.force_image_size or model.config.vision_config.image_size
     use_thumbnail = model.config.use_thumbnail

internvl_chat/eval/mmvp/evaluate_mmvp.py

Lines changed: 2 additions & 11 deletions
@@ -8,12 +8,11 @@
 from functools import partial

 import torch
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from PIL import Image
 from torch.utils.data import Dataset
 from tqdm import tqdm
-from transformers import AutoTokenizer

 ds_collections = {
     'MMVP': {
@@ -268,15 +267,7 @@ def evaluate_chat_model():

     torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

-    if args.auto:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-    kwargs = {'device_map': 'auto'} if args.auto else {}
-    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-    model = InternVLChatModel.from_pretrained(
-        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-        model = model.cuda()
+    model, tokenizer = load_model_and_tokenizer(args)
     image_size = model.config.force_image_size or model.config.vision_config.image_size
     use_thumbnail = model.config.use_thumbnail

internvl_chat/eval/mvbench/evaluate_mvbench.py

Lines changed: 2 additions & 11 deletions
@@ -11,12 +11,11 @@
 import numpy as np
 import torch
 from decord import VideoReader, cpu
-from internvl.model.internvl_chat import InternVLChatModel
+from internvl.model import load_model_and_tokenizer
 from internvl.train.dataset import build_transform, dynamic_preprocess
 from PIL import Image
 from torch.utils.data import Dataset
 from tqdm import tqdm
-from transformers import AutoTokenizer

 data_list = {
     'Action Sequence': ('action_sequence.json', './data/MVBench/video/star/Charades_v1_480/', 'video', True),
@@ -387,15 +386,7 @@ def evaluate_chat_model():

     torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

-    if args.auto:
-        os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
-    kwargs = {'device_map': 'auto'} if args.auto else {}
-    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
-    model = InternVLChatModel.from_pretrained(
-        args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
-        load_in_8bit=args.load_in_8bit, load_in_4bit=args.load_in_4bit, **kwargs).eval()
-    if not args.load_in_8bit and not args.load_in_4bit and not args.auto:
-        model = model.cuda()
+    model, tokenizer = load_model_and_tokenizer(args)
     image_size = model.config.force_image_size or model.config.vision_config.image_size
     use_thumbnail = model.config.use_thumbnail
