-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrun_task3_evaluation_deepseek_vl2.py
More file actions
153 lines (129 loc) · 5.08 KB
/
run_task3_evaluation_deepseek_vl2.py
File metadata and controls
153 lines (129 loc) · 5.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import argparse
import os
import torch
from PIL import Image
from utils import *
from sklearn.metrics import accuracy_score
from collections import defaultdict
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
from deepseek_vl2.models import DeepseekVLV2Processor, DeepseekVLV2ForCausalLM
from deepseek_vl2.utils.io import load_pil_images
# Accumulates per-model evaluation results; populated by run_deepseek_vl().
result_map = defaultdict(dict)
# Maps model family -> {variant name -> HuggingFace Hub model path}.
model_path_map = {
    "deepseek-vl2": {
        "deepseek-vl2-small": "Deepseek/deepseek-vl2-small",
        "deepseek-vl2-tiny": "Deepseek/deepseek-vl2-tiny"
    }
}
def load_deepseek_vl(model_name):
    """Load a DeepSeek-VL2 checkpoint and return (model, tokenizer, processor).

    The checkpoint path is resolved through model_path_map; the model is cast
    to bfloat16, moved to GPU, and put into eval mode before being returned.
    """
    checkpoint = model_path_map["deepseek-vl2"][model_name]
    processor: DeepseekVLV2Processor = DeepseekVLV2Processor.from_pretrained(checkpoint)
    model: DeepseekVLV2ForCausalLM = AutoModelForCausalLM.from_pretrained(
        checkpoint, trust_remote_code=True
    )
    model = model.to(torch.bfloat16).cuda().eval()
    return model, processor.tokenizer, processor
# Dispatch table: model family -> loader function returning (model, tokenizer, processor).
model_example_map = {
    "deepseek-vl2": load_deepseek_vl,
}
def _str2bool(value):
    """Convert a CLI string to a bool: accepts true/false, 1/0, yes/no (case-insensitive)."""
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ("true", "1", "yes", "y"):
        return True
    if lowered in ("false", "0", "no", "n"):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")
def load_args():
    """Build the argument parser for the evaluation script.

    Returns:
        argparse.ArgumentParser: parser exposing model selection, CUDA device,
        and task-toggle options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_type',
                        type=str,
                        default="deepseek-vl2",
                        choices=model_example_map.keys())
    parser.add_argument('--model_used',
                        type=str,
                        default='all',
                        help='Model variant to evaluate, or "all" for every variant.')
    parser.add_argument('--cuda',
                        type=str,
                        default='2')
    # BUG FIX: type=bool is broken with argparse — bool("False") is True, so any
    # non-empty value was parsed as True. Use an explicit converter instead.
    parser.add_argument('--entity_task',
                        type=_str2bool,
                        default=True)
    parser.add_argument('--relation_task',
                        type=_str2bool,
                        default=True)
    return parser
def run_deepseek_vl(model_type, model_name, args):
    """Evaluate one DeepSeek-VL2 variant on the Task-3 multiple-choice datasets.

    For each dataset instance, the question is rendered into a DeepSeek-VL2
    conversation with its image, the model's next-token logits are restricted
    to the choice letters A-E, and the argmax letter is taken as the
    prediction. Per-dataset accuracy plus the raw answer/prediction lists are
    stored in the module-level result_map.

    Args:
        model_type: key into model_example_map (e.g. "deepseek-vl2").
        model_name: variant name, key into model_path_map[model_type].
        args: parsed CLI args (unused here; kept for dispatch symmetry).
    """
    model, tokenizer, processor = model_example_map[model_type](model_name)
    dataset_dict = load_task3_dataset()
    choices = ["A", "B", "C", "D", "E"]
    # Take the LAST token id of each encoded letter so a leading BOS token
    # (if the tokenizer adds one) is ignored.
    choice_ids = [tokenizer.encode(choice)[-1] for choice in choices]
    print(choice_ids)
    choice_id_map = {letter: idx for idx, letter in enumerate(choices)}
    with torch.no_grad():
        for dataset_type in dataset_dict:
            dataset = dataset_dict[dataset_type]
            answers = []
            predicts = []
            for data_instance in tqdm(dataset):
                # Skip instances whose image is missing or unreadable.
                # (Narrowed from a bare `except:` which hid real errors; the
                # `with` block also closes the probe handle that previously leaked.)
                try:
                    image = data_instance["image_file"]
                    with Image.open(image) as probe:
                        probe.convert("RGB")
                except (KeyError, OSError, ValueError):
                    continue
                question = data_instance["input"]
                conversation = [
                    {
                        "role": "<|User|>",
                        "content": "<image>\n<|ref|>{}<|/ref|>.".format(question),
                        "images": [image],
                    },
                    {"role": "<|Assistant|>", "content": ""},
                ]
                pil_images = load_pil_images(conversation)
                prepare_inputs = processor(
                    conversations=conversation, images=pil_images, force_batchify=True
                ).to(model.device)
                # run image encoder to get the image embeddings
                inputs_embeds = model.prepare_inputs_embeds(**prepare_inputs)
                logits = model.language(
                    inputs_embeds=inputs_embeds,
                    attention_mask=prepare_inputs.attention_mask
                ).logits[0, -1, :].cpu()
                # Restrict the prediction to the five choice letters; argmax
                # over raw logits equals argmax over their softmax, so the
                # extra softmax pass was dropped.
                choice_logits = torch.tensor([logits[idx].item() for idx in choice_ids])
                predict_answer = int(torch.argmax(choice_logits).item())
                answers.append(choice_id_map[data_instance["answer"]])
                predicts.append(predict_answer)
            accuracy = accuracy_score(y_true=answers, y_pred=predicts)
            print(model_name, dataset_type, accuracy)
            # Store scalar accuracy plus raw labels for later analysis.
            # (The original assigned a bare accuracy first and immediately
            # overwrote it with this dict — the dead store was removed.)
            result_map[model_name][dataset_type] = {
                "result": accuracy,
                "answer": answers,
                "predict": predicts
            }
def model_dispatch(model_type, model_name, args):
    """Route the evaluation request to the runner for the given model family.

    Raises:
        NotImplementedError: if model_type has no registered runner.
    """
    if model_type != "deepseek-vl2":
        raise NotImplementedError
    run_deepseek_vl(model_type, model_name, args)
def run_inference(args):
    """Run evaluation for the requested model(s) and print accumulated results.

    Sets CUDA_VISIBLE_DEVICES from args.cuda before any model is loaded, then
    dispatches either every variant of the chosen family (model_used == "all")
    or the single named variant.

    Raises:
        ValueError: if args.model_type is not a supported model family.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda
    model = args.model_type
    if model not in model_example_map:
        raise ValueError(f"Model type {model} is not supported.")
    if args.model_used == "all":
        all_model_names = model_path_map[model].keys()
        for model_name in all_model_names:
            model_dispatch(model, model_name, args)
    else:
        # BUG FIX: the original referenced `model_name` here, which is
        # undefined in this branch (NameError); the intended variant is
        # args.model_used.
        model_dispatch(model, args.model_used, args)
    for model in result_map.keys():
        print(model, result_map[model])
if __name__ == "__main__":
    # Script entry point: parse CLI arguments and launch the evaluation.
    cli_args = load_args().parse_args()
    run_inference(args=cli_args)