Skip to content

Commit c225130

Browse files
committed
Support int8 deploy & evaluation
1 parent 935c4f2 commit c225130

File tree

19 files changed

+246
-118
lines changed

19 files changed

+246
-118
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,6 @@ cython_debug/
165165
data_process/
166166
internvl_chat/work_dirs/
167167
internvl_chat/unittest/
168-
internvl_chat/shell/
169168
internvl_chat/data/
170169
Husky2/*
171170
data_process/

README.md

Lines changed: 151 additions & 101 deletions
Large diffs are not rendered by default.

internvl_chat/eval/caption/evaluate_caption.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ def evaluate_chat_model():
234234
parser.add_argument('--seed', type=int, default=0)
235235
parser.add_argument('--dynamic', action='store_true')
236236
parser.add_argument('--max-num', type=int, default=6)
237+
parser.add_argument('--load-in-8bit', action='store_true')
237238
args = parser.parse_args()
238239

239240
if not os.path.exists(args.out_dir):
@@ -253,7 +254,10 @@ def evaluate_chat_model():
253254

254255
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
255256
model = InternVLChatModel.from_pretrained(
256-
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).cuda().eval()
257+
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
258+
load_in_8bit=args.load_in_8bit).eval()
259+
if not args.load_in_8bit:
260+
model = model.cuda()
257261
image_size = model.config.force_image_size or model.config.vision_config.image_size
258262
use_thumbnail = model.config.use_thumbnail
259263

internvl_chat/eval/cmmmu/evaluate_cmmmu.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ def evaluate_chat_model():
154154
parser.add_argument('--seed', type=int, default=0)
155155
parser.add_argument('--dynamic', action='store_true')
156156
parser.add_argument('--max-num', type=int, default=6)
157+
parser.add_argument('--load-in-8bit', action='store_true')
157158
args = parser.parse_args()
158159

159160
if not os.path.exists(args.out_dir):
@@ -165,7 +166,10 @@ def evaluate_chat_model():
165166

166167
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
167168
model = InternVLChatModel.from_pretrained(
168-
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).cuda().eval()
169+
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
170+
load_in_8bit=args.load_in_8bit).eval()
171+
if not args.load_in_8bit:
172+
model = model.cuda()
169173
image_size = model.config.force_image_size or model.config.vision_config.image_size
170174
use_thumbnail = model.config.use_thumbnail
171175

internvl_chat/eval/llava_bench/evaluate_llava_bench.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ def evaluate_chat_model():
115115
parser.add_argument('--seed', type=int, default=0)
116116
parser.add_argument('--dynamic', action='store_true')
117117
parser.add_argument('--max-num', type=int, default=6)
118+
parser.add_argument('--load-in-8bit', action='store_true')
118119
args = parser.parse_args()
119120

120121
if not os.path.exists(args.out_dir):
@@ -126,7 +127,10 @@ def evaluate_chat_model():
126127

127128
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
128129
model = InternVLChatModel.from_pretrained(
129-
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).cuda().eval()
130+
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
131+
load_in_8bit=args.load_in_8bit).eval()
132+
if not args.load_in_8bit:
133+
model = model.cuda()
130134
image_size = model.config.force_image_size or model.config.vision_config.image_size
131135
use_thumbnail = model.config.use_thumbnail
132136

internvl_chat/eval/mathvista/evaluate_mathvista.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ def evaluate_chat_model():
183183
parser.add_argument('--seed', type=int, default=0)
184184
parser.add_argument('--dynamic', action='store_true')
185185
parser.add_argument('--max-num', type=int, default=6)
186+
parser.add_argument('--load-in-8bit', action='store_true')
186187
args = parser.parse_args()
187188

188189
if not os.path.exists(args.out_dir):
@@ -202,7 +203,10 @@ def evaluate_chat_model():
202203

203204
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
204205
model = InternVLChatModel.from_pretrained(
205-
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).cuda().eval()
206+
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
207+
load_in_8bit=args.load_in_8bit).eval()
208+
if not args.load_in_8bit:
209+
model = model.cuda()
206210
image_size = model.config.force_image_size or model.config.vision_config.image_size
207211
use_thumbnail = model.config.use_thumbnail
208212

internvl_chat/eval/mmbench/evaluate_mmbench.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ def evaluate_chat_model():
270270
parser.add_argument('--seed', type=int, default=0)
271271
parser.add_argument('--dynamic', action='store_true')
272272
parser.add_argument('--max-num', type=int, default=6)
273+
parser.add_argument('--load-in-8bit', action='store_true')
273274
args = parser.parse_args()
274275

275276
if not os.path.exists(args.out_dir):
@@ -289,7 +290,10 @@ def evaluate_chat_model():
289290

290291
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
291292
model = InternVLChatModel.from_pretrained(
292-
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).cuda().eval()
293+
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
294+
load_in_8bit=args.load_in_8bit).eval()
295+
if not args.load_in_8bit:
296+
model = model.cuda()
293297
image_size = model.config.force_image_size or model.config.vision_config.image_size
294298
use_thumbnail = model.config.use_thumbnail
295299

internvl_chat/eval/mme/eval.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,16 @@ def post_processing(response):
4242
parser.add_argument('--sample', type=bool, default=False)
4343
parser.add_argument('--dynamic', action='store_true')
4444
parser.add_argument('--max-num', type=int, default=6)
45+
parser.add_argument('--load-in-8bit', action='store_true')
4546
args = parser.parse_args()
4647

4748
prompt = 'Answer the question using a single word or phrase.'
4849
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
4950
model = InternVLChatModel.from_pretrained(
50-
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).cuda().eval()
51+
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
52+
load_in_8bit=args.load_in_8bit).eval()
53+
if not args.load_in_8bit:
54+
model = model.cuda()
5155
image_size = model.config.force_image_size or model.config.vision_config.image_size
5256
use_thumbnail = model.config.use_thumbnail
5357

internvl_chat/eval/mmmu/evaluate_mmmu.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ def evaluate_chat_model():
260260
parser.add_argument('--seed', type=int, default=0)
261261
parser.add_argument('--dynamic', action='store_true')
262262
parser.add_argument('--max-num', type=int, default=6)
263+
parser.add_argument('--load-in-8bit', action='store_true')
263264
args = parser.parse_args()
264265

265266
if not os.path.exists(args.out_dir):
@@ -279,7 +280,10 @@ def evaluate_chat_model():
279280

280281
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
281282
model = InternVLChatModel.from_pretrained(
282-
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).cuda().eval()
283+
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
284+
load_in_8bit=args.load_in_8bit).eval()
285+
if not args.load_in_8bit:
286+
model = model.cuda()
283287
image_size = model.config.force_image_size or model.config.vision_config.image_size
284288
use_thumbnail = model.config.use_thumbnail
285289

internvl_chat/eval/mmvet/evaluate_mmvet.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ def evaluate_chat_model():
121121
parser.add_argument('--seed', type=int, default=0)
122122
parser.add_argument('--dynamic', action='store_true')
123123
parser.add_argument('--max-num', type=int, default=6)
124+
parser.add_argument('--load-in-8bit', action='store_true')
124125
args = parser.parse_args()
125126

126127
if not os.path.exists(args.out_dir):
@@ -132,7 +133,10 @@ def evaluate_chat_model():
132133

133134
tokenizer = AutoTokenizer.from_pretrained(args.checkpoint, trust_remote_code=True, use_fast=False)
134135
model = InternVLChatModel.from_pretrained(
135-
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16).cuda().eval()
136+
args.checkpoint, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16,
137+
load_in_8bit=args.load_in_8bit).eval()
138+
if not args.load_in_8bit:
139+
model = model.cuda()
136140
image_size = model.config.force_image_size or model.config.vision_config.image_size
137141
use_thumbnail = model.config.use_thumbnail
138142

0 commit comments

Comments (0)