Commit 72a7c1d

update vlm (#197)
Co-authored-by: chengtao-lv <[email protected]>
1 parent: b71d3e2


42 files changed: +179, -97 lines

llmc/__main__.py

Lines changed: 40 additions & 33 deletions

@@ -69,43 +69,50 @@ def main(config):
         for ppl_eval in eval_list:
             ppl = ppl_eval.eval(model)
             logger.info(f'{ppl_eval.dataset} ppl : {ppl}')
-
-    if not config.get('calib', False):
-        blockwise_opt = ALGO_REGISTRY[config.quant.method](
-            model,
-            quant_config=config.quant,
-            input=None,
-            padding_mask=None,
-            config=config
-        )
-        blockwise_opt.run_block_loop()
-        dist.barrier()
-    else:
-        dataset = BaseDataset(tokenizer.get_tokenizer(), config.calib, model.batch_process)
-        calib_data, padding_mask = dataset.get_calib_dataset()
-        padding_side = getattr(tokenizer.get_tokenizer(), 'padding_side', None)
-        model.collect_first_block_input(calib_data, padding_mask, padding_side, config.calib.type)
-        del calib_data
-        gc.collect()
-        torch.cuda.empty_cache()
-        if not config.get('sparse', False):
+    for modality in config.quant.get('quant_objects', ['language']):
+        if not config.get('calib', False):
             blockwise_opt = ALGO_REGISTRY[config.quant.method](
                 model,
-                config.quant,
-                model.get_first_block_input(),
-                model.get_padding_mask(),
-                config
+                quant_config=config.quant,
+                input=None,
+                padding_mask=None,
+                config=config,
+                modality=modality,
             )
+            blockwise_opt.run_block_loop()
+            dist.barrier()
         else:
-            blockwise_opt = ALGO_REGISTRY[config.sparse.method](
-                model,
-                config.sparse,
-                model.get_first_block_input(),
-                model.get_padding_mask(),
-                config
-            )
-        blockwise_opt.run_block_loop()
-        dist.barrier()
+            dataset = BaseDataset(tokenizer.get_tokenizer(), config.calib, model.batch_process)
+            calib_data, padding_mask = dataset.get_calib_dataset()
+            padding_side = getattr(tokenizer.get_tokenizer(), 'padding_side', None)
+            model.collect_first_block_input(calib_data,
+                                            padding_mask,
+                                            padding_side,
+                                            config.calib.type,
+                                            modality)
+            del calib_data
+            gc.collect()
+            torch.cuda.empty_cache()
+            if not config.get('sparse', False):
+                blockwise_opt = ALGO_REGISTRY[config.quant.method](
+                    model,
+                    config.quant,
+                    model.get_first_block_input(),
+                    model.get_padding_mask(),
+                    config,
+                    modality
+                )
+            else:
+                blockwise_opt = ALGO_REGISTRY[config.sparse.method](
+                    model,
+                    config.sparse,
+                    model.get_first_block_input(),
+                    model.get_padding_mask(),
+                    config,
+                    modality
+                )
+            blockwise_opt.run_block_loop()
+            dist.barrier()
 
     if int(os.environ['RANK']) == 0:
         if 'eval' in config and 'transformed' in config.eval.eval_pos:
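
Note: the entry point now loops over the modalities listed under quant.quant_objects, falling back to ['language'] so existing language-only configs run exactly one pass as before. A minimal sketch of that dispatch, with a hypothetical config fragment -- only the quant_objects key, the ['language'] default, and the ALGO_REGISTRY lookup come from this diff; the other values are illustrative placeholders:

# Hypothetical config fragment: the new loop reads quant.quant_objects and
# falls back to ['language'] when the key is absent.
config = {
    'quant': {
        'method': 'Awq',  # any algorithm name registered in ALGO_REGISTRY
        'quant_objects': ['language', 'vision'],  # key introduced by this commit
    },
}

# Mirrors the dispatch in llmc/__main__.py above: each listed modality gets
# its own calibration pass and block loop.
for modality in config['quant'].get('quant_objects', ['language']):
    print(f'running blockwise compression for the {modality} blocks')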

llmc/compression/blockwise_optimization.py

Lines changed: 3 additions & 1 deletion

@@ -6,8 +6,10 @@


 class BlockwiseOpt(metaclass=ABCMeta):
-    def __init__(self, model, quant_config, input, padding_mask, config):
+    def __init__(self, model, quant_config, input, padding_mask, config, modality='language'):
         self.model = model
+        self.modality = modality
+        self.model.find_blocks(modality)
         self.blocks = model.get_blocks()
         self.quant_config = quant_config
         self.sparsity_config = quant_config
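
Note: BlockwiseOpt now records the modality and asks the model to locate the matching block list before get_blocks() is read. A sketch of the model-side counterpart this call sequence implies -- ToyVLM and its internals are assumptions for illustration, not code from this commit; only the find_blocks(modality) / get_blocks() sequence is taken from the diff:

# Assumed model-side counterpart: find_blocks(modality) points the wrapper at
# either the language decoder blocks or the vision-tower blocks, and
# get_blocks() returns whichever list was selected last.
class ToyVLM:
    def __init__(self, language_blocks, vision_blocks):
        self._blocks_by_modality = {
            'language': language_blocks,
            'vision': vision_blocks,
        }
        self.blocks = []

    def find_blocks(self, modality='language'):
        self.blocks = self._blocks_by_modality[modality]

    def get_blocks(self):
        return self.blocks

model = ToyVLM(language_blocks=['decoder.0', 'decoder.1'],
               vision_blocks=['vit.0', 'vit.1'])
model.find_blocks('vision')
print(model.get_blocks())  # ['vit.0', 'vit.1']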

llmc/compression/quantization/adadim.py

Lines changed: 2 additions & 2 deletions

@@ -9,8 +9,8 @@

 @ALGO_REGISTRY
 class AdaDim(BaseBlockwiseQuantization):
-    def __init__(self, model, quant_config, input, config):
-        super().__init__(model, quant_config, input, config)
+    def __init__(self, model, quant_config, input, config, modality='language'):
+        super().__init__(model, quant_config, input, config, modality)

     def get_layer_out(self, x, layer):
         with torch.no_grad():

llmc/compression/quantization/awq.py

Lines changed: 2 additions & 2 deletions

@@ -17,8 +17,8 @@

 @ALGO_REGISTRY
 class Awq(BaseBlockwiseQuantization):
-    def __init__(self, model, quant_config, input, padding_mask, config):
-        super().__init__(model, quant_config, input, padding_mask, config)
+    def __init__(self, model, quant_config, input, padding_mask, config, modality='language'):
+        super().__init__(model, quant_config, input, padding_mask, config, modality)
         special_config = self.quant_config.get('special', {})
         self.trans = special_config.get('trans', True)
         self.trans_version = special_config.get('trans_version', 'v2')

llmc/compression/quantization/base_blockwise_quantization.py

Lines changed: 4 additions & 3 deletions

@@ -27,8 +27,8 @@


 class BaseBlockwiseQuantization(BlockwiseOpt):
-    def __init__(self, model, quant_config, input, padding_mask, config):
-        super().__init__(model, quant_config, input, padding_mask, config)
+    def __init__(self, model, quant_config, input, padding_mask, config, modality='language'):
+        super().__init__(model, quant_config, input, padding_mask, config, modality)
         self.set_quant_config()

     def w_qdq(self, module, wquantizer):

@@ -439,7 +439,8 @@ def run(self, block, input_feat, handles):

     def block_transform(self, block, input_feat, block_kwargs):
         logger.info(f'Start transform the {self.block_idx}-th block')
-        subsets = self.model.get_subsets_in_block(block)
+        subsets = self.model.get_subsets_in_block(block) \
+            if self.modality == 'language' else self.model.get_vision_subsets_in_block(block)

         if self.act_static:
             self.register_non_linear_qparams(block, input_feat)
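
Note: block_transform now picks the per-block subsets by modality: language blocks keep get_subsets_in_block, while vision blocks go through the new get_vision_subsets_in_block accessor. A self-contained sketch of that dispatch -- the ToyModel layer names are placeholders, and the assumption that both accessors return the same subset structure is not stated in the diff:

# Both accessor names appear in the diff above; the bodies here are
# placeholders returning example layer groupings.
class ToyModel:
    def get_subsets_in_block(self, block):
        return [{'layers': ['self_attn.q_proj', 'self_attn.k_proj', 'self_attn.v_proj']}]

    def get_vision_subsets_in_block(self, block):
        return [{'layers': ['attn.qkv']}]

def subsets_for(model, block, modality):
    # Same conditional expression as the new line in block_transform.
    return model.get_subsets_in_block(block) \
        if modality == 'language' else model.get_vision_subsets_in_block(block)

print(subsets_for(ToyModel(), block=None, modality='vision'))  # [{'layers': ['attn.qkv']}]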

llmc/compression/quantization/dgq.py

Lines changed: 2 additions & 2 deletions

@@ -13,8 +13,8 @@

 @ALGO_REGISTRY
 class DGQ(BaseBlockwiseQuantization):
-    def __init__(self, model, quant_config, input, padding_mask, config):
-        super().__init__(model, quant_config, input, padding_mask, config)
+    def __init__(self, model, quant_config, input, padding_mask, config, modality='language'):
+        super().__init__(model, quant_config, input, padding_mask, config, modality)
         self.model_dtype = next(self.model.model.parameters()).dtype

     def w_qdq(self, module, wquantizer):

llmc/compression/quantization/gptq.py

Lines changed: 2 additions & 2 deletions

@@ -17,8 +17,8 @@

 @ALGO_REGISTRY
 class GPTQ(BaseBlockwiseQuantization):
-    def __init__(self, model, quant_config, input, padding_mask, config):
-        super().__init__(model, quant_config, input, padding_mask, config)
+    def __init__(self, model, quant_config, input, padding_mask, config, modality='language'):
+        super().__init__(model, quant_config, input, padding_mask, config, modality)
         self.dev = torch.device('cuda')
         self.model_dtype = next(self.model.model.parameters()).dtype
         self.add_quant_config()

llmc/compression/quantization/hqq.py

Lines changed: 2 additions & 2 deletions

@@ -11,8 +11,8 @@

 @ALGO_REGISTRY
 class HQQ(BaseBlockwiseQuantization):
-    def __init__(self, model, quant_config, input, padding_mask, config):
-        super().__init__(model, quant_config, input, padding_mask, config)
+    def __init__(self, model, quant_config, input, padding_mask, config, modality='language'):
+        super().__init__(model, quant_config, input, padding_mask, config, modality)
         self.add_quant_config()

     @torch.no_grad()

llmc/compression/quantization/llmint8.py

Lines changed: 2 additions & 2 deletions

@@ -9,8 +9,8 @@

 @ALGO_REGISTRY
 class LlmInt8(BaseBlockwiseQuantization):
-    def __init__(self, model, quant_config, input, padding_mask, config):
-        super().__init__(model, quant_config, input, padding_mask, config)
+    def __init__(self, model, quant_config, input, padding_mask, config, modality='language'):
+        super().__init__(model, quant_config, input, padding_mask, config, modality)
         self.add_quant_config()

     @torch.no_grad()

llmc/compression/quantization/ntweak.py

Lines changed: 2 additions & 2 deletions

@@ -19,8 +19,8 @@

 @ALGO_REGISTRY
 class NormTweaking(BaseBlockwiseQuantization):
-    def __init__(self, model, quant_config, input, padding_mask, config):
-        super().__init__(model, quant_config, input, padding_mask, config)
+    def __init__(self, model, quant_config, input, padding_mask, config, modality='language'):
+        super().__init__(model, quant_config, input, padding_mask, config, modality)
         self.add_quant_config()

         model_type = self.config['model']['type']
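
Note: the constructor diffs in this commit all follow the same pattern: append a trailing modality='language' parameter and forward it to the parent, so the argument threads down to BlockwiseOpt unchanged. A sketch of how a new algorithm would apply the pattern -- MyAlgo is hypothetical; the import path matches the file shown above:

# Hypothetical subclass following the same constructor pattern as AdaDim,
# Awq, DGQ, GPTQ, HQQ, LlmInt8 and NormTweaking above.
from llmc.compression.quantization.base_blockwise_quantization import \
    BaseBlockwiseQuantization

class MyAlgo(BaseBlockwiseQuantization):
    def __init__(self, model, quant_config, input, padding_mask, config,
                 modality='language'):
        # Forward modality so BlockwiseOpt binds the right block list.
        super().__init__(model, quant_config, input, padding_mask, config,
                         modality)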
