Skip to content

Commit b0ed71f

Browse files
echarlaixmvafin
authored and committed
Add notebook 4bit quant config and description paragraph (#1390)
1 parent 36e5731 commit b0ed71f

File tree

1 file changed

+36
-34
lines changed

1 file changed

+36
-34
lines changed

notebooks/openvino/visual_language_quantization.ipynb

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,7 @@
8282
"execution_count": 2,
8383
"id": "ee1ff192-1b1e-4cec-ab83-119faf494c0c",
8484
"metadata": {},
85-
"outputs": [
86-
{
87-
"name": "stdout",
88-
"output_type": "stream",
89-
"text": [
90-
"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg\n"
91-
]
92-
}
93-
],
85+
"outputs": [],
9486
"source": [
9587
"import transformers\n",
9688
"from transformers import AutoProcessor\n",
@@ -113,9 +105,7 @@
113105
"\n",
114106
"# Prepare inputs\n",
115107
"prompt = processor.apply_chat_template(messages, add_generation_prompt=True)\n",
116-
"inputs = processor(text=prompt, images=[load_image(img_url)], return_tensors=\"pt\")\n",
117-
"\n",
118-
"print(img_url)"
108+
"inputs = processor(text=prompt, images=[load_image(img_url)], return_tensors=\"pt\")"
119109
]
120110
},
121111
{
@@ -176,7 +166,7 @@
176166
"source": [
177167
"## Step 4: Configure and Apply Quantization\n",
178168
"\n",
179-
"Now we'll configure the quantization settings and apply them to create an INT8 version of our model. We'll use weight-only quantization for size reduction with minimal accuracy loss. You can explore other quantization options [here](https://huggingface.co/docs/optimum/en/intel/openvino/optimization).\n"
169+
"Now we'll configure the quantization settings and apply them to create a quantized version of our model. You can explore other quantization options [here](https://huggingface.co/docs/optimum/en/intel/openvino/optimization) and by playing with the different quantization configurations defined below.\n"
180170
]
181171
},
182172
{
@@ -188,32 +178,32 @@
188178
}
189179
},
190180
"source": [
191-
"### Step 4a: Configure Quantization Settings\n"
181+
"### Step 4a: Configure Quantization Settings\n",
182+
"\n",
183+
"To apply quantization to your model, you need to create a quantization configuration specifying the methodology to use. By default, 8-bit weight-only quantization will be applied to the text and vision embeddings components, while the language model will be quantized according to the specified quantization configuration `quantization_config`. A specific quantization configuration can also be defined for each component; this can be done by creating an instance of `OVPipelineQuantizationConfig`."
192184
]
193185
},
194186
{
195187
"cell_type": "code",
196188
"execution_count": 4,
197189
"id": "7ccb1914-d64e-4daf-b274-b8979e427a83",
198190
"metadata": {},
199-
"outputs": [
200-
{
201-
"name": "stderr",
202-
"output_type": "stream",
203-
"text": [
204-
"The provided dataset won't have any effect on the resulting compressed model because no data-aware quantization algorithm is selected and compression ratio is 1.0.\n"
205-
]
206-
}
207-
],
191+
"outputs": [],
208192
"source": [
209193
"from optimum.intel import OVQuantizationConfig, OVWeightQuantizationConfig, OVPipelineQuantizationConfig\n",
210194
"\n",
211195
"dataset, num_samples = \"contextual\", 50\n",
212196
"\n",
213-
"# weight only data free\n",
214-
"woq_data_free = OVWeightQuantizationConfig(bits=8)\n",
197+
"# weight-only 8bit\n",
198+
"woq_8bit = OVWeightQuantizationConfig(bits=8)\n",
199+
"\n",
200+
"# weight-only 4bit\n",
201+
"woq_4bit = OVWeightQuantizationConfig(bits=4, group_size=16)\n",
215202
"\n",
216203
"# static quantization\n",
204+
"static_8bit = OVQuantizationConfig(bits=8, dataset=dataset, num_samples=num_samples)\n",
205+
"\n",
206+
"# pipeline quantization: applying a different quantization on each component\n",
217207
"ppl_q = OVPipelineQuantizationConfig(\n",
218208
" quantization_configs={\n",
219209
" \"lm_model\": OVQuantizationConfig(bits=8),\n",
@@ -234,7 +224,9 @@
234224
}
235225
},
236226
"source": [
237-
"### Step 4b: Apply Quantization\n"
227+
"### Step 4b: Apply Quantization\n",
228+
"\n",
229+
"You can now apply quantization to your model; here we apply the weight-only quantization configuration defined in `woq_8bit`."
238230
]
239231
},
240232
{
@@ -266,7 +258,7 @@
266258
{
267259
"data": {
268260
"application/vnd.jupyter.widget-view+json": {
269-
"model_id": "b0180c8b43204411bc4f84abf2c480c4",
261+
"model_id": "20ceaa0181cf4478a1b6cd99364f75b4",
270262
"version_major": 2,
271263
"version_minor": 0
272264
},
@@ -315,7 +307,7 @@
315307
{
316308
"data": {
317309
"application/vnd.jupyter.widget-view+json": {
318-
"model_id": "af0186e8345a4881bf5d0665ee9a5070",
310+
"model_id": "c3b03024f93b4ad585993b5329928110",
319311
"version_major": 2,
320312
"version_minor": 0
321313
},
@@ -364,7 +356,7 @@
364356
{
365357
"data": {
366358
"application/vnd.jupyter.widget-view+json": {
367-
"model_id": "4c8bf0061d904a78b13631c76caf0dda",
359+
"model_id": "93dcd9c2db0844eb88ba0178b4a897b2",
368360
"version_major": 2,
369361
"version_minor": 0
370362
},
@@ -400,7 +392,7 @@
400392
}
401393
],
402394
"source": [
403-
"q_model = OVModelForVisualCausalLM.from_pretrained(model_id, quantization_config=woq_data_free)\n",
395+
"q_model = OVModelForVisualCausalLM.from_pretrained(model_id, quantization_config=woq_8bit)\n",
404396
"int8_model_path = \"smolvlm_int8\"\n",
405397
"q_model.save_pretrained(int8_model_path)"
406398
]
@@ -493,10 +485,20 @@
493485
},
494486
{
495487
"cell_type": "code",
496-
"execution_count": null,
488+
"execution_count": 8,
497489
"id": "3c862277",
498490
"metadata": {},
499-
"outputs": [],
491+
"outputs": [
492+
{
493+
"name": "stdout",
494+
"output_type": "stream",
495+
"text": [
496+
"FP32 model size: 1028.25 MB\n",
497+
"INT8 model size: 260.94 MB\n",
498+
"INT8 size decrease: 3.94x\n"
499+
]
500+
}
501+
],
500502
"source": [
501503
"fp32_model_size = get_model_size(fp32_model_path)\n",
502504
"int8_model_size = get_model_size(int8_model_path)\n",
@@ -528,7 +530,7 @@
528530
],
529531
"metadata": {
530532
"kernelspec": {
531-
"display_name": "openvino_env",
533+
"display_name": "Python 3 (ipykernel)",
532534
"language": "python",
533535
"name": "python3"
534536
},
@@ -542,7 +544,7 @@
542544
"name": "python",
543545
"nbconvert_exporter": "python",
544546
"pygments_lexer": "ipython3",
545-
"version": "3.12.7"
547+
"version": "3.9.18"
546548
}
547549
},
548550
"nbformat": 4,

0 commit comments

Comments
 (0)