content/learning-paths/servers-and-cloud-computing/llama-vision: 1 file changed, +22 -0 lines

@@ -24,6 +24,28 @@ app = Flask(__name__)
 # Load model and processor
 model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
 model = MllamaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float32)
+
+# Apply torchao quantization: int8 dynamic activations with grouped int4 weights
+from torchao.dtypes import PlainLayout
+from torchao.experimental.packed_linear_int8_dynamic_activation_intx_weight_layout import (
+    PackedLinearInt8DynamicActivationIntxWeightLayout,
+)
+from torchao.experimental.quant_api import int8_dynamic_activation_intx_weight
+from torchao.quantization.granularity import PerGroup
+from torchao.quantization.quant_api import quantize_
+from torchao.quantization.quant_primitives import MappingType
+
+quantize_(
+    model,
+    int8_dynamic_activation_intx_weight(
+        weight_dtype=torch.int4,                 # 4-bit weights
+        granularity=PerGroup(32),                # per-group scales, group size 32
+        has_weight_zeros=True,                   # asymmetric weights with zero points
+        weight_mapping_type=MappingType.SYMMETRIC_NO_CLIPPING_ERR,
+        layout=PackedLinearInt8DynamicActivationIntxWeightLayout(target="aten"),
+    ),
+)
+
 processor = AutoProcessor.from_pretrained(model_id)
 model.eval()

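As a usage sketch (not part of the diff above), the quantized model is called exactly like the float model. The image URL and prompt below are hypothetical placeholders; the processor calls follow the standard transformers usage for Mllama models.

# Minimal inference sketch, assuming `model` and `processor` from the code above.
# The image URL and prompt are placeholders, not part of the learning path.
import requests
import torch
from PIL import Image

image = Image.open(requests.get("https://example.com/sample.jpg", stream=True).raw)

messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe this image in one sentence."},
    ]},
]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(image, prompt, return_tensors="pt")

with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=64)
print(processor.decode(output[0], skip_special_tokens=True))

Because quantize_ rewrites the linear layers in place, no other changes to the serving code are needed; the trade-off is lower memory use and faster int8/int4 kernels in exchange for some quantization error.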