-# Validation functions
+MAKEFLAGS += --no-print-directory
+
 define validate_model_path
 	@if [ -z "$(MODEL_PATH)" ]; then \
 		echo "Error: MODEL_PATH must be provided either as:"; \
@@ -17,6 +18,13 @@ define validate_embedding_model_path
 	fi
 endef
 
+define quantize_model
+	@CONVERTED_MODEL="$(1)" QUANTIZED_TYPE="$(QUANTIZED_TYPE)" \
+	TOKEN_EMBD_TYPE="$(TOKEN_EMBD_TYPE)" OUTPUT_TYPE="$(OUTPUT_TYPE)" \
+	./scripts/utils/quantize.sh "$(1)" "$(QUANTIZED_TYPE)" "$(TOKEN_EMBD_TYPE)" "$(OUTPUT_TYPE)"
+	@echo "Export the quantized model path to $(2) variable in your environment"
+endef
+
 ###
 ### Casual Model targets/recipes
 ###
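
The new quantize_model macro centralizes the quantization recipe: $(1) is the path of the converted model and $(2) names the environment variable the user is asked to export afterwards. As a rough sketch (the model path and quantization type below are illustrative, not taken from the diff), with QUANTIZED_TYPE=Q4_0 and the token-embedding/output overrides left unset, a call like

    $(call quantize_model,models/example-f16.gguf,QUANTIZED_MODEL)

would expand to approximately:

    @CONVERTED_MODEL="models/example-f16.gguf" QUANTIZED_TYPE="Q4_0" \
    TOKEN_EMBD_TYPE="" OUTPUT_TYPE="" \
    ./scripts/utils/quantize.sh "models/example-f16.gguf" "Q4_0" "" ""
    @echo "Export the quantized model path to QUANTIZED_MODEL variable in your environment"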
@@ -67,9 +75,15 @@ causal-quantize-Q8_0: causal-quantize-model
 causal-quantize-Q4_0: QUANTIZED_TYPE = Q4_0
 causal-quantize-Q4_0: causal-quantize-model
 
+# For Quantization Aware Trained (QAT) models in Q4_0 we explicitly set the
+# token embedding and output types to Q8_0 instead of the default Q6_K.
+causal-quantize-qat-Q4_0: QUANTIZED_TYPE = Q4_0
+causal-quantize-qat-Q4_0: TOKEN_EMBD_TYPE = Q8_0
+causal-quantize-qat-Q4_0: OUTPUT_TYPE = Q8_0
+causal-quantize-qat-Q4_0: causal-quantize-model
+
 causal-quantize-model:
-	@CONVERTED_MODEL="$(CONVERTED_MODEL)" QUANTIZED_TYPE="$(QUANTIZED_TYPE)" ./scripts/utils/quantize.sh ${CONVERTED_MODEL} ${QUANTIZED_TYPE}
-	@echo "Export the quantized model path to QUANTIZED_MODEL variable in your environment"
+	$(call quantize_model,$(CONVERTED_MODEL),QUANTIZED_MODEL)
 
 causal-run-quantized-model:
 	@QUANTIZED_MODEL="$(QUANTIZED_MODEL)" ./scripts/causal/run-converted-model.sh ${QUANTIZED_MODEL}
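
With the new causal-quantize-qat-Q4_0 target, a QAT checkpoint could be quantized with something along these lines (the converted model path is a hypothetical example):

    make causal-quantize-qat-Q4_0 CONVERTED_MODEL=models/my-qat-model-f16.gguf

This forwards TOKEN_EMBD_TYPE=Q8_0 and OUTPUT_TYPE=Q8_0 to scripts/utils/quantize.sh instead of leaving the token embedding and output tensors at the default Q6_K.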
@@ -117,9 +131,15 @@ embedding-quantize-Q8_0: embedding-quantize-model
 embedding-quantize-Q4_0: QUANTIZED_TYPE = Q4_0
 embedding-quantize-Q4_0: embedding-quantize-model
 
+# For Quantization Aware Trained (QAT) models in Q4_0 we explicitly set the
+# token embedding and output types to Q8_0 instead of the default Q6_K.
+embedding-quantize-qat-Q4_0: QUANTIZED_TYPE = Q4_0
+embedding-quantize-qat-Q4_0: TOKEN_EMBD_TYPE = Q8_0
+embedding-quantize-qat-Q4_0: OUTPUT_TYPE = Q8_0
+embedding-quantize-qat-Q4_0: embedding-quantize-model
+
 embedding-quantize-model:
-	@./scripts/utils/quantize.sh ${CONVERTED_EMBEDDING_MODEL} ${QUANTIZED_TYPE}
-	@echo "Export the quantized model path to QUANTIZED_EMBEDDING_MODEL variable in your environment"
+	$(call quantize_model,$(CONVERTED_EMBEDDING_MODEL),QUANTIZED_EMBEDDING_MODEL)
 
 embedding-run-quantized-model:
 	@./scripts/embedding/run-converted-model.sh ${QUANTIZED_EMBEDDING_MODEL}
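
The embedding flow mirrors the causal one; a session might look roughly like this (both paths are placeholders, the second being wherever quantize.sh wrote the quantized model):

    make embedding-quantize-qat-Q4_0 CONVERTED_EMBEDDING_MODEL=models/my-embedding-qat-f16.gguf
    export QUANTIZED_EMBEDDING_MODEL=/path/to/quantized-model.gguf
    make embedding-run-quantized-model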