1
- # Validation functions
1
+ MAKEFLAGS += --no-print-directory
2
+
2
3
define validate_model_path
3
4
@if [ -z "$(MODEL_PATH ) " ]; then \
4
5
echo "Error: MODEL_PATH must be provided either as:"; \
@@ -17,6 +18,13 @@ define validate_embedding_model_path
17
18
fi
18
19
endef
19
20
21
# quantize_model: quantize a converted GGUF model via scripts/utils/quantize.sh.
#   $(1) — path to the converted model file
#   $(2) — name of the variable the user should export with the resulting
#          quantized model path (used only in the hint message)
# QUANTIZED_TYPE, TOKEN_EMBD_TYPE and OUTPUT_TYPE are forwarded both as
# environment variables and as positional arguments to the script.
define quantize_model
	@CONVERTED_MODEL="$(1)" QUANTIZED_TYPE="$(QUANTIZED_TYPE)" \
	TOKEN_EMBD_TYPE="$(TOKEN_EMBD_TYPE)" OUTPUT_TYPE="$(OUTPUT_TYPE)" \
	./scripts/utils/quantize.sh "$(1)" "$(QUANTIZED_TYPE)" "$(TOKEN_EMBD_TYPE)" "$(OUTPUT_TYPE)"
	@echo "Export the quantized model path to $(2) variable in your environment"
endef
27
+
20
28
###
### Causal Model targets/recipes
###
# Quantize the causal model to Q4_0 with default embedding/output types.
.PHONY: causal-quantize-Q4_0 causal-quantize-qat-Q4_0
causal-quantize-Q4_0: QUANTIZED_TYPE = Q4_0
causal-quantize-Q4_0: causal-quantize-model

# For Quantization Aware Trained (QAT) models in Q4_0 we explicitly set the
# token embedding and output types to Q8_0 instead of the default Q6_K.
causal-quantize-qat-Q4_0: QUANTIZED_TYPE = Q4_0
causal-quantize-qat-Q4_0: TOKEN_EMBD_TYPE = Q8_0
causal-quantize-qat-Q4_0: OUTPUT_TYPE = Q8_0
causal-quantize-qat-Q4_0: causal-quantize-model
70
85
# Quantize the converted causal model. CONVERTED_MODEL must be set;
# QUANTIZED_TYPE (and optionally TOKEN_EMBD_TYPE/OUTPUT_TYPE) is normally
# provided by one of the causal-quantize-* wrapper targets above.
.PHONY: causal-quantize-model
causal-quantize-model:
	$(call quantize_model,$(CONVERTED_MODEL),QUANTIZED_MODEL)
73
87
74
88
# Run the quantized causal model. QUANTIZED_MODEL must point at the file
# produced by a causal-quantize-* target. The path is quoted so paths
# containing spaces work.
.PHONY: causal-run-quantized-model
causal-run-quantized-model:
	@QUANTIZED_MODEL="$(QUANTIZED_MODEL)" ./scripts/causal/run-converted-model.sh "$(QUANTIZED_MODEL)"
# Quantize the embedding model to Q4_0 with default embedding/output types.
.PHONY: embedding-quantize-Q4_0 embedding-quantize-qat-Q4_0
embedding-quantize-Q4_0: QUANTIZED_TYPE = Q4_0
embedding-quantize-Q4_0: embedding-quantize-model

# For Quantization Aware Trained (QAT) models in Q4_0 we explicitly set the
# token embedding and output types to Q8_0 instead of the default Q6_K.
embedding-quantize-qat-Q4_0: QUANTIZED_TYPE = Q4_0
embedding-quantize-qat-Q4_0: TOKEN_EMBD_TYPE = Q8_0
embedding-quantize-qat-Q4_0: OUTPUT_TYPE = Q8_0
embedding-quantize-qat-Q4_0: embedding-quantize-model
120
141
# Quantize the converted embedding model. CONVERTED_EMBEDDING_MODEL must be
# set; QUANTIZED_TYPE (and optionally TOKEN_EMBD_TYPE/OUTPUT_TYPE) is normally
# provided by one of the embedding-quantize-* wrapper targets above.
.PHONY: embedding-quantize-model
embedding-quantize-model:
	$(call quantize_model,$(CONVERTED_EMBEDDING_MODEL),QUANTIZED_EMBEDDING_MODEL)
123
143
124
144
embedding-run-quantized-model :
125
145
@./scripts/embedding/run-converted-model.sh ${QUANTIZED_EMBEDDING_MODEL}
0 commit comments