2 files changed: +12 -4 lines changed
@@ -64,9 +64,9 @@ CLEAN_CACHE=True
6464#
6565# IQ2_XXS : 2.06 bpw quantization
6666# IQ2_XS : 2.31 bpw quantization
67- # IQ3_XXS : 3.06 bpw quantization
6867# Q2_K : 2.63G, +0.6717 ppl @ LLaMA-v1-7B
6968# Q2_K_S : 2.16G, +9.0634 ppl @ LLaMA-v1-7B
69+ # IQ3_XXS : 3.06 bpw quantization
7070# Q3_K_XS : 3-bit extra small quantization
7171# Q3_K_S : 2.75G, +0.5551 ppl @ LLaMA-v1-7B
7272# Q3_K_M : 3.07G, +0.2496 ppl @ LLaMA-v1-7B
@@ -56,13 +56,17 @@ ForEach ($repositoryName in $repositoryDirectories) {
5656 Invoke-Expression " $convertCommand --outfile `" ${unquantizedModelPath} `" `" ${sourceDirectoryPath} `" "
5757 }
5858
59- if ((" IQ2_XXS IQ2_XS IQ3_XXS" .Contains($type )) -and ! (Test-Path - Path $importanceMatrixPath )) {
59+ # We do need to compute an importance matrix for 2-bit quantized models:
60+ # https://github.com/ggerganov/llama.cpp/tree/master/examples/imatrix
61+ $requiresImportanceMatrix = " IQ2_XXS IQ2_XS Q2_K_S" .Contains($type )
62+
63+ if ($requiresImportanceMatrix -and ! (Test-Path - Path $importanceMatrixPath )) {
6064
6165 Write-Host " Computing importance matrix for ${unquantizedModelPath} at ${importanceMatrixPath} ..." - ForegroundColor " DarkYellow"
6266
63- $imatrixCommand = " ${llamaCppDirectory} \build\bin\Release\imatrix.exe"
67+ $matrixCommand = " ${llamaCppDirectory} \build\bin\Release\imatrix.exe"
6468
65- Invoke-Expression " $imatrixCommand -m `" ${unquantizedModelPath} `" -f `" ${trainingDataPath} `" -o `" ${importanceMatrixPath} `" "
69+ Invoke-Expression " $matrixCommand -m `" ${unquantizedModelPath} `" -f `" ${trainingDataPath} `" -o `" ${importanceMatrixPath} `" -ngl 99 "
6670 }
6771
6872 if (! (Test-Path - Path $quantizedModelPath )) {
@@ -71,6 +75,10 @@ ForEach ($repositoryName in $repositoryDirectories) {
7175
7276 $quantizeCommand = " ${llamaCppDirectory} \build\bin\Release\quantize.exe"
7377
78+ if ($requiresImportanceMatrix ) {
79+ $quantizeCommand = " ${quantizeCommand} --imatrix=`" ${importanceMatrixPath} `" "
80+ }
81+
7482 Invoke-Expression " $quantizeCommand `" ${unquantizedModelPath} `" `" ${quantizedModelPath} `" `" ${type} `" "
7583 }
7684 }
You can’t perform that action at this time.
0 commit comments