Add importance matrix calculation

countzero · countzero · commit 8c38ea726a65 · 2024-02-05T12:42:25.000+01:00
diff --git a/README.md b/README.md
@@ -38,6 +38,10 @@ Create the following `.env` file in the project directory. Make sure to change t
 # convert.py script and the quantize.exe binary.
 LLAMA_CPP_DIRECTORY=C:\windows_llama.cpp\vendor\llama.cpp
 
+# Path to the training data needed for computing the importance
+# matrix to create the SOTA quants: IQ2_XXS, IQ2_XS and IQ3_XXS
+TRAINING_DATA=C:\windows_llama.cpp\vendor\wikitext-2-raw-v1\wikitext-2-raw\wiki.train.raw
+
 # Path to the Git repositories containing the models.
 SOURCE_DIRECTORY=.\source
 
@@ -50,13 +54,17 @@ TARGET_DIRECTORY=.\gguf
 # physical drive to improve the quantization speed.
 CACHE_DIRECTORY=.\cache
 
+# Automatic removal of intermediate files in the cache directory.
+CLEAN_CACHE=True
+
 #
 # Comma separated list of quantization types.
 #
 # Possible llama.cpp quantization types:
 #
 #     IQ2_XXS :  2.06 bpw quantization
 #     IQ2_XS  :  2.31 bpw quantization
+#     IQ3_XXS :  3.06 bpw quantization
 #     Q2_K    :  2.63G, +0.6717 ppl @ LLaMA-v1-7B
 #     Q2_K_S  :  2.16G, +9.0634 ppl @ LLaMA-v1-7B
 #     Q3_K_XS :  3-bit extra small quantization
diff --git a/quantize_weights_for_llama.cpp.ps1 b/quantize_weights_for_llama.cpp.ps1
@@ -15,6 +15,8 @@ $llamaCppDirectory = Resolve-Path -Path $env:LLAMA_CPP_DIRECTORY
 $sourceDirectory = Resolve-Path -Path $env:SOURCE_DIRECTORY
 $targetDirectory = Resolve-Path -Path $env:TARGET_DIRECTORY
 $cacheDirectory = Resolve-Path -Path $env:CACHE_DIRECTORY
+$trainingDataPath = Resolve-Path -Path $env:TRAINING_DATA
+$cleanCache = [System.Convert]::ToBoolean($env:CLEAN_CACHE)
 $quantizationTypes = $env:QUANTIZATION_TYPES -split ','
 
 $naturalSort = { [regex]::Replace($_, '\d+', { $args[0].Value.PadLeft(20) }) }
@@ -39,6 +41,7 @@ ForEach ($repositoryName in $repositoryDirectories) {
     Write-Host "Working on ${repositoryName}..." -ForegroundColor "DarkYellow"
 
     $unquantizedModelPath = Join-Path -Path $cacheDirectory -ChildPath "${repositoryName}.model-unquantized.gguf"
+    $importanceMatrixPath = Join-Path -Path $cacheDirectory -ChildPath "${repositoryName}.importance-matrix.dat"
 
     ForEach ($type in $quantizationTypes) {
 
@@ -53,6 +56,15 @@ ForEach ($repositoryName in $repositoryDirectories) {
             Invoke-Expression "$convertCommand --outfile `"${unquantizedModelPath}`" `"${sourceDirectoryPath}`""
         }
 
+        # Exact, case-sensitive list match: String.Contains() would also accept fragments such as "XS", "IQ2" or "".
+        if ((@("IQ2_XXS", "IQ2_XS", "IQ3_XXS") -ccontains $type.Trim()) -and !(Test-Path -Path $importanceMatrixPath)) {
+
+            Write-Host "Computing importance matrix for ${unquantizedModelPath} at ${importanceMatrixPath}..." -ForegroundColor "DarkYellow"
+
+            $imatrixCommand = "${llamaCppDirectory}\build\bin\Release\imatrix.exe"
+            Invoke-Expression "$imatrixCommand -m `"${unquantizedModelPath}`" -f `"${trainingDataPath}`" -o `"${importanceMatrixPath}`""
+        }
+
         if (!(Test-Path -Path $quantizedModelPath)) {
 
             Write-Host "Quantizing ${unquantizedModelPath} to ${quantizedModelPath}..." -ForegroundColor "DarkYellow"
@@ -63,11 +75,17 @@ ForEach ($repositoryName in $repositoryDirectories) {
         }
     }
 
-    if ((Test-Path -Path $unquantizedModelPath)) {
+    if ($cleanCache -and (Test-Path -Path $unquantizedModelPath)) {
 
         Write-Host "Removing intermediate unquantized model ${unquantizedModelPath}..." -ForegroundColor "DarkYellow"
         Remove-Item "${unquantizedModelPath}" -Recurse -Force
     }
+
+    if ($cleanCache -and (Test-Path -Path $importanceMatrixPath)) {
+        # Runs after the loop over all quantization types of this repository has finished.
+        Write-Host "Removing intermediate importance matrix ${importanceMatrixPath}..." -ForegroundColor "DarkYellow"
+        Remove-Item "${importanceMatrixPath}" -Recurse -Force
+    }
 }
 
 $stopwatch.Stop()