Skip to content

Commit 8c38ea7

Browse files
committed
Add importance matrix calculation
1 parent b5b834e commit 8c38ea7

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ Create the following `.env` file in the project directory. Make sure to change t
3838
# convert.py script and the quantize.exe binary.
3939
LLAMA_CPP_DIRECTORY=C:\windows_llama.cpp\vendor\llama.cpp
4040
41+
# Path to the training data needed for computing the importance
42+
# matrix to create the SOTA quants: IQ2_XXS, IQ2_XS and IQ3_XXS
43+
TRAINING_DATA=C:\windows_llama.cpp\vendor\wikitext-2-raw-v1\wikitext-2-raw\wiki.train.raw
44+
4145
# Path to the Git repositories containing the models.
4246
SOURCE_DIRECTORY=.\source
4347
@@ -50,13 +54,17 @@ TARGET_DIRECTORY=.\gguf
5054
# physical drive to improve the quantization speed.
5155
CACHE_DIRECTORY=.\cache
5256
57+
# Automatic removal of intermediate files in the cache directory.
58+
CLEAN_CACHE=True
59+
5360
#
5461
# Comma separated list of quantization types.
5562
#
5663
# Possible llama.cpp quantization types:
5764
#
5865
# IQ2_XXS : 2.06 bpw quantization
5966
# IQ2_XS : 2.31 bpw quantization
67+
# IQ3_XXS : 3.06 bpw quantization
6068
# Q2_K : 2.63G, +0.6717 ppl @ LLaMA-v1-7B
6169
# Q2_K_S : 2.16G, +9.0634 ppl @ LLaMA-v1-7B
6270
# Q3_K_XS : 3-bit extra small quantization

quantize_weights_for_llama.cpp.ps1

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ $llamaCppDirectory = Resolve-Path -Path $env:LLAMA_CPP_DIRECTORY
1515
$sourceDirectory = Resolve-Path -Path $env:SOURCE_DIRECTORY
1616
$targetDirectory = Resolve-Path -Path $env:TARGET_DIRECTORY
1717
$cacheDirectory = Resolve-Path -Path $env:CACHE_DIRECTORY
18+
$trainingDataPath = Resolve-Path -Path $env:TRAINING_DATA
19+
$cleanCache = [System.Convert]::ToBoolean($env:CLEAN_CACHE)
1820
$quantizationTypes = $env:QUANTIZATION_TYPES -split ','
1921

2022
$naturalSort = { [regex]::Replace($_, '\d+', { $args[0].Value.PadLeft(20) }) }
@@ -39,6 +41,7 @@ ForEach ($repositoryName in $repositoryDirectories) {
3941
Write-Host "Working on ${repositoryName}..." -ForegroundColor "DarkYellow"
4042

4143
$unquantizedModelPath = Join-Path -Path $cacheDirectory -ChildPath "${repositoryName}.model-unquantized.gguf"
44+
$importanceMatrixPath = Join-Path -Path $cacheDirectory -ChildPath "${repositoryName}.importance-matrix.dat"
4245

4346
ForEach ($type in $quantizationTypes) {
4447

@@ -53,6 +56,15 @@ ForEach ($repositoryName in $repositoryDirectories) {
5356
Invoke-Expression "$convertCommand --outfile `"${unquantizedModelPath}`" `"${sourceDirectoryPath}`""
5457
}
5558

59+
# Only the importance-matrix based quantization types need the matrix.
# Test for exact list membership instead of a substring match on a
# space-joined string, which also matched the empty string (e.g. from a
# trailing comma in QUANTIZATION_TYPES) and arbitrary fragments like "XS".
# The type is trimmed so that whitespace-padded entries keep matching.
if ((@("IQ2_XXS", "IQ2_XS", "IQ3_XXS") -contains "${type}".Trim()) -and !(Test-Path -Path $importanceMatrixPath)) {
60+
61+
Write-Host "Computing importance matrix for ${unquantizedModelPath} at ${importanceMatrixPath}..." -ForegroundColor "DarkYellow"
62+
63+
$imatrixCommand = "${llamaCppDirectory}\build\bin\Release\imatrix.exe"
64+
65+
Invoke-Expression "$imatrixCommand -m `"${unquantizedModelPath}`" -f `"${trainingDataPath}`" -o `"${importanceMatrixPath}`""
66+
}
67+
5668
if (!(Test-Path -Path $quantizedModelPath)) {
5769

5870
Write-Host "Quantizing ${unquantizedModelPath} to ${quantizedModelPath}..." -ForegroundColor "DarkYellow"
@@ -63,11 +75,17 @@ ForEach ($repositoryName in $repositoryDirectories) {
6375
}
6476
}
6577

66-
if ((Test-Path -Path $unquantizedModelPath)) {
78+
if ($cleanCache -and (Test-Path -Path $unquantizedModelPath)) {
6779

6880
Write-Host "Removing intermediate unquantized model ${unquantizedModelPath}..." -ForegroundColor "DarkYellow"
6981
Remove-Item "${unquantizedModelPath}" -Recurse -Force
7082
}
83+
84+
if ($cleanCache -and (Test-Path -Path $importanceMatrixPath)) {
85+
86+
# Report the file that is actually being deleted: the importance matrix,
# not the unquantized model.
Write-Host "Removing intermediate importance matrix ${importanceMatrixPath}..." -ForegroundColor "DarkYellow"
87+
Remove-Item "${importanceMatrixPath}" -Recurse -Force
88+
}
7189
}
7290

7391
$stopwatch.Stop()

0 commit comments

Comments
 (0)