Skip to content

Commit 659b684

Browse files
committed
Add imatrix computation on CPU as a fallback
1 parent 0ee8e17 commit 659b684

File tree

1 file changed

+17
-4
lines changed

1 file changed

+17
-4
lines changed

quantize_weights_for_llama.cpp.ps1

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ ForEach ($repositoryName in $repositoryDirectories) {
4040

4141
$unquantizedModelPath = Join-Path -Path $cacheDirectory -ChildPath "${repositoryName}.gguf"
4242

43-
# Note that we are not removing *.importance-matrix.dat files because
43+
# Note that we are not removing *.importance-matrix.gguf files because
4444
# they are relatively small but take a _very_ long time to compute.
45-
$importanceMatrixPath = Join-Path -Path $importanceMatrixDirectory -ChildPath "${repositoryName}.importance-matrix.dat"
45+
$importanceMatrixPath = Join-Path -Path $importanceMatrixDirectory -ChildPath "${repositoryName}.importance-matrix.gguf"
4646

4747
# If a repository already contains an unquantized GGUF file, we use it directly.
4848
$unquantizedModelPathFromSource = Join-Path -Path $sourceDirectory -ChildPath $repositoryName | Join-Path -ChildPath "${repositoryName}.gguf"
@@ -66,10 +66,10 @@ ForEach ($repositoryName in $repositoryDirectories) {
6666
}
6767

6868
# We are computing an importance matrix to enhance the quality of the models.
69-
# https://github.com/ggerganov/llama.cpp/tree/master/examples/imatrix
69+
# https://github.com/ggml-org/llama.cpp/tree/master/tools/imatrix
7070
if (!(Test-Path -Path $importanceMatrixPath)) {
7171

72-
Write-Host "Computing importance matrix for ${unquantizedModelPath} at ${importanceMatrixPath}..." -ForegroundColor "DarkYellow"
72+
Write-Host "Computing importance matrix for ${unquantizedModelPath} at ${importanceMatrixPath} on GPU..." -ForegroundColor "DarkYellow"
7373

7474
Invoke-Expression "${llamaCppDirectory}\build\bin\Release\llama-imatrix.exe ``
7575
--model '${unquantizedModelPath}' ``
@@ -79,6 +79,19 @@ ForEach ($repositoryName in $repositoryDirectories) {
7979
--gpu-layers 999"
8080
}
8181

82+
# If the importance matrix file still does not exist after the GPU attempt, we fall back to CPU-only importance matrix generation.
83+
if (!(Test-Path -Path $importanceMatrixPath)) {
84+
85+
Write-Host "Computing importance matrix for ${unquantizedModelPath} at ${importanceMatrixPath} on CPU..." -ForegroundColor "DarkYellow"
86+
87+
Invoke-Expression "${llamaCppDirectory}\build\bin\Release\llama-imatrix.exe ``
88+
--model '${unquantizedModelPath}' ``
89+
--file '${trainingDataPath}' ``
90+
--chunks ${trainingDataChunks} ``
91+
--output '${importanceMatrixPath}' ``
92+
--gpu-layers 0"
93+
}
94+
8295
if (!(Test-Path -Path $quantizedModelPath)) {
8396

8497
Write-Host "Quantizing ${unquantizedModelPath} to ${quantizedModelPath}..." -ForegroundColor "DarkYellow"

0 commit comments

Comments
 (0)