
Commit 040456a

Add support for IQ2_XXS, IQ2_XS and Q2_K_S
1 parent ea51840 commit 040456a

2 files changed (+11 −17 lines)

README.md

Lines changed: 6 additions & 8 deletions
@@ -6,7 +6,7 @@ Think batch quantization like https://huggingface.co/TheBloke does it, but on yo
 
 ## Features
 
-- Easy configuration via a `.env` file
+- Easy configuration via one `.env` file
 - Automates the synchronization of Git repositories containing large files (LFS)
 - Only fetches one LFS object at a time
 - Displays a progress indicator on downloading LFS objects
@@ -18,7 +18,7 @@ Think batch quantization like https://huggingface.co/TheBloke does it, but on yo
 
 ### Prerequisites
 
-Use https://github.com/countzero/windows_llama.cpp to compile a specific version of the [llama.cpp](https://github.com/ggerganov/llama.cpp) project on your machine.
+Use https://github.com/countzero/windows_llama.cpp to compile a specific version of the [llama.cpp](https://github.com/ggerganov/llama.cpp) project on your machine. This also makes training data available.
 
 
 ### Clone the repository from GitHub
@@ -35,7 +35,7 @@ Create the following `.env` file in the project directory. Make sure to change t
 
 ```Env
 # Path to the llama.cpp project that contains the
-# convert.py script and the quantize.exe binary.
+# required conversion and quantization programs.
 LLAMA_CPP_DIRECTORY=C:\windows_llama.cpp\vendor\llama.cpp
 
 # Path to the training data for computing the importance matrix.
@@ -53,9 +53,6 @@ TARGET_DIRECTORY=.\gguf
 # physical drive to improve the quantization speed.
 CACHE_DIRECTORY=.\cache
 
-# Automatic removal of intermediate files in the cache directory.
-CLEAN_CACHE=True
-
 #
 # Comma separated list of quantization types.
 #
@@ -84,9 +81,10 @@ CLEAN_CACHE=True
 # F32 : 26.00G @ 7B
 # COPY : only copy tensors, no quantizing
 #
-# Hint: The sweet spot is Q5_K_M.
+# Hint: The sweet spot is Q5_K_M. The smallest quantization
+# without the need for an importance matrix is IQ3_XXS.
 #
-QUANTIZATION_TYPES=Q5_K_M,Q3_K_XS
+QUANTIZATION_TYPES=Q5_K_M,IQ3_XXS
 ```
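
For orientation only, here is a hypothetical PowerShell sketch (not part of this commit) showing how a `QUANTIZATION_TYPES` value could be split and checked against the quantization types that now require an importance matrix. The list of imatrix-dependent types comes from the diff; the example value (with IQ2_XS added) and the variable names are illustrative assumptions.

```powershell
# Hypothetical sketch: split a QUANTIZATION_TYPES value from the .env file
# and report which types depend on an importance matrix.
$quantizationTypes = "Q5_K_M,IQ3_XXS,IQ2_XS" -split ","

# Types this commit treats as imatrix-dependent.
$imatrixTypes = "IQ2_XXS", "IQ2_XS", "Q2_K_S"

foreach ($type in $quantizationTypes) {

    if ($imatrixTypes -contains $type) {
        Write-Host "${type}: requires an importance matrix (training data must be configured)."
    }
    else {
        Write-Host "${type}: can be quantized without an importance matrix."
    }
}
```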
quantize_weights_for_llama.cpp.ps1

Lines changed: 5 additions & 9 deletions
@@ -56,7 +56,7 @@ ForEach ($repositoryName in $repositoryDirectories) {
 Invoke-Expression "$convertCommand --outfile `"${unquantizedModelPath}`" `"${sourceDirectoryPath}`""
 }
 
-# We do need to compute an importance matrix for 2-bit quantized models:
+# We do need to compute an importance matrix for some 2-bit quantized models:
 # https://github.com/ggerganov/llama.cpp/tree/master/examples/imatrix
 $requiresImportanceMatrix = "IQ2_XXS IQ2_XS Q2_K_S".Contains($type)
 
@@ -76,24 +76,20 @@ ForEach ($repositoryName in $repositoryDirectories) {
 $quantizeCommand = "${llamaCppDirectory}\build\bin\Release\quantize.exe"
 
 if ($requiresImportanceMatrix) {
-$quantizeCommand = "${quantizeCommand} --imatrix=`"${importanceMatrixPath}`""
+$quantizeCommand = "${quantizeCommand} --imatrix `"${importanceMatrixPath}`""
 }
 
 Invoke-Expression "$quantizeCommand `"${unquantizedModelPath}`" `"${quantizedModelPath}`" `"${type}`""
 }
 }
 
-if ($cleanCache -and (Test-Path -Path $unquantizedModelPath)) {
+# Note that we are not removing *.importance-matrix.dat files because
+# they are relatively small but take a _very_ long time to compute.
+if (Test-Path -Path $unquantizedModelPath) {
 
 Write-Host "Removing intermediate unquantized model ${unquantizedModelPath}..." -ForegroundColor "DarkYellow"
 Remove-Item "${unquantizedModelPath}" -Recurse -Force
 }
-
-if ($cleanCache -and (Test-Path -Path $importanceMatrixPath)) {
-
-Write-Host "Removing intermediate unquantized model ${importanceMatrixPath}..." -ForegroundColor "DarkYellow"
-Remove-Item "${importanceMatrixPath}" -Recurse -Force
-}
 }
 
 $stopwatch.Stop()
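
For context, here is a hedged sketch of how the importance-matrix step referenced above (https://github.com/ggerganov/llama.cpp/tree/master/examples/imatrix) could feed the corrected `--imatrix` call for a type such as IQ2_XS. The `imatrix.exe` location, its `-m`/`-f`/`-o` parameters, and all file paths are assumptions about the local llama.cpp build, not something taken from this commit.

```powershell
# Hypothetical sketch; paths and imatrix.exe flags are assumptions.
$llamaCppDirectory    = "C:\windows_llama.cpp\vendor\llama.cpp"
$trainingDataPath     = ".\training_data\calibration.txt"
$unquantizedModelPath = ".\cache\example-7B.unquantized.gguf"
$importanceMatrixPath = ".\cache\example-7B.importance-matrix.dat"
$quantizedModelPath   = ".\gguf\example-7B.IQ2_XS.gguf"

# Compute the importance matrix once; the result is small but slow to produce.
$imatrixCommand = "${llamaCppDirectory}\build\bin\Release\imatrix.exe"
Invoke-Expression "$imatrixCommand -m `"${unquantizedModelPath}`" -f `"${trainingDataPath}`" -o `"${importanceMatrixPath}`""

# Quantize with the importance matrix, mirroring the corrected --imatrix usage above.
$quantizeCommand = "${llamaCppDirectory}\build\bin\Release\quantize.exe --imatrix `"${importanceMatrixPath}`""
Invoke-Expression "$quantizeCommand `"${unquantizedModelPath}`" `"${quantizedModelPath}`" `"IQ2_XS`""
```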
