Commit 222a195
Update gguf-py constants (#298)
* Update GGMLQuantizationType
* Update LlamaFileType
* Update GGML_QUANT_SIZES
1 parent 9dac3ed commit 222a195

1 file changed: +227 −117 lines changed


gguf-py/gguf/constants.py

Lines changed: 227 additions & 117 deletions
@@ -1171,47 +1171,86 @@ class PoolingType(IntEnum):
 
 
 class GGMLQuantizationType(IntEnum):
-    F32 = 0
-    F16 = 1
-    Q4_0 = 2
-    Q4_1 = 3
-    Q5_0 = 6
-    Q5_1 = 7
-    Q8_0 = 8
-    Q8_1 = 9
-    Q2_K = 10
-    Q3_K = 11
-    Q4_K = 12
-    Q5_K = 13
-    Q6_K = 14
-    Q8_K = 15
-    IQ2_XXS = 16
-    IQ2_XS = 17
-    IQ3_XXS = 18
-    IQ1_S = 19
-    IQ4_NL = 20
-    IQ3_S = 21
-    IQ2_S = 22
-    IQ4_XS = 23
-    I8 = 24
-    I16 = 25
-    I32 = 26
-    I64 = 27
-    F64 = 28
-    IQ1_M = 29
-    BF16 = 30
-    Q4_0_4_4 = 31
-    Q4_0_4_8 = 32
-    Q4_0_8_8 = 33
-    IQ1_BN = 34,
-    IQ2_BN = 35,
-    Q8_K64 = 36,
-    IQ2_K = 37,
-    IQ3_K = 38,
-    IQ4_K = 39,
-    IQ5_K = 40,
-    IQ6_K = 41,
-    IQ2_TN = 42,
+    F32 = 0
+    F16 = 1
+    Q4_0 = 2
+    Q4_1 = 3
+    Q5_0 = 6
+    Q5_1 = 7
+    Q8_0 = 8
+    Q8_1 = 9
+    Q2_K = 10
+    Q3_K = 11
+    Q4_K = 12
+    Q5_K = 13
+    Q6_K = 14
+    Q8_K = 15
+    IQ2_XXS = 16
+    IQ2_XS = 17
+    IQ3_XXS = 18
+    IQ1_S = 19
+    IQ4_NL = 20
+    IQ3_S = 21
+    IQ2_S = 22
+    IQ4_XS = 23
+    I8 = 24
+    I16 = 25
+    I32 = 26
+    I64 = 27
+    F64 = 28
+    IQ1_M = 29
+    BF16 = 30
+    Q4_0_4_4 = 31
+    Q4_0_4_8 = 32
+    Q4_0_8_8 = 33
+    I2_S = 36
+    Q8_0_X4 = 97
+    Q8_1_X4 = 98
+    Q8_2_X4 = 99
+    Q6_0 = 133
+    IQ1_BN = 134
+    IQ2_BN = 135
+    Q8_K64 = 136
+    IQ2_K = 137
+    IQ3_K = 138
+    IQ4_K = 139
+    IQ5_K = 140
+    IQ6_K = 141
+    IQ4_KS = 144
+    IQ2_KS = 145
+    IQ4_KSS = 146
+    Q8_K16 = 147
+    Q8_K32 = 148
+    Q8_KR8 = 149
+    Q8_K128 = 150
+    Q8_KV = 151
+    Q4_0_R8 = 202
+    Q5_0_R4 = 206
+    Q8_0_R8 = 208
+    Q2_K_R4 = 210
+    Q3_K_R4 = 211
+    Q4_K_R4 = 212
+    Q5_K_R4 = 213
+    Q6_K_R4 = 214
+    IQ2_XXS_R4 = 216
+    IQ2_XS_R4 = 217
+    IQ3_XXS_R4 = 218
+    IQ1_S_R4 = 219
+    IQ4_NL_R4 = 220
+    IQ3_S_R4 = 221
+    IQ2_S_R4 = 222
+    IQ4_XS_R8 = 223
+    IQ1_M_R4 = 229
+    BF16_R16 = 230
+    Q6_0_R4 = 233
+    IQ2_BN_R4 = 335
+    IQ2_K_R4 = 337
+    IQ3_K_R4 = 338
+    IQ4_K_R4 = 339
+    IQ5_K_R4 = 340
+    IQ4_KS_R4 = 344
+    Q8_KV_R8 = 398
+    Q8_K_R8 = 399
 
 
 class ExpertGatingFuncType(IntEnum):
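
Since GGMLQuantizationType is an IntEnum, the raw integer type id stored in a tensor's GGUF header maps straight back to a named member. A minimal sketch of that round trip (the id 208 is just an illustrative pick from the table above):

    from gguf.constants import GGMLQuantizationType

    # Raw type id as it would appear in a tensor's GGUF metadata.
    raw_type_id = 208
    qtype = GGMLQuantizationType(raw_type_id)  # raises ValueError for unknown ids
    print(qtype.name, qtype.value)             # -> Q8_0_R8 208
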
@@ -1225,50 +1264,71 @@ class ExpertGatingFuncType(IntEnum):
 # from llama_ftype in llama.h
 # ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE.
 class LlamaFileType(IntEnum):
-    ALL_F32 = 0
-    MOSTLY_F16 = 1  # except 1d tensors
-    MOSTLY_Q4_0 = 2  # except 1d tensors
-    MOSTLY_Q4_1 = 3  # except 1d tensors
-    # MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
-    # MOSTLY_Q4_2 = 5  # support has been removed
-    # MOSTLY_Q4_3 = 6  # support has been removed
-    MOSTLY_Q8_0 = 7  # except 1d tensors
-    MOSTLY_Q5_0 = 8  # except 1d tensors
-    MOSTLY_Q5_1 = 9  # except 1d tensors
-    MOSTLY_Q2_K = 10  # except 1d tensors
-    MOSTLY_Q3_K_S = 11  # except 1d tensors
-    MOSTLY_Q3_K_M = 12  # except 1d tensors
-    MOSTLY_Q3_K_L = 13  # except 1d tensors
-    MOSTLY_Q4_K_S = 14  # except 1d tensors
-    MOSTLY_Q4_K_M = 15  # except 1d tensors
-    MOSTLY_Q5_K_S = 16  # except 1d tensors
-    MOSTLY_Q5_K_M = 17  # except 1d tensors
-    MOSTLY_Q6_K = 18  # except 1d tensors
-    MOSTLY_IQ2_XXS = 19  # except 1d tensors
-    MOSTLY_IQ2_XS = 20  # except 1d tensors
-    MOSTLY_Q2_K_S = 21  # except 1d tensors
-    MOSTLY_IQ3_XS = 22  # except 1d tensors
-    MOSTLY_IQ3_XXS = 23  # except 1d tensors
-    MOSTLY_IQ1_S = 24  # except 1d tensors
-    MOSTLY_IQ4_NL = 25  # except 1d tensors
-    MOSTLY_IQ3_S = 26  # except 1d tensors
-    MOSTLY_IQ3_M = 27  # except 1d tensors
-    MOSTLY_IQ2_S = 28  # except 1d tensors
-    MOSTLY_IQ2_M = 29  # except 1d tensors
-    MOSTLY_IQ4_XS = 30  # except 1d tensors
-    MOSTLY_IQ1_M = 31  # except 1d tensors
-    MOSTLY_BF16 = 32  # except 1d tensors
-    MOSTLY_Q4_0_4_4 = 33  # except 1d tensors
-    MOSTLY_Q4_0_4_8 = 34  # except 1d tensors
-    MOSTLY_Q4_0_8_8 = 35  # except 1d tensors
-    MOSTLY_IQ1_BN = 36,  # except 1d tensors
-    MOSTLY_IQ2_BN = 37,  # except 1d tensors
-    MOSTLY_IQ2_K = 38,  # except 1d tensors
-    MOSTLY_IQ3_K = 39,  # except 1d tensors
-    MOSTLY_IQ4_K = 40,  # except 1d tensors
-    MOSTLY_IQ5_K = 41,  # except 1d tensors
-    MOSTLY_IQ6_K = 42,  # except 1d tensors
-    MOSTLY_IQ2_TN = 43,  # except 1d tensors
+    ALL_F32 = 0
+    MOSTLY_F16 = 1  # except 1d tensors
+    MOSTLY_Q4_0 = 2  # except 1d tensors
+    MOSTLY_Q4_1 = 3  # except 1d tensors
+    MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
+    MOSTLY_Q8_0 = 7  # except 1d tensors
+    MOSTLY_Q5_0 = 8  # except 1d tensors
+    MOSTLY_Q5_1 = 9  # except 1d tensors
+    MOSTLY_Q2_K = 10  # except 1d tensors
+    MOSTLY_Q3_K = 11  # except 1d tensors
+    MOSTLY_Q4_K = 12  # except 1d tensors
+    MOSTLY_Q5_K = 13  # except 1d tensors
+    MOSTLY_Q6_K = 14  # except 1d tensors
+    MOSTLY_IQ2_XXS = 15  # except 1d tensors
+    MOSTLY_IQ2_XS = 16  # except 1d tensors
+    MOSTLY_IQ3_XXS = 17  # except 1d tensors
+    MOSTLY_IQ1_S = 18  # except 1d tensors
+    MOSTLY_IQ4_NL = 19  # except 1d tensors
+    MOSTLY_IQ3_S = 20  # except 1d tensors
+    MOSTLY_IQ2_S = 21  # except 1d tensors
+    MOSTLY_IQ4_XS = 22  # except 1d tensors
+    MOSTLY_IQ1_M = 23  # except 1d tensors
+    MOSTLY_BF16 = 24  # except 1d tensors
+    MOSTLY_Q4_0_4_4 = 25  # except 1d tensors
+    MOSTLY_Q4_0_4_8 = 26  # except 1d tensors
+    MOSTLY_Q4_0_8_8 = 27  # except 1d tensors
+    MOSTLY_Q6_0 = 127  # except 1d tensors
+    MOSTLY_IQ1_BN = 128  # except 1d tensors
+    MOSTLY_IQ2_BN = 129  # except 1d tensors
+    MOSTLY_IQ2_K = 130  # except 1d tensors
+    MOSTLY_IQ3_K = 131  # except 1d tensors
+    MOSTLY_IQ4_K = 132  # except 1d tensors
+    MOSTLY_IQ5_K = 133  # except 1d tensors
+    MOSTLY_IQ6_K = 134  # except 1d tensors
+    MOSTLY_IQ4_KS = 137  # except 1d tensors
+    MOSTLY_IQ2_KS = 138  # except 1d tensors
+    MOSTLY_IQ4_KSS = 139  # except 1d tensors
+    MOSTLY_Q8_KV = 140  # except 1d tensors
+    MOSTLY_Q4_0_R8 = 202  # except 1d tensors
+    MOSTLY_Q8_0_R8 = 207  # except 1d tensors
+    MOSTLY_Q5_0_R4 = 208  # except 1d tensors
+    MOSTLY_Q2_K_R4 = 210  # except 1d tensors
+    MOSTLY_Q3_K_R4 = 211  # except 1d tensors
+    MOSTLY_Q4_K_R4 = 212  # except 1d tensors
+    MOSTLY_Q5_K_R4 = 213  # except 1d tensors
+    MOSTLY_Q6_K_R4 = 214  # except 1d tensors
+    MOSTLY_IQ2_XXS_R4 = 215  # except 1d tensors
+    MOSTLY_IQ2_XS_R4 = 216  # except 1d tensors
+    MOSTLY_IQ3_XXS_R4 = 217  # except 1d tensors
+    MOSTLY_IQ1_S_R4 = 218  # except 1d tensors
+    MOSTLY_IQ4_NL_R4 = 219  # except 1d tensors
+    MOSTLY_IQ3_S_R4 = 220  # except 1d tensors
+    MOSTLY_IQ2_S_R4 = 221  # except 1d tensors
+    MOSTLY_IQ4_XS_R8 = 222  # except 1d tensors
+    MOSTLY_IQ1_M_R4 = 223  # except 1d tensors
+    MOSTLY_BF16_R16 = 224  # except 1d tensors
+    MOSTLY_Q6_0_R4 = 227  # except 1d tensors
+    MOSTLY_IQ2_BN_R4 = 329  # except 1d tensors
+    MOSTLY_IQ2_K_R4 = 330  # except 1d tensors
+    MOSTLY_IQ3_K_R4 = 331  # except 1d tensors
+    MOSTLY_IQ4_K_R4 = 332  # except 1d tensors
+    MOSTLY_IQ5_K_R4 = 333  # except 1d tensors
+    MOSTLY_IQ4_KS_R4 = 337  # except 1d tensors
+    MOSTLY_Q8_KV_R8 = 398  # except 1d tensors
+    MOSTLY_Q8_K_R8 = 399  # except 1d tensors
 
 
     GUESSED = 1024  # not specified in the model file
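
Because LlamaFileType mirrors llama_ftype in llama.h, the general.file_type value read from a model's metadata decodes the same way. A minimal sketch, assuming the raw integer is already in hand (describe_file_type is a hypothetical helper, not part of gguf-py):

    from gguf.constants import LlamaFileType

    def describe_file_type(ftype_id: int) -> str:
        # Hypothetical helper: map a raw general.file_type value to its name.
        try:
            return LlamaFileType(ftype_id).name
        except ValueError:
            return f"unknown file type ({ftype_id})"

    print(describe_file_type(214))   # -> MOSTLY_Q6_K_R4
    print(describe_file_type(1024))  # -> GUESSED
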
@@ -1313,39 +1373,89 @@ def get_type(val: Any) -> GGUFValueType:
 
 # Items here are (block size, type size)
 QK_K = 256
+
+# Values generated programmatically
 GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
-    GGMLQuantizationType.F32: (1, 4),
-    GGMLQuantizationType.F16: (1, 2),
-    GGMLQuantizationType.Q4_0: (32, 2 + 16),
-    GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
-    GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
-    GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
-    GGMLQuantizationType.Q8_0: (32, 2 + 32),
-    GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
-    GGMLQuantizationType.Q2_K: (256, 2 + 2 + QK_K // 16 + QK_K // 4),
-    GGMLQuantizationType.Q3_K: (256, 2 + QK_K // 4 + QK_K // 8 + 12),
-    GGMLQuantizationType.Q4_K: (256, 2 + 2 + QK_K // 2 + 12),
-    GGMLQuantizationType.Q5_K: (256, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
-    GGMLQuantizationType.Q6_K: (256, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
-    GGMLQuantizationType.Q8_K: (256, 4 + QK_K + QK_K // 8),
-    GGMLQuantizationType.IQ2_XXS: (256, 2 + QK_K // 4),
-    GGMLQuantizationType.IQ2_XS: (256, 2 + QK_K // 4 + QK_K // 32),
-    GGMLQuantizationType.IQ3_XXS: (256, 2 + QK_K // 4 + QK_K // 8),
-    GGMLQuantizationType.IQ1_S: (256, 2 + QK_K // 8 + QK_K // 16),
-    GGMLQuantizationType.IQ4_NL: (32, 2 + 16),
-    GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
-    GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
-    GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
-    GGMLQuantizationType.I8: (1, 1),
-    GGMLQuantizationType.I16: (1, 2),
-    GGMLQuantizationType.I32: (1, 4),
-    GGMLQuantizationType.I64: (1, 8),
-    GGMLQuantizationType.F64: (1, 8),
-    GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
-    GGMLQuantizationType.BF16: (1, 2),
-    GGMLQuantizationType.Q4_0_4_4: (32, 2 + 16),
-    GGMLQuantizationType.Q4_0_4_8: (32, 2 + 16),
-    GGMLQuantizationType.Q4_0_8_8: (32, 2 + 16),
+    GGMLQuantizationType.F32: (1, 4),
+    GGMLQuantizationType.F16: (1, 2),
+    GGMLQuantizationType.Q4_0: (32, 18),
+    GGMLQuantizationType.Q4_1: (32, 20),
+    GGMLQuantizationType.Q5_0: (32, 22),
+    GGMLQuantizationType.Q5_1: (32, 24),
+    GGMLQuantizationType.Q8_0: (32, 34),
+    GGMLQuantizationType.Q8_1: (32, 36),
+    GGMLQuantizationType.Q2_K: (256, 84),
+    GGMLQuantizationType.Q3_K: (256, 110),
+    GGMLQuantizationType.Q4_K: (256, 144),
+    GGMLQuantizationType.Q5_K: (256, 176),
+    GGMLQuantizationType.Q6_K: (256, 210),
+    GGMLQuantizationType.Q8_K: (256, 292),
+    GGMLQuantizationType.IQ2_XXS: (256, 66),
+    GGMLQuantizationType.IQ2_XS: (256, 74),
+    GGMLQuantizationType.IQ3_XXS: (256, 98),
+    GGMLQuantizationType.IQ1_S: (256, 50),
+    GGMLQuantizationType.IQ4_NL: (32, 18),
+    GGMLQuantizationType.IQ3_S: (256, 110),
+    GGMLQuantizationType.IQ2_S: (256, 82),
+    GGMLQuantizationType.IQ4_XS: (256, 136),
+    GGMLQuantizationType.I8: (1, 1),
+    GGMLQuantizationType.I16: (1, 2),
+    GGMLQuantizationType.I32: (1, 4),
+    GGMLQuantizationType.I64: (1, 8),
+    GGMLQuantizationType.F64: (1, 8),
+    GGMLQuantizationType.IQ1_M: (256, 56),
+    GGMLQuantizationType.BF16: (1, 2),
+    GGMLQuantizationType.Q4_0_4_4: (32, 18),
+    GGMLQuantizationType.Q4_0_4_8: (32, 18),
+    GGMLQuantizationType.Q4_0_8_8: (32, 18),
+    GGMLQuantizationType.I2_S: (1, 1),
+    GGMLQuantizationType.Q8_0_X4: (32, 34),
+    GGMLQuantizationType.Q8_1_X4: (32, 36),
+    GGMLQuantizationType.Q8_2_X4: (32, 36),
+    GGMLQuantizationType.Q6_0: (32, 26),
+    GGMLQuantizationType.IQ1_BN: (64, 13),
+    GGMLQuantizationType.IQ2_BN: (64, 16),
+    GGMLQuantizationType.Q8_K64: (64, 68),
+    GGMLQuantizationType.IQ2_K: (256, 76),
+    GGMLQuantizationType.IQ3_K: (256, 110),
+    GGMLQuantizationType.IQ4_K: (256, 144),
+    GGMLQuantizationType.IQ5_K: (256, 176),
+    GGMLQuantizationType.IQ6_K: (256, 212),
+    GGMLQuantizationType.IQ4_KS: (256, 136),
+    GGMLQuantizationType.IQ2_KS: (256, 70),
+    GGMLQuantizationType.IQ4_KSS: (256, 128),
+    GGMLQuantizationType.Q8_K16: (64, 64),
+    GGMLQuantizationType.Q8_K32: (256, 292),
+    GGMLQuantizationType.Q8_KR8: (256, 292),
+    GGMLQuantizationType.Q8_K128: (128, 140),
+    GGMLQuantizationType.Q8_KV: (32, 32),
+    GGMLQuantizationType.Q4_0_R8: (32, 18),
+    GGMLQuantizationType.Q5_0_R4: (32, 22),
+    GGMLQuantizationType.Q8_0_R8: (32, 34),
+    GGMLQuantizationType.Q2_K_R4: (256, 84),
+    GGMLQuantizationType.Q3_K_R4: (256, 110),
+    GGMLQuantizationType.Q4_K_R4: (256, 144),
+    GGMLQuantizationType.Q5_K_R4: (256, 176),
+    GGMLQuantizationType.Q6_K_R4: (256, 210),
+    GGMLQuantizationType.IQ2_XXS_R4: (256, 66),
+    GGMLQuantizationType.IQ2_XS_R4: (256, 74),
+    GGMLQuantizationType.IQ3_XXS_R4: (256, 98),
+    GGMLQuantizationType.IQ1_S_R4: (32, 6),
+    GGMLQuantizationType.IQ4_NL_R4: (32, 18),
+    GGMLQuantizationType.IQ3_S_R4: (256, 110),
+    GGMLQuantizationType.IQ2_S_R4: (256, 82),
+    GGMLQuantizationType.IQ4_XS_R8: (256, 136),
+    GGMLQuantizationType.IQ1_M_R4: (32, 7),
+    GGMLQuantizationType.BF16_R16: (1, 2),
+    GGMLQuantizationType.Q6_0_R4: (32, 26),
+    GGMLQuantizationType.IQ2_BN_R4: (64, 16),
+    GGMLQuantizationType.IQ2_K_R4: (256, 76),
+    GGMLQuantizationType.IQ3_K_R4: (256, 110),
+    GGMLQuantizationType.IQ4_K_R4: (256, 144),
+    GGMLQuantizationType.IQ5_K_R4: (256, 176),
+    GGMLQuantizationType.IQ4_KS_R4: (256, 136),
+    GGMLQuantizationType.Q8_KV_R8: (32, 32),
+    GGMLQuantizationType.Q8_K_R8: (256, 258),
 }
 
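
The (block size, type size) pairs are what gguf-py uses when sizing quantized tensors: an element count must be a multiple of the block size, and the byte size is n_elements // block_size * type_size. A minimal sketch of that arithmetic (quant_tensor_nbytes is a hypothetical helper, not part of gguf-py):

    from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType

    def quant_tensor_nbytes(qtype: GGMLQuantizationType, n_elements: int) -> int:
        # Hypothetical helper: bytes needed to store n_elements in the given type.
        block_size, type_size = GGML_QUANT_SIZES[qtype]
        assert n_elements % block_size == 0, "element count must be a multiple of block size"
        return n_elements // block_size * type_size

    # A 4096 x 4096 weight in Q4_K: 65536 blocks of 256 elements, 144 bytes each.
    print(quant_tensor_nbytes(GGMLQuantizationType.Q4_K, 4096 * 4096))  # -> 9437184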
