@@ -1,6 +1,6 @@
 import { beforeAll, describe, expect, it } from "vitest";
 import type { GGUFParseOutput } from "./gguf";
-import { GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
+import { GGMLFileQuantizationType, GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
 import fs from "node:fs";
 
 const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
@@ -21,9 +21,11 @@ describe("gguf", () => {
 		if (!fs.existsSync(".cache")) {
 			fs.mkdirSync(".cache");
 		}
-		const res = await fetch(URL_BIG_METADATA);
-		const arrayBuf = await res.arrayBuffer();
-		fs.writeFileSync(".cache/model.gguf", Buffer.from(arrayBuf));
+		if (!fs.existsSync(".cache/model.gguf")) {
+			const res = await fetch(URL_BIG_METADATA);
+			const arrayBuf = await res.arrayBuffer();
+			fs.writeFileSync(".cache/model.gguf", Buffer.from(arrayBuf));
+		}
 	});
 
 	it("should parse a llama2 7b", async () => {
@@ -37,7 +39,7 @@ describe("gguf", () => {
 			tensor_count: 291n,
 			kv_count: 19n,
 			"general.architecture": "llama",
-			"general.file_type": 10,
+			"general.file_type": GGMLFileQuantizationType.MOSTLY_Q2_K,
 			"general.name": "LLaMA v2",
 			"general.quantization_version": 2,
 			"llama.attention.head_count": 32,
@@ -96,7 +98,7 @@ describe("gguf", () => {
 			tensor_count: 291n,
 			kv_count: 24n,
 			"general.architecture": "llama",
-			"general.file_type": 17,
+			"general.file_type": GGMLFileQuantizationType.MOSTLY_Q5_K_M,
 			"general.name": "mistralai_mistral-7b-instruct-v0.2",
 			"general.quantization_version": 2,
 			"llama.attention.head_count": 32,
@@ -134,7 +136,7 @@ describe("gguf", () => {
 			tensor_count: 164n,
 			kv_count: 21n,
 			"general.architecture": "gemma",
-			"general.file_type": GGMLQuantizationType.Q8_K, // 15
+			"general.file_type": GGMLFileQuantizationType.MOSTLY_Q4_K_M,
 			"general.name": "gemma-2b-it",
 			"general.quantization_version": 2,
 			"gemma.attention.head_count": 8,
@@ -171,7 +173,7 @@ describe("gguf", () => {
 			tensor_count: 197n,
 			kv_count: 23n,
 			"general.architecture": "bert",
-			"general.file_type": GGMLQuantizationType.F16,
+			"general.file_type": GGMLFileQuantizationType.MOSTLY_F16,
 			"general.name": "bge-small-en-v1.5",
 			"bert.attention.causal": false,
 			"bert.attention.head_count": 12,