
Commit 83bd038

fix: resolved quantization
1 parent fecacf3 commit 83bd038

2 files changed: +73 −38 lines


src/utils/parseModelUri.ts

Lines changed: 37 additions & 18 deletions
@@ -190,17 +190,22 @@ async function fetchHuggingFaceModelManifest({
     while (headersToTry.length > 0) {
         const headers = headersToTry.shift();
 
-        const response = await fetch(manifestUrl, {
-            headers: {
-                ...(authorizationHeader != null ? {"Authorization": authorizationHeader} : {}),
-                ...headers,
-
-                // we need this to get the `ggufFile` field in the response
-                // https://github.com/ggerganov/llama.cpp/pull/11195
-                "User-Agent": "llama-cpp"
-            },
-            signal
-        });
+        let response: Awaited<ReturnType<typeof fetch>> | undefined;
+        try {
+            response = await fetch(manifestUrl, {
+                headers: {
+                    ...(authorizationHeader != null ? {"Authorization": authorizationHeader} : {}),
+                    ...headers,
+
+                    // we need this to get the `ggufFile` field in the response
+                    // https://github.com/ggerganov/llama.cpp/pull/11195
+                    "User-Agent": "llama-cpp"
+                },
+                signal
+            });
+        } catch (err) {
+            throw new Error(`Failed to fetch manifest for resolving URI ${JSON.stringify(fullUri)}: ${err}`);
+        }
 
         if ((response.status >= 500 || response.status === 429 || response.status === 401) && headersToTry.length > 0)
             continue;
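
Note on the hunk above: the manifest request is now wrapped in a try/catch so that a thrown fetch (network failure, aborted signal) is re-thrown with the URI being resolved in the message instead of surfacing as a bare low-level error. A minimal sketch of the same pattern, with an illustrative helper name (not an identifier from the repository):

// Sketch only: mirrors the error-wrapping pattern above; the helper name is illustrative.
async function fetchManifestOrThrow(manifestUrl: string, fullUri: string, signal?: AbortSignal) {
    try {
        return await fetch(manifestUrl, {
            // the "llama-cpp" user agent is needed to get the `ggufFile` field in the response
            headers: {"User-Agent": "llama-cpp"},
            signal
        });
    } catch (err) {
        // network failures now mention which URI was being resolved
        throw new Error(`Failed to fetch manifest for resolving URI ${JSON.stringify(fullUri)}: ${err}`);
    }
}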
@@ -243,8 +248,11 @@ function parseHuggingFaceUriContent(uri: string, fullUri: string): ParsedModelUr
     const actualTag = tagParts.length > 0
         ? [tag, ...tagParts].join(":").trimEnd()
         : (tag ?? "").trimEnd();
-    const resolvedTag = ggufQuantNames.has(actualTag.toUpperCase())
+    const assumedQuant = ggufQuantNames.has(actualTag.toUpperCase())
         ? actualTag.toUpperCase()
+        : undefined;
+    const resolvedTag = assumedQuant != null
+        ? assumedQuant
         : actualTag;
 
     if (actualModel == null || actualModel === "" || user === "")
@@ -260,12 +268,23 @@ function parseHuggingFaceUriContent(uri: string, fullUri: string): ParsedModelUr
         uri: `hf:${user}/${actualModel}${resolvedTag !== "" ? `:${resolvedTag}` : ""}`,
         filePrefix,
         baseFilename,
-        possibleFullFilenames: [
-            `${filePrefix}${baseFilename}.${defaultHuggingFaceFileQuantization}.gguf`,
-            `${filePrefix}${baseFilename}.${defaultHuggingFaceFileQuantization}-00001-of-${genericFilePartNumber}.gguf`,
-            `${filePrefix}${baseFilename}.gguf`,
-            `${filePrefix}${baseFilename}-00001-of-${genericFilePartNumber}.gguf`
-        ],
+        possibleFullFilenames:
+            assumedQuant != null
+                ? [
+                    `${filePrefix}${baseFilename}.${assumedQuant}.gguf`,
+                    `${filePrefix}${baseFilename}.${assumedQuant}-00001-of-${genericFilePartNumber}.gguf`
+                ]
+                : (resolvedTag != null && resolvedTag !== "" && resolvedTag !== "latest")
+                    ? [
+                        `${filePrefix}${baseFilename}.${resolvedTag.toUpperCase()}.gguf`,
+                        `${filePrefix}${baseFilename}.${resolvedTag.toUpperCase()}-00001-of-${genericFilePartNumber}.gguf`
+                    ]
+                    : [
+                        `${filePrefix}${baseFilename}.${defaultHuggingFaceFileQuantization}.gguf`,
+                        `${filePrefix}${baseFilename}.${defaultHuggingFaceFileQuantization}-00001-of-${genericFilePartNumber}.gguf`,
+                        `${filePrefix}${baseFilename}.gguf`,
+                        `${filePrefix}${baseFilename}-00001-of-${genericFilePartNumber}.gguf`
+                    ],
         resolveDetails: {
             type: "hf",
             user,
test/standalone/utils/parseModelUri.test.ts

Lines changed: 36 additions & 20 deletions
@@ -166,10 +166,6 @@ describe("utils", () => {
                   "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf",
                   "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.Q4_K_M-00001-of-{:
               {number}
-              :}.gguf",
-                  "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.gguf",
-                  "hf_mradermacher_Meta-Llama-3.1-8B-Instruct-00001-of-{:
-              {number}
               :}.gguf",
                 ],
                 "resolveDetails": {
@@ -207,10 +203,6 @@ describe("utils", () => {
                   "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf",
                   "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.Q4_K_M-00001-of-{:
               {number}
-              :}.gguf",
-                  "hf_mradermacher_Meta-Llama-3.1-8B-Instruct.gguf",
-                  "hf_mradermacher_Meta-Llama-3.1-8B-Instruct-00001-of-{:
-              {number}
               :}.gguf",
                 ],
                 "resolveDetails": {
@@ -245,12 +237,8 @@ describe("utils", () => {
                 "baseFilename": "Meta-Llama-3.1-70B-Instruct",
                 "filePrefix": "hf_bartowski_",
                 "possibleFullFilenames": [
-                  "hf_bartowski_Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf",
-                  "hf_bartowski_Meta-Llama-3.1-70B-Instruct.Q4_K_M-00001-of-{:
-              {number}
-              :}.gguf",
-                  "hf_bartowski_Meta-Llama-3.1-70B-Instruct.gguf",
-                  "hf_bartowski_Meta-Llama-3.1-70B-Instruct-00001-of-{:
+                  "hf_bartowski_Meta-Llama-3.1-70B-Instruct.Q5_K_L.gguf",
+                  "hf_bartowski_Meta-Llama-3.1-70B-Instruct.Q5_K_L-00001-of-{:
               {number}
               :}.gguf",
                 ],
@@ -327,12 +315,8 @@ describe("utils", () => {
                 "baseFilename": "Meta-Llama-3.1-70B-Instruct",
                 "filePrefix": "hf_mradermacher_",
                 "possibleFullFilenames": [
-                  "hf_mradermacher_Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf",
-                  "hf_mradermacher_Meta-Llama-3.1-70B-Instruct.Q4_K_M-00001-of-{:
-              {number}
-              :}.gguf",
-                  "hf_mradermacher_Meta-Llama-3.1-70B-Instruct.gguf",
-                  "hf_mradermacher_Meta-Llama-3.1-70B-Instruct-00001-of-{:
+                  "hf_mradermacher_Meta-Llama-3.1-70B-Instruct.Q8_0.gguf",
+                  "hf_mradermacher_Meta-Llama-3.1-70B-Instruct.Q8_0-00001-of-{:
               {number}
               :}.gguf",
                 ],
@@ -354,5 +338,37 @@ describe("utils", () => {
             expect(err).toMatchInlineSnapshot('[Error: Cannot get quantization "Q8_0" for model "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF" or it does not exist]');
         }
     });
+
+    test("Hugging Face simple URI is resolved 5", async () => {
+        const parsedModelUri = parseModelUri("hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF:invalid");
+
+        expect(parsedModelUri).toMatchInlineSnapshot(`
+          {
+            "baseFilename": "Meta-Llama-3.1-70B-Instruct",
+            "filePrefix": "hf_mradermacher_",
+            "possibleFullFilenames": [
+              "hf_mradermacher_Meta-Llama-3.1-70B-Instruct.INVALID.gguf",
+              "hf_mradermacher_Meta-Llama-3.1-70B-Instruct.INVALID-00001-of-{:
+          {number}
+          :}.gguf",
+            ],
+            "resolveDetails": {
+              "model": "Meta-Llama-3.1-70B-Instruct-GGUF",
+              "tag": "invalid",
+              "type": "hf",
+              "user": "mradermacher",
+            },
+            "type": "unresolved",
+            "uri": "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF:invalid",
+          }
+        `);
+
+        try {
+            await resolveParsedModelUri(parsedModelUri);
+            expect.unreachable("This quantization cannot be resolved due to not existing");
+        } catch (err) {
+            expect(err).toMatchInlineSnapshot('[Error: Cannot get quantization "invalid" for model "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF" or it does not exist]');
+        }
+    });
     });
 });
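
The new test exercises the behavior end to end: an unrecognized tag produces candidates only for that tag, and resolving it fails with a descriptive error rather than quietly picking another quantization. A rough usage sketch, assuming the same helpers the test file imports from the project's sources (the import path here is an assumption):

// Usage sketch based on the new test above; the import path is an assumption.
import {parseModelUri, resolveParsedModelUri} from "../../../src/utils/parseModelUri.js";

const parsed = parseModelUri("hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF:invalid");
if (parsed.type === "unresolved")
    // "invalid" is not a known quantization, so only ".INVALID" candidates are listed,
    // instead of falling back to the default quantization
    console.log(parsed.possibleFullFilenames);

try {
    await resolveParsedModelUri(parsed);
} catch (err) {
    // Error: Cannot get quantization "invalid" for model
    // "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF" or it does not exist
    console.error(err);
}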
