Skip to content

Commit 7c7a4ff

Browse files
committed
Merge branch 'main' into inference-providers
2 parents 7e634bc + 3c3c008 commit 7c7a4ff

File tree

6 files changed

+52
-6
lines changed

6 files changed

+52
-6
lines changed

.github/workflows/inference-publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ jobs:
5454
git tag "inference-v$BUMPED_VERSION"
5555
5656
- name: "Check Deps are published before publishing this package"
57-
run: pnpm -w check-deps gguf
57+
run: pnpm -w check-deps tasks
5858

5959
- run: pnpm publish --no-git-checks .
6060
env:

packages/gguf/src/gguf.spec.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,4 +283,9 @@ describe("gguf", () => {
283283
expect(parseGGUFQuantLabel("Codestral-22B-v0.1-IQ3_XS.gguf")).toEqual(undefined); // TODO: investigate IQ3_XS
284284
expect(parseGGUFQuantLabel("Codestral-22B-v0.1-Q4_0_4_4.gguf")).toEqual("Q4_0"); // TODO: investigate Q4_0_4_4
285285
});
286+
287+
it("calculate tensor data offset", async () => {
288+
const { tensorDataOffset } = await gguf(URL_LLAMA);
289+
expect(tensorDataOffset).toEqual(741056n);
290+
});
286291
});

packages/gguf/src/gguf.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ export { parseGGUFQuantLabel, GGUF_QUANT_RE, GGUF_QUANT_RE_GLOBAL } from "@huggi
1010

1111
export const RE_GGUF_FILE = /\.gguf$/;
1212
/**
 * Matches sharded GGUF file names of the form `<prefix>-<shard>-of-<total>.gguf`,
 * where `shard` and `total` are each exactly five digits. The three parts are
 * exposed as the named capture groups `prefix`, `shard` and `total`.
 */
export const RE_GGUF_SHARD_FILE = /^(?<prefix>.*?)-(?<shard>\d{5})-of-(?<total>\d{5})\.gguf$/;
13+
// Fallback alignment used for the tensor data offset when the file's metadata
// carries no "general.alignment" entry.
const GGUF_DEFAULT_ALIGNMENT = 32; // defined in ggml.h
14+
/**
 * Rounds `x` up to the nearest multiple of `n` (port of the GGML_PAD macro).
 *
 * Implemented with plain arithmetic instead of the macro's bitwise form
 * `(x + n - 1) & ~(n - 1)`: JavaScript bitwise operators coerce their operands
 * to signed 32-bit integers, so the bitwise version silently wraps for values
 * at or above 2^31 and is only valid when `n` is a power of two.
 *
 * @param x - Non-negative value to pad (e.g. a byte offset).
 * @param n - Alignment; must be a positive integer.
 * @returns The smallest multiple of `n` that is greater than or equal to `x`.
 */
const GGML_PAD = (x: number, n: number): number => Math.ceil(x / n) * n; // defined in ggml.h
1315
const PARALLEL_DOWNLOADS = 20;
1416

1517
export interface GgufShardFileInfo {
@@ -384,14 +386,18 @@ export async function gguf(
384386
});
385387
}
386388

389+
// calculate absolute offset of tensor data
390+
const alignment: number = Number(metadata["general.alignment"] ?? GGUF_DEFAULT_ALIGNMENT);
391+
const tensorDataOffset = BigInt(GGML_PAD(offset, alignment));
392+
387393
if (params?.computeParametersCount) {
388394
const parameterCount = tensorInfos
389395
.map(({ shape }) => shape.reduce((acc, val) => acc * Number(val), 1))
390396
.reduce((acc, val) => acc + val, 0);
391397

392-
return { metadata, tensorInfos, parameterCount };
398+
return { metadata, tensorInfos, tensorDataOffset, parameterCount };
393399
} else {
394-
return { metadata, tensorInfos };
400+
return { metadata, tensorInfos, tensorDataOffset };
395401
}
396402
}
397403

@@ -429,7 +435,10 @@ export async function ggufAllShards(
429435
parameterCount: shards.map(({ parameterCount }) => parameterCount).reduce((acc, val) => acc + val, 0),
430436
};
431437
} else {
432-
const { metadata, tensorInfos, parameterCount } = await gguf(url, { ...params, computeParametersCount: true });
433-
return { shards: [{ metadata, tensorInfos }], parameterCount };
438+
const { metadata, tensorInfos, tensorDataOffset, parameterCount } = await gguf(url, {
439+
...params,
440+
computeParametersCount: true,
441+
});
442+
return { shards: [{ metadata, tensorInfos, tensorDataOffset }], parameterCount };
434443
}
435444
}

packages/gguf/src/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,5 @@ export interface GGUFTensorInfo {
155155
export interface GGUFParseOutput<Options extends GGUFMetadataOptions = { strict: true }> {
156156
metadata: GGUFMetadata<Options>;
157157
tensorInfos: GGUFTensorInfo[];
158+
tensorDataOffset: bigint;
158159
}

packages/tasks/src/hardware.ts

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,24 @@ export const SKUS = {
307307
memory: [16],
308308
},
309309
},
310+
INTEL: {
311+
"Arc A750": {
312+
tflops: 34.41,
313+
memory: [8],
314+
},
315+
"Arc A770": {
316+
tflops: 39.32,
317+
memory: [8, 16],
318+
},
319+
"Arc B570": {
320+
tflops: 23.04,
321+
memory: [10],
322+
},
323+
"Arc B580": {
324+
tflops: 27.34,
325+
memory: [12],
326+
},
327+
},
310328
QUALCOMM: {
311329
"Snapdragon X Elite X1E-00-1DE": {
312330
tflops: 4.6,
@@ -485,7 +503,7 @@ export const SKUS = {
485503
},
486504
"Apple M4 Pro": {
487505
tflops: 9.2,
488-
memory: [24, 48],
506+
memory: [24, 48, 64],
489507
},
490508
"Apple M4 Max": {
491509
tflops: 18.4,

packages/tasks/src/model-libraries.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,12 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
166166
filter: false,
167167
countDownloads: `path:"adapter_config.json"`,
168168
},
169+
cosmos: {
170+
prettyLabel: "Cosmos",
171+
repoName: "Cosmos",
172+
repoUrl: "https://github.com/NVIDIA/Cosmos",
173+
countDownloads: `path:"config.json" OR path_extension:"pt"`,
174+
},
169175
"cxr-foundation": {
170176
prettyLabel: "CXR Foundation",
171177
repoName: "cxr-foundation",
@@ -249,6 +255,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = {
249255
filter: false,
250256
countDownloads: `path_extension:"pt"`,
251257
},
258+
cosyvoice: {
259+
prettyLabel: "CosyVoice",
260+
repoName: "CosyVoice",
261+
repoUrl: "https://github.com/FunAudioLLM/CosyVoice",
262+
filter: false,
263+
countDownloads: `path_extension:"onnx" OR path_extension:"pt"`,
264+
},
252265
cotracker: {
253266
prettyLabel: "CoTracker",
254267
repoName: "CoTracker",

0 commit comments

Comments
 (0)