Skip to content

Commit 529a1d4

Browse files
committed
feat(pull): download only highest-precision GGUF in multi-quant repos
Add ggufpick to rank quantizations from filenames, filter model snapshot downloads to one precision tier (all shards), and pick the same for FindModelFile. Skip filtering when quant tokens are unknown. Made-with: Cursor
1 parent e785341 commit 529a1d4

File tree

6 files changed

+347
-2
lines changed

6 files changed

+347
-2
lines changed

internal/csghub/snapshot.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ import (
77
"path/filepath"
88
"strings"
99
"sync"
10+
11+
"github.com/opencsgs/csghub-lite/internal/ggufpick"
1012
)
1113

1214
// SnapshotProgress reports progress for a multi-file download.
@@ -135,6 +137,10 @@ func (c *Client) downloadSnapshot(ctx context.Context, repoType, namespace, name
135137
}
136138
}
137139

140+
if repoType == "models" {
141+
downloadFiles = filterGGUFMultiQuantDownload(downloadFiles)
142+
}
143+
138144
if len(downloadFiles) == 0 {
139145
return nil, fmt.Errorf("no files found in %s/%s", namespace, name)
140146
}
@@ -194,6 +200,45 @@ func (c *Client) downloadSnapshot(ctx context.Context, repoType, namespace, name
194200
return downloadFiles, nil
195201
}
196202

203+
func repoFileBaseName(f RepoFile) string {
204+
if f.Name != "" {
205+
return f.Name
206+
}
207+
return filepath.Base(f.Path)
208+
}
209+
210+
func filterGGUFMultiQuantDownload(files []RepoFile) []RepoFile {
211+
var weights []RepoFile
212+
for _, f := range files {
213+
if ggufpick.IsWeightGGUF(repoFileBaseName(f)) {
214+
weights = append(weights, f)
215+
}
216+
}
217+
if len(weights) <= 1 {
218+
return files
219+
}
220+
entries := make([]ggufpick.FileEntry, len(weights))
221+
for i, f := range weights {
222+
entries[i] = ggufpick.FileEntry{Path: f.Path, Name: repoFileBaseName(f), Size: f.Size}
223+
}
224+
filtered := ggufpick.FilterWeightGGUFFiles(entries)
225+
kept := make(map[string]struct{}, len(filtered))
226+
for _, e := range filtered {
227+
kept[e.Path] = struct{}{}
228+
}
229+
out := make([]RepoFile, 0, len(files)-len(weights)+len(filtered))
230+
for _, f := range files {
231+
if !ggufpick.IsWeightGGUF(repoFileBaseName(f)) {
232+
out = append(out, f)
233+
continue
234+
}
235+
if _, ok := kept[f.Path]; ok {
236+
out = append(out, f)
237+
}
238+
}
239+
return out
240+
}
241+
197242
// ParseModelID splits a model identifier like "namespace/name" into parts.
198243
func ParseModelID(modelID string) (namespace, name string, err error) {
199244
return ParseRepoID(modelID)

internal/csghub/snapshot_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,37 @@
11
package csghub
22

33
import (
4+
"reflect"
45
"testing"
56
)
67

8+
func TestFilterGGUFMultiQuantDownload(t *testing.T) {
9+
files := []RepoFile{
10+
{Type: "file", Path: "README.md", Name: "README.md"},
11+
{Type: "file", Path: "Q8_0.gguf", Name: "Q8_0.gguf", LFS: true},
12+
{Type: "file", Path: "Q4_0.gguf", Name: "Q4_0.gguf", LFS: true},
13+
}
14+
got := filterGGUFMultiQuantDownload(files)
15+
var names []string
16+
for _, f := range got {
17+
names = append(names, f.Name)
18+
}
19+
want := []string{"README.md", "Q8_0.gguf"}
20+
if !reflect.DeepEqual(names, want) {
21+
t.Errorf("got %v, want %v", names, want)
22+
}
23+
}
24+
25+
func TestFilterGGUFMultiQuantDownload_singleGGUF(t *testing.T) {
26+
files := []RepoFile{
27+
{Type: "file", Path: "Q4_0.gguf", Name: "Q4_0.gguf"},
28+
}
29+
got := filterGGUFMultiQuantDownload(files)
30+
if len(got) != 1 {
31+
t.Fatalf("len = %d", len(got))
32+
}
33+
}
34+
735
func TestParseModelID(t *testing.T) {
836
tests := []struct {
937
name string

internal/ggufpick/quant.go

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
package ggufpick
2+
3+
import (
4+
"path/filepath"
5+
"regexp"
6+
"strings"
7+
)
8+
9+
// quantRanks maps a lower-cased quantization token to a precision rank: a
// higher value means higher numerical precision / less aggressive
// quantization. Only the relative order of the values matters.
//
// Entries are grouped by nominal bit width, and the IQ*/TQ* families are
// ranked inside the group that matches their bits per weight rather than
// below every Q* type, so that e.g. iq4_xs is preferred over q2_k. The order
// within a group is approximate.
//
// Tokens absent from this map are unknown; quantRankFromStem reports -1 for
// them.
var quantRanks = map[string]int{
	// Unquantized floating point.
	"f32":  1000,
	"bf16": 990,
	"f16":  980,
	"fp16": 980,

	// 8-, 6- and 5-bit.
	"q8_0":   920,
	"q8_1":   915,
	"q6_k":   880,
	"q5_k_m": 860,
	"q5_k_s": 855,
	"q5_k":   850,
	"q5_1":   840,
	"q5_0":   835,

	// 4-bit.
	"q4_k_m": 800,
	"q4_k_s": 795,
	"q4_k":   790,
	"q4_1":   785,
	"q4_0":   780,
	"iq4_nl": 775,
	"iq4_xs": 770,

	// 3-bit.
	"q3_k_xl": 755,
	"q3_k_l":  750,
	"q3_k_m":  745,
	"q3_k_s":  740,
	"q3_k":    735,
	"iq3_m":   730,
	"iq3_s":   725,
	"iq3_xs":  720,
	"iq3_xxs": 710,

	// 2-bit.
	"q2_k":    700,
	"iq2_m":   690,
	"iq2_xs":  685,
	"iq2_xxs": 680,
	"tq2_0":   675,

	// Below 2 bits.
	"iq1_m": 660,
	"tq1_0": 655,
	"iq1_s": 650,
}
49+
50+
// shardSuffixRe matches a trailing "-<digits>-of-<digits>" shard marker,
// e.g. the "-00001-of-00003" at the end of a split GGUF stem.
var shardSuffixRe = regexp.MustCompile(`-\d+-of-\d+$`)
51+
52+
// IsMMProjGGUF reports whether name looks like a multimodal projector GGUF:
// its base name ends in ".gguf" and contains "mmproj", compared
// case-insensitively.
//
// Only the final path element is inspected, the same way IsWeightGGUF does,
// so the two predicates never both hold for one file and a directory named
// "mmproj" does not turn the models inside it into projectors.
func IsMMProjGGUF(name string) bool {
	base := strings.ToLower(filepath.Base(name))
	return strings.HasSuffix(base, ".gguf") && strings.Contains(base, "mmproj")
}
57+
58+
// IsWeightGGUF reports whether name is a main-model weight file: its base
// name ends in ".gguf" and does not contain "mmproj" (case-insensitive).
func IsWeightGGUF(name string) bool {
	base := strings.ToLower(filepath.Base(name))
	isGGUF := strings.HasSuffix(base, ".gguf")
	isProjector := strings.Contains(base, "mmproj")
	return isGGUF && !isProjector
}
66+
67+
// QuantRank returns a precision rank for a weight GGUF basename; higher is better.
68+
// Returns -1 if no known quantization token is found.
69+
func QuantRank(basename string) int {
70+
stem := normalizeGGUFStem(filepath.Base(basename))
71+
if stem == "" {
72+
return -1
73+
}
74+
return quantRankFromStem(stem)
75+
}
76+
77+
func normalizeGGUFStem(basename string) string {
78+
lower := strings.ToLower(basename)
79+
if !strings.HasSuffix(lower, ".gguf") {
80+
return ""
81+
}
82+
stem := basename[:len(basename)-len(".gguf")]
83+
stem = strings.ToLower(stem)
84+
stem = shardSuffixRe.ReplaceAllString(stem, "")
85+
return stem
86+
}
87+
88+
func quantRankFromStem(stem string) int {
89+
tokens := strings.Split(stem, "-")
90+
if len(tokens) == 0 {
91+
return -1
92+
}
93+
// Try last 1..3 tokens joined with underscores (e.g. q8_0, q4_k_m).
94+
for n := 3; n >= 1; n-- {
95+
if len(tokens) < n {
96+
continue
97+
}
98+
cand := strings.Join(tokens[len(tokens)-n:], "_")
99+
if r, ok := quantRanks[cand]; ok {
100+
return r
101+
}
102+
}
103+
return -1
104+
}
105+
106+
// FileEntry is a minimal file description for GGUF download filtering.
type FileEntry struct {
	// Path identifies the file. Name is its base name; when Name is empty,
	// FilterWeightGGUFFiles falls back to the base of Path.
	Path, Name string
	// Size is carried along for the caller; the filter itself does not read it.
	Size int64
}
112+
113+
// FilterWeightGGUFFiles keeps every shard of the highest-known-precision variant.
114+
// If there is at most one weight file, or no file has a known quant token, entries are returned unchanged.
115+
func FilterWeightGGUFFiles(entries []FileEntry) []FileEntry {
116+
if len(entries) <= 1 {
117+
return entries
118+
}
119+
ranks := make([]int, len(entries))
120+
maxRank := -1
121+
known := false
122+
for i, e := range entries {
123+
base := e.Name
124+
if base == "" {
125+
base = filepath.Base(e.Path)
126+
}
127+
r := QuantRank(base)
128+
ranks[i] = r
129+
if r >= 0 {
130+
known = true
131+
if r > maxRank {
132+
maxRank = r
133+
}
134+
}
135+
}
136+
if !known {
137+
return entries
138+
}
139+
var out []FileEntry
140+
for i, e := range entries {
141+
if ranks[i] == maxRank {
142+
out = append(out, e)
143+
}
144+
}
145+
if len(out) == 0 {
146+
return entries
147+
}
148+
return out
149+
}
150+
151+
// BestWeightGGUFName picks the highest-precision weight GGUF basename.
152+
// Tie-breaker: lexicographic order on name for stability.
153+
func BestWeightGGUFName(names []string) string {
154+
if len(names) == 0 {
155+
return ""
156+
}
157+
if len(names) == 1 {
158+
return names[0]
159+
}
160+
best := names[0]
161+
bestR := QuantRank(best)
162+
for _, n := range names[1:] {
163+
r := QuantRank(n)
164+
if r > bestR || (r == bestR && n < best) {
165+
best = n
166+
bestR = r
167+
}
168+
}
169+
return best
170+
}

internal/ggufpick/quant_test.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package ggufpick
2+
3+
import (
4+
"reflect"
5+
"testing"
6+
)
7+
8+
func TestQuantRank(t *testing.T) {
9+
tests := []struct {
10+
name string
11+
want int
12+
}{
13+
{"Qwen3-0.6B-Q8_0.gguf", quantRanks["q8_0"]},
14+
{"model-Q4_K_M.gguf", quantRanks["q4_k_m"]},
15+
{"Llama-3-8B-Q4_K_M-00001-of-00003.gguf", quantRanks["q4_k_m"]},
16+
{"weights-f16.gguf", quantRanks["f16"]},
17+
{"x-bf16.gguf", quantRanks["bf16"]},
18+
{"x-f32.gguf", quantRanks["f32"]},
19+
{"unknown.gguf", -1},
20+
}
21+
for _, tt := range tests {
22+
t.Run(tt.name, func(t *testing.T) {
23+
if g := QuantRank(tt.name); g != tt.want {
24+
t.Errorf("QuantRank(%q) = %d, want %d", tt.name, g, tt.want)
25+
}
26+
})
27+
}
28+
}
29+
30+
func TestFilterWeightGGUFFiles(t *testing.T) {
31+
entries := []FileEntry{
32+
{Path: "a/Q4_0.gguf", Name: "Q4_0.gguf", Size: 100},
33+
{Path: "a/Q8_0.gguf", Name: "Q8_0.gguf", Size: 200},
34+
{Path: "a/Q4_K_M.gguf", Name: "Q4_K_M.gguf", Size: 150},
35+
}
36+
got := FilterWeightGGUFFiles(entries)
37+
if len(got) != 1 || got[0].Name != "Q8_0.gguf" {
38+
t.Errorf("FilterWeightGGUFFiles = %#v, want single Q8_0", got)
39+
}
40+
41+
sharded := []FileEntry{
42+
{Path: "M-Q4_0-00001-of-00002.gguf", Name: "M-Q4_0-00001-of-00002.gguf"},
43+
{Path: "M-Q4_0-00002-of-00002.gguf", Name: "M-Q4_0-00002-of-00002.gguf"},
44+
{Path: "M-Q8_0-00001-of-00002.gguf", Name: "M-Q8_0-00001-of-00002.gguf"},
45+
{Path: "M-Q8_0-00002-of-00002.gguf", Name: "M-Q8_0-00002-of-00002.gguf"},
46+
}
47+
got2 := FilterWeightGGUFFiles(sharded)
48+
wantPaths := map[string]bool{
49+
"M-Q8_0-00001-of-00002.gguf": true,
50+
"M-Q8_0-00002-of-00002.gguf": true,
51+
}
52+
if len(got2) != 2 {
53+
t.Fatalf("len = %d, want 2: %#v", len(got2), got2)
54+
}
55+
for _, e := range got2 {
56+
if !wantPaths[e.Path] {
57+
t.Errorf("unexpected path %q", e.Path)
58+
}
59+
}
60+
}
61+
62+
func TestFilterWeightGGUFFiles_unknownOnlyNoOp(t *testing.T) {
63+
entries := []FileEntry{
64+
{Path: "a.gguf", Name: "a.gguf"},
65+
{Path: "b.gguf", Name: "b.gguf"},
66+
}
67+
got := FilterWeightGGUFFiles(entries)
68+
if !reflect.DeepEqual(got, entries) {
69+
t.Errorf("expected unchanged, got %#v", got)
70+
}
71+
}
72+
73+
func TestBestWeightGGUFName(t *testing.T) {
74+
names := []string{"x-Q4_0.gguf", "x-Q8_0.gguf", "x-Q4_K_M.gguf"}
75+
if g := BestWeightGGUFName(names); g != "x-Q8_0.gguf" {
76+
t.Errorf("got %q, want x-Q8_0.gguf", g)
77+
}
78+
}

internal/model/manifest.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import (
55
"os"
66
"path/filepath"
77
"strings"
8+
9+
"github.com/opencsgs/csghub-lite/internal/ggufpick"
810
)
911

1012
// Vision-related HuggingFace architecture suffixes/names.
@@ -109,13 +111,18 @@ func FindModelFile(modelDir string) (string, Format, error) {
109111
return "", FormatUnknown, err
110112
}
111113

112-
// Prefer GGUF files (skip multimodal projector files)
114+
// Prefer GGUF weight files (skip multimodal projector); pick highest precision if several.
115+
var ggufNames []string
113116
for _, e := range entries {
114117
lower := strings.ToLower(e.Name())
115118
if !e.IsDir() && strings.HasSuffix(lower, ".gguf") && !strings.Contains(lower, "mmproj") {
116-
return filepath.Join(modelDir, e.Name()), FormatGGUF, nil
119+
ggufNames = append(ggufNames, e.Name())
117120
}
118121
}
122+
if len(ggufNames) > 0 {
123+
best := ggufpick.BestWeightGGUFName(ggufNames)
124+
return filepath.Join(modelDir, best), FormatGGUF, nil
125+
}
119126
// Then SafeTensors
120127
for _, e := range entries {
121128
if !e.IsDir() && strings.HasSuffix(strings.ToLower(e.Name()), ".safetensors") {

0 commit comments

Comments
 (0)