feat(pull): download only highest-precision GGUF in multi-quant repos

ganisback · ganisback · commit 529a1d48c7df · 2026-03-21T19:56:40.000+08:00
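Add the ggufpick package, which ranks quantizations by parsing GGUF
filenames. Model snapshot downloads are now filtered to a single precision
tier (the highest one found, keeping all of its shards), and FindModelFile
picks the same highest-precision file. Filtering is skipped when no file
carries a recognized quantization token.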

Made-with: Cursor
diff --git a/internal/csghub/snapshot.go b/internal/csghub/snapshot.go
@@ -7,6 +7,8 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+
+	"github.com/opencsgs/csghub-lite/internal/ggufpick"
 )
 
 // SnapshotProgress reports progress for a multi-file download.
@@ -135,6 +137,10 @@ func (c *Client) downloadSnapshot(ctx context.Context, repoType, namespace, name
 		}
 	}
 
+	if repoType == "models" {
+		downloadFiles = filterGGUFMultiQuantDownload(downloadFiles)
+	}
+
 	if len(downloadFiles) == 0 {
 		return nil, fmt.Errorf("no files found in %s/%s", namespace, name)
 	}
@@ -194,6 +200,45 @@ func (c *Client) downloadSnapshot(ctx context.Context, repoType, namespace, name
 	return downloadFiles, nil
 }
 
+// repoFileBaseName returns the name to use for f: its Name field when that is
+// set, otherwise the final element of its Path.
+func repoFileBaseName(f RepoFile) string {
+	if f.Name == "" {
+		return filepath.Base(f.Path)
+	}
+	return f.Name
+}
+
+// filterGGUFMultiQuantDownload narrows files so that, among the weight GGUF
+// files (as judged by ggufpick.IsWeightGGUF on each base name), only those
+// selected by ggufpick.FilterWeightGGUFFiles remain. Every non-weight file is
+// kept and the input ordering is preserved. When files holds at most one
+// weight GGUF, the input slice is returned untouched.
+func filterGGUFMultiQuantDownload(files []RepoFile) []RepoFile {
+	// Classify each file once; isWeight[i] mirrors files[i].
+	isWeight := make([]bool, len(files))
+	var candidates []ggufpick.FileEntry
+	for i, f := range files {
+		base := repoFileBaseName(f)
+		if !ggufpick.IsWeightGGUF(base) {
+			continue
+		}
+		isWeight[i] = true
+		candidates = append(candidates, ggufpick.FileEntry{Path: f.Path, Name: base, Size: f.Size})
+	}
+	if len(candidates) <= 1 {
+		return files
+	}
+
+	// Remember which weight files survived, keyed by path.
+	selected := ggufpick.FilterWeightGGUFFiles(candidates)
+	keepPath := make(map[string]struct{}, len(selected))
+	for _, e := range selected {
+		keepPath[e.Path] = struct{}{}
+	}
+
+	result := make([]RepoFile, 0, len(files)-len(candidates)+len(selected))
+	for i, f := range files {
+		if isWeight[i] {
+			if _, keep := keepPath[f.Path]; !keep {
+				continue
+			}
+		}
+		result = append(result, f)
+	}
+	return result
+}
+
 // ParseModelID splits a model identifier like "namespace/name" into parts.
 func ParseModelID(modelID string) (namespace, name string, err error) {
 	return ParseRepoID(modelID)
diff --git a/internal/csghub/snapshot_test.go b/internal/csghub/snapshot_test.go
@@ -1,9 +1,37 @@
 package csghub
 
 import (
+	"reflect"
 	"testing"
 )
 
+// TestFilterGGUFMultiQuantDownload checks that, given a README plus two
+// quantizations of a model, only the README and the Q8_0 weights survive, in
+// their original order.
+func TestFilterGGUFMultiQuantDownload(t *testing.T) {
+	input := []RepoFile{
+		{Type: "file", Path: "README.md", Name: "README.md"},
+		{Type: "file", Path: "Q8_0.gguf", Name: "Q8_0.gguf", LFS: true},
+		{Type: "file", Path: "Q4_0.gguf", Name: "Q4_0.gguf", LFS: true},
+	}
+	want := []string{"README.md", "Q8_0.gguf"}
+
+	var names []string
+	for _, f := range filterGGUFMultiQuantDownload(input) {
+		names = append(names, f.Name)
+	}
+	if !reflect.DeepEqual(names, want) {
+		t.Errorf("got %v, want %v", names, want)
+	}
+}
+
+// TestFilterGGUFMultiQuantDownload_singleGGUF checks that a listing holding a
+// single GGUF file comes back with exactly one entry.
+func TestFilterGGUFMultiQuantDownload_singleGGUF(t *testing.T) {
+	only := RepoFile{Type: "file", Path: "Q4_0.gguf", Name: "Q4_0.gguf"}
+	got := filterGGUFMultiQuantDownload([]RepoFile{only})
+	if n := len(got); n != 1 {
+		t.Fatalf("len = %d", n)
+	}
+}
+
 func TestParseModelID(t *testing.T) {
 	tests := []struct {
 		name      string
diff --git a/internal/ggufpick/quant.go b/internal/ggufpick/quant.go
@@ -0,0 +1,170 @@
+package ggufpick
+
+import (
+	"path/filepath"
+	"regexp"
+	"strings"
+)
+
+// quantRanks maps a lowercase quantization token to a precision rank: a higher
+// value means higher numerical precision / less aggressive quantization.
+// Tokens absent from this table are unknown; quantRankFromStem returns -1 for
+// them.
+//
+// Within a family the size suffixes are ordered XL > L > M > S, so q3_k_xl
+// sits above q3_k_l.
+//
+// NOTE(review): every IQ*/TQ* token ranks below q2_k here, although e.g. the
+// iq4_* names suggest a wider format than q2_k — confirm whether that
+// placement is deliberate.
+var quantRanks = map[string]int{
+	"f32":     1000,
+	"bf16":    990,
+	"f16":     980,
+	"fp16":    980,
+	"q8_0":    920,
+	"q8_1":    915,
+	"q6_k":    880,
+	"q5_k_m":  860,
+	"q5_k_s":  855,
+	"q5_k":    850,
+	"q5_1":    840,
+	"q5_0":    835,
+	"q4_k_m":  800,
+	"q4_k_s":  795,
+	"q4_k":    790,
+	"q4_1":    785,
+	"q4_0":    780,
+	"q3_k_xl": 755,
+	"q3_k_l":  750,
+	"q3_k_m":  745,
+	"q3_k_s":  740,
+	"q3_k":    735,
+	"q2_k":    700,
+	"tq2_0":   680,
+	"tq1_0":   670,
+	"iq4_nl":  650,
+	"iq4_xs":  640,
+	"iq3_m":   620,
+	"iq3_s":   610,
+	"iq3_xs":  600,
+	"iq3_xxs": 590,
+	"iq2_m":   570,
+	"iq2_xs":  560,
+	"iq2_xxs": 550,
+	"iq1_m":   520,
+	"iq1_s":   510,
+}
+
+// shardSuffixRe matches a trailing multi-part shard marker such as
+// "-00001-of-00003" at the very end of a file stem.
+var shardSuffixRe = regexp.MustCompile(`-\d+-of-\d+$`)
+
+// IsMMProjGGUF reports whether name ends in ".gguf" and contains "mmproj",
+// both compared case-insensitively, i.e. it looks like a multimodal projector
+// GGUF.
+func IsMMProjGGUF(name string) bool {
+	n := strings.ToLower(name)
+	if !strings.HasSuffix(n, ".gguf") {
+		return false
+	}
+	return strings.Contains(n, "mmproj")
+}
+
+// IsWeightGGUF reports whether the base name of name is a ".gguf" file that
+// does not contain "mmproj" (main model weights rather than a multimodal
+// projector). Matching is case-insensitive.
+func IsWeightGGUF(name string) bool {
+	base := strings.ToLower(filepath.Base(name))
+	return strings.HasSuffix(base, ".gguf") && !strings.Contains(base, "mmproj")
+}
+
+// QuantRank reports the precision rank of a weight GGUF file name; a larger
+// value means higher precision. Only the base name is inspected. It returns -1
+// when the name yields no stem or the stem carries no known quantization
+// token.
+func QuantRank(basename string) int {
+	if stem := normalizeGGUFStem(filepath.Base(basename)); stem != "" {
+		return quantRankFromStem(stem)
+	}
+	return -1
+}
+
+// normalizeGGUFStem lowercases basename, strips the ".gguf" extension and any
+// trailing "-N-of-M" shard marker, and returns what is left. It returns ""
+// when basename does not end in ".gguf" (compared case-insensitively).
+func normalizeGGUFStem(basename string) string {
+	lower := strings.ToLower(basename)
+	if !strings.HasSuffix(lower, ".gguf") {
+		return ""
+	}
+	return shardSuffixRe.ReplaceAllString(strings.TrimSuffix(lower, ".gguf"), "")
+}
+
+// quantRankFromStem looks for a known quantization token at the end of stem
+// and returns its rank from quantRanks, or -1 when none is found.
+//
+// stem is expected to be lowercase with the ".gguf" extension and any shard
+// suffix already removed. It is split on "-" and "." so that both
+// "model-q4_k_m" and "model.q4_k_m" are recognized. Matching is attempted in
+// this order:
+//
+//  1. the last 3, 2, then 1 pieces joined with "_" (covers "q4-k-m" as well
+//     as "q4_k_m");
+//  2. every suffix of the last piece that starts right after an underscore,
+//     longest first (covers "model_q4_k_m").
+func quantRankFromStem(stem string) int {
+	tokens := strings.FieldsFunc(stem, func(r rune) bool { return r == '-' || r == '.' })
+	if len(tokens) == 0 {
+		return -1
+	}
+	// Try last 1..3 tokens joined with underscores (e.g. q8_0, q4_k_m).
+	for n := 3; n >= 1; n-- {
+		if len(tokens) < n {
+			continue
+		}
+		cand := strings.Join(tokens[len(tokens)-n:], "_")
+		if r, ok := quantRanks[cand]; ok {
+			return r
+		}
+	}
+	// Underscore-separated names: scan the last piece left to right so the
+	// longest candidate suffix is tried first.
+	last := tokens[len(tokens)-1]
+	for i := 0; i < len(last); i++ {
+		if last[i] != '_' {
+			continue
+		}
+		if r, ok := quantRanks[last[i+1:]]; ok {
+			return r
+		}
+	}
+	return -1
+}
+
+// FileEntry is a minimal file description for GGUF download filtering.
+type FileEntry struct {
+	// Path identifies the file; FilterWeightGGUFFiles falls back to its base
+	// name when Name is empty.
+	Path string
+	// Name is the file's base name, used to look up the quantization rank.
+	Name string
+	// Size is carried along untouched; FilterWeightGGUFFiles does not read it.
+	// Presumably a byte count — confirm against the caller.
+	Size int64
+}
+
+// FilterWeightGGUFFiles returns the entries whose quantization rank equals the
+// best rank found among entries, which keeps every shard of the
+// highest-known-precision variant. The input is returned as-is when it has at
+// most one entry or when none of the entries has a known quantization token.
+func FilterWeightGGUFFiles(entries []FileEntry) []FileEntry {
+	if len(entries) <= 1 {
+		return entries
+	}
+
+	// First pass: rank every entry and remember the best rank seen.
+	best := -1
+	ranks := make([]int, 0, len(entries))
+	for _, e := range entries {
+		name := e.Name
+		if name == "" {
+			name = filepath.Base(e.Path)
+		}
+		rank := QuantRank(name)
+		ranks = append(ranks, rank)
+		if rank > best {
+			best = rank
+		}
+	}
+	// best stays negative only when every entry was unranked.
+	if best < 0 {
+		return entries
+	}
+
+	// Second pass: keep the entries sitting at the best rank, in input order.
+	var kept []FileEntry
+	for i := range entries {
+		if ranks[i] == best {
+			kept = append(kept, entries[i])
+		}
+	}
+	if len(kept) == 0 {
+		return entries
+	}
+	return kept
+}
+
+// BestWeightGGUFName returns the name with the highest QuantRank among names,
+// or "" when names is empty. When ranks tie, the lexicographically smallest
+// name wins so the result is stable.
+func BestWeightGGUFName(names []string) string {
+	switch len(names) {
+	case 0:
+		return ""
+	case 1:
+		return names[0]
+	}
+	winner, winnerRank := names[0], QuantRank(names[0])
+	for _, cand := range names[1:] {
+		rank := QuantRank(cand)
+		if rank > winnerRank || (rank == winnerRank && cand < winner) {
+			winner, winnerRank = cand, rank
+		}
+	}
+	return winner
+}
diff --git a/internal/ggufpick/quant_test.go b/internal/ggufpick/quant_test.go
@@ -0,0 +1,78 @@
+package ggufpick
+
+import (
+	"reflect"
+	"testing"
+)
+
+// TestQuantRank verifies rank lookup for plain, sharded, and unrecognized
+// GGUF file names.
+func TestQuantRank(t *testing.T) {
+	cases := []struct {
+		file string
+		rank int
+	}{
+		{file: "Qwen3-0.6B-Q8_0.gguf", rank: quantRanks["q8_0"]},
+		{file: "model-Q4_K_M.gguf", rank: quantRanks["q4_k_m"]},
+		{file: "Llama-3-8B-Q4_K_M-00001-of-00003.gguf", rank: quantRanks["q4_k_m"]},
+		{file: "weights-f16.gguf", rank: quantRanks["f16"]},
+		{file: "x-bf16.gguf", rank: quantRanks["bf16"]},
+		{file: "x-f32.gguf", rank: quantRanks["f32"]},
+		{file: "unknown.gguf", rank: -1},
+	}
+	for _, tc := range cases {
+		t.Run(tc.file, func(t *testing.T) {
+			got := QuantRank(tc.file)
+			if got != tc.rank {
+				t.Errorf("QuantRank(%q) = %d, want %d", tc.file, got, tc.rank)
+			}
+		})
+	}
+}
+
+// TestFilterWeightGGUFFiles covers two scenarios: three single-file
+// quantizations, where only Q8_0 must remain, and two sharded quantizations,
+// where both Q8_0 shards must remain and both Q4_0 shards must be dropped.
+func TestFilterWeightGGUFFiles(t *testing.T) {
+	single := FilterWeightGGUFFiles([]FileEntry{
+		{Path: "a/Q4_0.gguf", Name: "Q4_0.gguf", Size: 100},
+		{Path: "a/Q8_0.gguf", Name: "Q8_0.gguf", Size: 200},
+		{Path: "a/Q4_K_M.gguf", Name: "Q4_K_M.gguf", Size: 150},
+	})
+	if len(single) != 1 || single[0].Name != "Q8_0.gguf" {
+		t.Errorf("FilterWeightGGUFFiles = %#v, want single Q8_0", single)
+	}
+
+	// Each shard uses the same string for Path and Name.
+	shardNames := []string{
+		"M-Q4_0-00001-of-00002.gguf",
+		"M-Q4_0-00002-of-00002.gguf",
+		"M-Q8_0-00001-of-00002.gguf",
+		"M-Q8_0-00002-of-00002.gguf",
+	}
+	sharded := make([]FileEntry, 0, len(shardNames))
+	for _, n := range shardNames {
+		sharded = append(sharded, FileEntry{Path: n, Name: n})
+	}
+	kept := FilterWeightGGUFFiles(sharded)
+	if len(kept) != 2 {
+		t.Fatalf("len = %d, want 2: %#v", len(kept), kept)
+	}
+	for _, e := range kept {
+		switch e.Path {
+		case "M-Q8_0-00001-of-00002.gguf", "M-Q8_0-00002-of-00002.gguf":
+			// expected survivor
+		default:
+			t.Errorf("unexpected path %q", e.Path)
+		}
+	}
+}
+
+// TestFilterWeightGGUFFiles_unknownOnlyNoOp checks that entries are returned
+// unchanged when none of the names carries a known quantization token.
+func TestFilterWeightGGUFFiles_unknownOnlyNoOp(t *testing.T) {
+	input := []FileEntry{
+		{Path: "a.gguf", Name: "a.gguf"},
+		{Path: "b.gguf", Name: "b.gguf"},
+	}
+	if got := FilterWeightGGUFFiles(input); !reflect.DeepEqual(got, input) {
+		t.Errorf("expected unchanged, got %#v", got)
+	}
+}
+
+// TestBestWeightGGUFName checks that Q8_0 is picked over Q4_0 and Q4_K_M.
+func TestBestWeightGGUFName(t *testing.T) {
+	const want = "x-Q8_0.gguf"
+	got := BestWeightGGUFName([]string{"x-Q4_0.gguf", "x-Q8_0.gguf", "x-Q4_K_M.gguf"})
+	if got != want {
+		t.Errorf("got %q, want x-Q8_0.gguf", got)
+	}
+}
diff --git a/internal/model/manifest.go b/internal/model/manifest.go
@@ -5,6 +5,8 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+
+	"github.com/opencsgs/csghub-lite/internal/ggufpick"
 )
 
 // Vision-related HuggingFace architecture suffixes/names.
@@ -109,13 +111,18 @@ func FindModelFile(modelDir string) (string, Format, error) {
 		return "", FormatUnknown, err
 	}
 
-	// Prefer GGUF files (skip multimodal projector files)
+	// Prefer GGUF weight files (skip multimodal projector); pick highest precision if several.
+	var ggufNames []string
 	for _, e := range entries {
 		lower := strings.ToLower(e.Name())
 		if !e.IsDir() && strings.HasSuffix(lower, ".gguf") && !strings.Contains(lower, "mmproj") {
-			return filepath.Join(modelDir, e.Name()), FormatGGUF, nil
+			ggufNames = append(ggufNames, e.Name())
 		}
 	}
+	if len(ggufNames) > 0 {
+		best := ggufpick.BestWeightGGUFName(ggufNames)
+		return filepath.Join(modelDir, best), FormatGGUF, nil
+	}
 	// Then SafeTensors
 	for _, e := range entries {
 		if !e.IsDir() && strings.HasSuffix(strings.ToLower(e.Name()), ".safetensors") {
diff --git a/internal/model/manifest_test.go b/internal/model/manifest_test.go