@@ -31,27 +31,27 @@ rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf
31
31
# 1. Get a model
32
32
(
33
33
cd $WORK_PATH
34
- "$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
34
+ "$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf
35
35
)
36
36
echo PASS
37
37
38
38
# 2. Split with max tensors strategy
39
- $SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split
39
+ $SPLIT --split-max-tensors 28 $WORK_PATH/Qwen3-0.6B-Q8_0.gguf $WORK_PATH/ggml-model-split
40
40
echo PASS
41
41
echo
42
42
43
43
# 2b. Test the sharded model is loading properly
44
- $MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --n-predict 32
44
+ $MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00012.gguf -p "I believe the meaning of life is" --n-predict 32
45
45
echo PASS
46
46
echo
47
47
48
48
# 3. Merge
49
- $SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-merge.gguf
49
+ $SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-merge.gguf
50
50
echo PASS
51
51
echo
52
52
53
53
# 3b. Test the merged model is loading properly
54
- $MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf --n-predict 32
54
+ $MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf -p "I believe the meaning of life is" --n-predict 32
55
55
echo PASS
56
56
echo
57
57
@@ -61,12 +61,12 @@ echo PASS
61
61
echo
62
62
63
63
# 4b. Test the sharded model is loading properly
64
- $MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --n-predict 32
64
+ $MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00011.gguf -p "I believe the meaning of life is" --n-predict 32
65
65
echo PASS
66
66
echo
67
67
68
68
# 5. Merge
69
- # $SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf
69
+ # $SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00012.gguf $WORK_PATH/ggml-model-merge-2.gguf
70
70
# echo PASS
71
71
# echo
72
72
76
76
# echo
77
77
78
78
# 6. Split with size strategy
79
- $SPLIT --split-max-size 2G $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-2G
79
+ $SPLIT --split-max-size 500M $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-500M
80
80
echo PASS
81
81
echo
82
82
83
83
# 6b. Test the sharded model is loading properly
84
- $MAIN -no-cnv --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --n-predict 32
84
+ $MAIN -no-cnv --model $WORK_PATH/ggml-model-split-500M-00001-of-00002.gguf -p "I believe the meaning of life is" --n-predict 32
85
85
echo PASS
86
86
echo
87
87
0 commit comments