Skip to content

Commit 1d660d2

Browse files
authored
ci : use smaller model (ggml-org#16168)
* ci : switch from gemma to qwen3 0.6b
* ci : use smaller model for some tests
1 parent a20d810 commit 1d660d2

File tree

3 files changed

+20
-45
lines changed

3 files changed

+20
-45
lines changed

ci/run.sh

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -210,33 +210,9 @@ function gg_sum_ctest_release {
210210
gg_printf '```\n'
211211
}
212212

213-
# test_scripts_debug
213+
# test_scripts
214214

215-
function gg_run_test_scripts_debug {
216-
cd ${SRC}
217-
218-
set -e
219-
220-
(cd ./tools/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
221-
(cd ./tools/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
222-
223-
set +e
224-
}
225-
226-
function gg_sum_test_scripts_debug {
227-
gg_printf '### %s\n\n' "${ci}"
228-
229-
gg_printf 'Runs test scripts in debug mode\n'
230-
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
231-
gg_printf '```\n'
232-
gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
233-
gg_printf '```\n'
234-
gg_printf '\n'
235-
}
236-
237-
# test_scripts_release
238-
239-
function gg_run_test_scripts_release {
215+
function gg_run_test_scripts {
240216
cd ${SRC}
241217

242218
set -e
@@ -247,10 +223,10 @@ function gg_run_test_scripts_release {
247223
set +e
248224
}
249225

250-
function gg_sum_test_scripts_release {
226+
function gg_sum_test_scripts {
251227
gg_printf '### %s\n\n' "${ci}"
252228

253-
gg_printf 'Runs test scripts in release mode\n'
229+
gg_printf 'Runs test scripts\n'
254230
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
255231
gg_printf '```\n'
256232
gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
@@ -627,8 +603,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
627603
test $ret -eq 0 && gg_run rerank_tiny
628604

629605
if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
630-
test $ret -eq 0 && gg_run test_scripts_debug
631-
test $ret -eq 0 && gg_run test_scripts_release
606+
test $ret -eq 0 && gg_run test_scripts
632607
fi
633608

634609
test $ret -eq 0 && gg_run qwen3_0_6b

tools/gguf-split/tests.sh

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,27 +31,27 @@ rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf
3131
# 1. Get a model
3232
(
3333
cd $WORK_PATH
34-
"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
34+
"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf
3535
)
3636
echo PASS
3737

3838
# 2. Split with max tensors strategy
39-
$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split
39+
$SPLIT --split-max-tensors 28 $WORK_PATH/Qwen3-0.6B-Q8_0.gguf $WORK_PATH/ggml-model-split
4040
echo PASS
4141
echo
4242

4343
# 2b. Test the sharded model is loading properly
44-
$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --n-predict 32
44+
$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-00001-of-00012.gguf -p "I believe the meaning of life is" --n-predict 32
4545
echo PASS
4646
echo
4747

4848
# 3. Merge
49-
$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-merge.gguf
49+
$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-merge.gguf
5050
echo PASS
5151
echo
5252

5353
# 3b. Test the merged model is loading properly
54-
$MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf --n-predict 32
54+
$MAIN -no-cnv --model $WORK_PATH/ggml-model-merge.gguf -p "I believe the meaning of life is" --n-predict 32
5555
echo PASS
5656
echo
5757

@@ -61,12 +61,12 @@ echo PASS
6161
echo
6262

6363
# 4b. Test the sharded model is loading properly
64-
$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00007.gguf --n-predict 32
64+
$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00011.gguf -p "I believe the meaning of life is" --n-predict 32
6565
echo PASS
6666
echo
6767

6868
# 5. Merge
69-
#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf
69+
#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00011.gguf $WORK_PATH/ggml-model-merge-2.gguf
7070
#echo PASS
7171
#echo
7272

@@ -76,12 +76,12 @@ echo
7676
#echo
7777

7878
# 6. Split with size strategy
79-
$SPLIT --split-max-size 2G $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-2G
79+
$SPLIT --split-max-size 500M $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-500M
8080
echo PASS
8181
echo
8282

8383
# 6b. Test the sharded model is loading properly
84-
$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --n-predict 32
84+
$MAIN -no-cnv --model $WORK_PATH/ggml-model-split-500M-00001-of-00002.gguf -p "I believe the meaning of life is" --n-predict 32
8585
echo PASS
8686
echo
8787

tools/quantize/tests.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,32 +32,32 @@ rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-requant*.gguf
3232
# 1. Get a model
3333
(
3434
cd $WORK_PATH
35-
"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf
35+
"$ROOT_DIR"/scripts/hf.sh --repo ggml-org/Qwen3-0.6B-GGUF --file Qwen3-0.6B-Q8_0.gguf
3636
)
3737
echo PASS
3838

3939
# 2. Split model
40-
$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split
40+
$SPLIT --split-max-tensors 28 $WORK_PATH/Qwen3-0.6B-Q8_0.gguf $WORK_PATH/ggml-model-split
4141
echo PASS
4242
echo
4343

4444
# 3. Requant model with '--keep-split'
45-
$QUANTIZE --allow-requantize --keep-split $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K
45+
$QUANTIZE --allow-requantize --keep-split $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-requant.gguf Q4_K
4646
echo PASS
4747
echo
4848

4949
# 3a. Test the requanted model is loading properly
50-
$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-00001-of-00006.gguf --n-predict 32
50+
$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-00001-of-00012.gguf -p "I believe the meaning of life is" --n-predict 32
5151
echo PASS
5252
echo
5353

5454
# 4. Requant model without '--keep-split'
55-
$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K
55+
$QUANTIZE --allow-requantize $WORK_PATH/ggml-model-split-00001-of-00012.gguf $WORK_PATH/ggml-model-requant-merge.gguf Q4_K
5656
echo PASS
5757
echo
5858

5959
# 4b. Test the requanted model is loading properly
60-
$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-merge.gguf --n-predict 32
60+
$MAIN -no-cnv --model $WORK_PATH/ggml-model-requant-merge.gguf -p "I believe the meaning of life is" --n-predict 32
6161
echo PASS
6262
echo
6363

0 commit comments

Comments
 (0)