22 changes: 22 additions & 0 deletions contrib/fine-tuning/convert-gguf.sh
@@ -16,6 +16,8 @@ set -e
# cmake -B build
# cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

# Run 'ollama serve' in a separate terminal
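# (Illustrative) e.g. in another terminal: `ollama serve`, optionally with OLLAMA_HOST set for a non-default address.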

export TOKENIZERS_PARALLELISM=false
LLAMA_CPP_PATH=/Users/appthreat/work/llama.cpp
cd $LLAMA_CPP_PATH
@@ -52,12 +54,14 @@ GGUF_MODEL_Q8_0_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q8_0-${FORMAT}
GGUF_MODEL_Q8_0_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q8_0-${FORMAT}
FUSED_MODEL=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}

# Direct conversion to 8-bit from the fused BF16 version
rm -rf ${GGUF_MODEL_Q8_0_PATH}
mkdir -p ${GGUF_MODEL_Q8_0_PATH}
python convert_hf_to_gguf.py --outtype q8_0 --outfile ${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q8_0-${FORMAT}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-q8_0.gguf --model-name ${GGUF_MODEL_Q8_0_NAME} ${FUSED_MODEL}
cp ${MODEL_FILE_PATH} ${GGUF_MODEL_Q8_0_PATH}/Modelfile
cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${GGUF_MODEL_Q8_0_PATH}/

# BF16
GGUF_MODEL_BF16_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-BF16-${FORMAT}
GGUF_MODEL_BF16_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-BF16-${FORMAT}
rm -rf ${GGUF_MODEL_BF16_PATH}
@@ -67,6 +71,16 @@ cp ${MODEL_FILE_PATH} ${GGUF_MODEL_BF16_PATH}/Modelfile
sed -i '' 's|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-q8_0.gguf|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-bf16.gguf|g' ${GGUF_MODEL_BF16_PATH}/Modelfile
cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${GGUF_MODEL_BF16_PATH}/

# MXFP4 - MoE models only
GGUF_MODEL_MXFP4_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4-${FORMAT}
GGUF_MODEL_MXFP4_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4-${FORMAT}
rm -rf ${GGUF_MODEL_MXFP4_PATH}
mkdir -p ${GGUF_MODEL_MXFP4_PATH}
llama-quantize ${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-BF16-${FORMAT}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-bf16.gguf ${GGUF_MODEL_MXFP4_PATH}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4.gguf MXFP4_MOE
cp ${MODEL_FILE_PATH} ${GGUF_MODEL_MXFP4_PATH}/Modelfile
sed -i '' 's|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-q8_0.gguf|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4.gguf|g' ${GGUF_MODEL_MXFP4_PATH}/Modelfile
cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${GGUF_MODEL_MXFP4_PATH}/
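# Optional sanity check (illustrative only; assumes llama-cli from the same llama.cpp build is on the PATH):
# llama-cli -m ${GGUF_MODEL_MXFP4_PATH}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4.gguf -p "Tell me about cdxgen" -n 128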

if [ "$TOOL_BASE_MODEL" == "cdx1-mini" ] || [ "$TOOL_BASE_MODEL" == "cdx1-nano" ]; then
GGUF_MODEL_Q6_K_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q6_K-${FORMAT}
GGUF_MODEL_Q6_K_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q6_K-${FORMAT}
@@ -114,6 +128,7 @@ fi
export HF_HUB_ENABLE_HF_TRANSFER=0
hf auth whoami
hf upload --quiet --exclude "**/README.md" --repo-type model ${GGUF_MODEL_Q8_0_NAME} ${GGUF_MODEL_Q8_0_PATH} .
hf upload --quiet --exclude "**/README.md" --repo-type model ${GGUF_MODEL_MXFP4_NAME} ${GGUF_MODEL_MXFP4_PATH} .
if [ "$TOOL_BASE_MODEL" == "cdx1-mini" ] || [ "$TOOL_BASE_MODEL" == "cdx1-nano" ]; then
hf upload --quiet --exclude "**/README.md" --repo-type model ${GGUF_MODEL_Q6_K_NAME} ${GGUF_MODEL_Q6_K_PATH} .
else
@@ -123,11 +138,18 @@ else
fi
hf upload --quiet --exclude "**/README.md" --repo-type model ${GGUF_MODEL_BF16_NAME} ${GGUF_MODEL_BF16_PATH} .

### Upload to the Ollama registry. TODO: move this to a separate script.

ollama pull hf.co/${GGUF_MODEL_Q8_0_NAME}
ollama cp hf.co/${GGUF_MODEL_Q8_0_NAME} ${GGUF_MODEL_Q8_0_NAME}
ollama push ${GGUF_MODEL_Q8_0_NAME}
ollama rm hf.co/${GGUF_MODEL_Q8_0_NAME}

ollama pull hf.co/${GGUF_MODEL_MXFP4_NAME}
ollama cp hf.co/${GGUF_MODEL_MXFP4_NAME} ${GGUF_MODEL_MXFP4_NAME}
ollama push ${GGUF_MODEL_MXFP4_NAME}
ollama rm hf.co/${GGUF_MODEL_MXFP4_NAME}
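# Illustrative follow-up (not part of the upload flow; assumes the Ollama namespace matches ${HF_ORG}):
# ollama run ${GGUF_MODEL_MXFP4_NAME} "Tell me about cdxgen"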

if [ "$TOOL_BASE_MODEL" == "cdx1-mini" ] || [ "$TOOL_BASE_MODEL" == "cdx1-nano" ]; then
ollama pull hf.co/${GGUF_MODEL_Q6_K_NAME}
ollama cp hf.co/${GGUF_MODEL_Q6_K_NAME} ${GGUF_MODEL_Q6_K_NAME}
6 changes: 6 additions & 0 deletions contrib/fine-tuning/fine-tune-mlx.sh
@@ -45,6 +45,7 @@ FUSED_MODEL=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}
QUANT_MODEL_8BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-8bit
QUANT_MODEL_6BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-6bit
QUANT_MODEL_4BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-4bit
QUANT_MODEL_MXFP4=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-MXFP4
DWQ_QUANT_MODEL_4BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-4bit-DWQ

### mlx-lm needs train.jsonl and valid.jsonl
@@ -109,6 +110,11 @@ mlx_lm.convert --hf-path ${FUSED_MODEL} --mlx-path ${QUANT_MODEL_6BIT} -q --q-bi
echo "Test ${QUANT_MODEL_6BIT} with the prompt 'Tell me about cdxgen'. Must yield a better response."
mlx_lm.generate --model ./${QUANT_MODEL_6BIT} --prompt "Tell me about cdxgen" --temp ${TEMP} --max-tokens ${MAX_TOKENS}

rm -rf ${QUANT_MODEL_MXFP4}
mlx_lm.convert --hf-path ${FUSED_MODEL} --mlx-path ${QUANT_MODEL_MXFP4} -q --q-bits 4 --q-group-size 32 --q-mode mxfp4 --dtype bfloat16
echo "Test ${QUANT_MODEL_MXFP4} with the prompt 'Tell me about cdxgen'. Must yield a better response."
mlx_lm.generate --model ./${QUANT_MODEL_MXFP4} --prompt "Tell me about cdxgen" --temp ${TEMP} --max-tokens ${MAX_TOKENS}
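# Illustrative only: the MXFP4 MLX model can also be served behind an OpenAI-compatible endpoint
# (assumes mlx_lm.server is available in this environment):
# mlx_lm.server --model ./${QUANT_MODEL_MXFP4} --port 8080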

# 4-bit quantization performs very poorly for small models
if [ "$TOOL_BASE_MODEL" != "cdx1-mini" ] && [ "$TOOL_BASE_MODEL" != "cdx1-nano" ]; then
rm -rf ${QUANT_MODEL_4BIT}
10 changes: 6 additions & 4 deletions contrib/fine-tuning/upload-hf.sh
@@ -10,6 +10,7 @@ FUSED_MODEL=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}
QUANT_MODEL_8BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-8bit
QUANT_MODEL_6BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-6bit
QUANT_MODEL_4BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-4bit
QUANT_MODEL_MXFP4=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-MXFP4
DWQ_QUANT_MODEL_4BIT=${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}-4bit-DWQ

hf auth whoami
@@ -20,12 +21,13 @@ hf upload --quiet --repo-type dataset CycloneDX/cdx-docs ./guides guides
hf upload --quiet --repo-type dataset CycloneDX/cdx-docs ./semantics semantics

echo "Uploading models. Please wait ..."
hf upload --quiet --exclude "**/README.md" --repo-type model ${QUANT_MODEL_8BIT} ./${QUANT_MODEL_8BIT} .
hf upload --quiet --exclude "**/README.md" --repo-type model ${QUANT_MODEL_6BIT} ./${QUANT_MODEL_6BIT} .
hf upload --quiet --exclude "**/README.md" --repo-type model ${QUANT_MODEL_8BIT} ./${QUANT_MODEL_8BIT} --delete "*.safetensors" .
hf upload --quiet --exclude "**/README.md" --repo-type model ${QUANT_MODEL_MXFP4} ./${QUANT_MODEL_MXFP4} --delete "*.safetensors" .
hf upload --quiet --exclude "**/README.md" --repo-type model ${QUANT_MODEL_6BIT} ./${QUANT_MODEL_6BIT} --delete "*.safetensors" .
if [ "$TOOL_BASE_MODEL" != "cdx1-mini" ] && [ "$TOOL_BASE_MODEL" != "cdx1-nano" ]; then
hf upload --quiet --exclude "**/README.md" --repo-type model ${QUANT_MODEL_4BIT} ./${QUANT_MODEL_4BIT} .
hf upload --quiet --exclude "**/README.md" --repo-type model ${QUANT_MODEL_4BIT} ./${QUANT_MODEL_4BIT} --delete "*.safetensors" .
fi
#if [ "$TOOL_BASE_MODEL" != "cdx1-mini" ]; then
# hf upload --quiet --exclude "**/README.md" --repo-type model ${DWQ_QUANT_MODEL_4BIT} ./${DWQ_QUANT_MODEL_4BIT} .
#fi
hf upload --quiet --exclude "**/README.md" --repo-type model ${FUSED_MODEL} ./${FUSED_MODEL} .
hf upload --quiet --exclude "**/README.md" --repo-type model ${FUSED_MODEL} ./${FUSED_MODEL} --delete "*.safetensors" .