@@ -1096,14 +1096,14 @@ jobs:
10961096 wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
10971097 export PRMT="Once upon a time in a land far away"
10981098 echo "Generate eager"
1099- python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
1099+ python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
11001100 echo "Generate compile"
1101- python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
1101+ python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
11021102 echo "Export and run ET (C++ runner)"
1103- python torchchat.py export stories110M --output-pte-path ./model.pte --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
1103+ python torchchat.py export stories110M --output-pte-path ./model.pte --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
11041104 ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
11051105 echo "Export and run AOTI (C++ runner)"
1106- python torchchat.py export stories110M --output-dso-path ./model.so --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
1106+ python torchchat.py export stories110M --output-dso-path ./model.so --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
11071107 ./cmake-out/aoti_run ./model.so -z ./tokenizer.model -t 0 -i "${PRMT}"
11081108 echo "Generate AOTI"
11091109 python torchchat.py generate stories110M --dso-path ./model.so --prompt "${PRMT}"
0 commit comments