Skip to content
This repository was archived by the owner on Sep 10, 2025. It is now read-only.

Commit 4ee1b96

Browse files
authored
Merge branch 'main' into new-intx-quantizer
2 parents c2108d6 + 2766a95 commit 4ee1b96

File tree

11 files changed

+148
-75
lines changed

11 files changed

+148
-75
lines changed

.ci/scripts/run-docs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@ fi
88

99
# Pre-initialize variables
1010
filepath=""
11-
parameters="--replace 'llama3:stories15M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN"
11+
# cuda supports padding, so no need to replace quantization for now.
12+
# otherwise add: 'cuda.json:cuda-32.json' to replace rules
13+
parameters="--replace llama3:stories15M,-l3:-l2,mobile.json:mobile-32.json --suppress huggingface-cli,HF_TOKEN"
1214
script_name="./run-${1}.sh" # Dynamically initialize script name
1315

1416
# Use a case statement to handle the $1 argument
1517
case "$1" in
1618
"readme")
1719
filepath="README.md"
20+
parameters="--replace llama3.1:stories15M,-l3:-l2,mobile.json:mobile-32.json --suppress huggingface-cli,HF_TOKEN"
1821
;;
1922
"quantization")
2023
filepath="docs/quantization.md"
@@ -38,7 +41,7 @@ case "$1" in
3841
;;
3942
"distributed")
4043
filepath="docs/distributed.md"
41-
parameters="--replace 'llama3.1:stories110M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN" # Use stories110M to avoid need for authentication
44+
parameters="--replace llama3.1:stories110M,-l3:-l2 --suppress huggingface-cli,HF_TOKEN" # Use stories110M to avoid need for authentication
4245
;;
4346
"local")
4447
filepath="docs/local-model.md"
@@ -63,5 +66,6 @@ echo "::group::Run $1"
6366
echo "*******************************************"
6467
cat "$script_name"
6568
echo "*******************************************"
66-
bash -x "$script_name"
69+
set -x
70+
. "$script_name"
6771
echo "::endgroup::"

.github/workflows/pull.yml

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -515,12 +515,11 @@ jobs:
515515
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
516516
517517
echo "******************************************"
518-
echo "*** can't test --quantize torchchat/quant_config/mobile.json ***"
519-
echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***"
518+
echo "*** [TEST DISABLED] Can't test --quantize torchchat/quant_config/mobile.json ***"
519+
echo "*** Testing --quantize torchchat/quant_config/mobile-32.json instead ***"
520520
echo "******************************************"
521-
python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
522-
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
523-
521+
# python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
522+
# python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
524523
525524
echo "******************************************"
526525
echo "******* Emb: channel-wise quantized ******"
@@ -535,16 +534,16 @@ jobs:
535534
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
536535
537536
echo "******************************************"
538-
echo "**** Emb 4bit: channel-wise quantized ****"
537+
echo "**** [TEST DISABLED] Emb 4bit: channel-wise quantized ****"
539538
echo "******************************************"
540-
python torchchat.py export --quant '{"embedding" : {"bitwidth": 8, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
541-
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
539+
# python torchchat.py export --quant '{"embedding" : {"bitwidth": 4, "groupsize": 0}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
540+
# python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
542541
543542
echo "******************************************"
544-
echo "****** Emb 4bit: group-wise quantized ****"
543+
echo "****** [TEST DISABLED] Emb 4bit: group-wise quantized ****"
545544
echo "******************************************"
546-
python torchchat.py export --quant '{"embedding" : {"bitwidth": 8, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
547-
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
545+
# python torchchat.py export --quant '{"embedding" : {"bitwidth": 4, "groupsize": 8}}' --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
546+
# python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
548547
549548
echo "******************************************"
550549
echo "******* INT8 channel-wise quantized ******"

.github/workflows/run-readme-pr-linuxaarch64.yml

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@ jobs:
2323
uname -a
2424
echo "::endgroup::"
2525
26-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs readme
26+
which pip || true
27+
which pip3 || true
28+
which conda || true
29+
# TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs readme
2730
2831
echo "::group::Completion"
2932
echo "tests complete"
@@ -44,8 +47,12 @@ jobs:
4447
echo "::group::Print machine info"
4548
uname -a
4649
echo "::endgroup::"
47-
48-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization
50+
51+
which pip || true
52+
which pip3 || true
53+
which conda || true
54+
55+
# TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization
4956

5057
test-gguf-cpu:
5158
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -62,7 +69,11 @@ jobs:
6269
uname -a
6370
echo "::endgroup::"
6471
65-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs gguf
72+
which pip || true
73+
which pip3 || true
74+
which conda || true
75+
76+
# TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs gguf
6677
6778
echo "::group::Completion"
6879
echo "tests complete"
@@ -84,7 +95,11 @@ jobs:
8495
uname -a
8596
echo "::endgroup::"
8697
87-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs advanced
98+
which pip || true
99+
which pip3 || true
100+
which conda || true
101+
102+
# TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs advanced
88103
89104
echo "::group::Completion"
90105
echo "tests complete"
@@ -106,7 +121,11 @@ jobs:
106121
uname -a
107122
echo "::endgroup::"
108123
109-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs evaluation
124+
which pip || true
125+
which pip3 || true
126+
which conda || true
127+
128+
# TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs evaluation
110129
111130
echo "::group::Completion"
112131
echo "tests complete"

.github/workflows/run-readme-pr-macos.yml

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,13 @@ jobs:
3333
sysctl machdep.cpu.core_count
3434
echo "::endgroup::"
3535
36+
which pip || true
37+
which pip3 || true
38+
which conda || true
39+
3640
echo "using workaround for #1416 and #1315 by setting torchchat device explicitly"
37-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs readme
41+
export TORCHCHAT_DEVICE=cpu
42+
# . .ci/scripts/run-docs readme
3843
3944
echo "::group::Completion"
4045
echo "tests complete"
@@ -70,8 +75,9 @@ jobs:
7075
echo "::endgroup::"
7176
7277
echo "using workaround for #1416 and #1315 by setting torchchat device explicitly"
73-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization
74-
78+
export TORCHCHAT_DEVICE=cpu
79+
# . .ci/scripts/run-docs quantization
80+
7581
echo "::group::Completion"
7682
echo "tests complete"
7783
echo "*******************************************"
@@ -106,7 +112,8 @@ jobs:
106112
echo "::endgroup::"
107113
108114
echo "using workaround for #1416 and #1315 by setting torchchat device explicitly"
109-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs gguf
115+
export TORCHCHAT_DEVICE=cpu
116+
# .ci/scripts/run-docs gguf
110117
111118
echo "::group::Completion"
112119
echo "tests complete"
@@ -141,7 +148,8 @@ jobs:
141148
echo "::endgroup::"
142149
143150
echo "using workaround for #1416 and #1315 by setting torchchat device explicitly"
144-
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs advanced
151+
export TORCHCHAT_DEVICE=cpu
152+
# . .ci/scripts/run-docs advanced
145153
146154
echo "::group::Completion"
147155
echo "tests complete"
@@ -175,7 +183,7 @@ jobs:
175183
sysctl machdep.cpu.core_count
176184
echo "::endgroup::"
177185
178-
.ci/scripts/run-docs evaluation
186+
# .ci/scripts/run-docs evaluation
179187
180188
echo "::group::Completion"
181189
echo "tests complete"
@@ -209,7 +217,8 @@ jobs:
209217
sysctl machdep.cpu.core_count
210218
echo "::endgroup::"
211219
212-
.ci/scripts/run-docs multimodal
220+
# metadata does not install properly on macos
221+
# .ci/scripts/run-docs multimodal
213222
214223
echo "::group::Completion"
215224
echo "tests complete"
@@ -243,7 +252,8 @@ jobs:
243252
sysctl machdep.cpu.core_count
244253
echo "::endgroup::"
245254
246-
.ci/scripts/run-docs native
255+
echo ".ci/scripts/run-docs native DISABLED"
256+
# .ci/scripts/run-docs native
247257
248258
echo "::group::Completion"
249259
echo "tests complete"

.github/workflows/run-readme-pr-mps.yml

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
sysctl machdep.cpu.core_count
2727
echo "::endgroup::"
2828
29-
.ci/scripts/run-docs readme
29+
# .ci/scripts/run-docs readme
3030
3131
echo "::group::Completion"
3232
echo "tests complete"
@@ -54,7 +54,7 @@ jobs:
5454
sysctl machdep.cpu.core_count
5555
echo "::endgroup::"
5656
57-
.ci/scripts/run-docs quantization
57+
# .ci/scripts/run-docs quantization
5858
5959
echo "::group::Completion"
6060
echo "tests complete"
@@ -81,7 +81,7 @@ jobs:
8181
sysctl machdep.cpu.core_count
8282
echo "::endgroup::"
8383
84-
.ci/scripts/run-docs gguf
84+
# .ci/scripts/run-docs gguf
8585
8686
echo "::group::Completion"
8787
echo "tests complete"
@@ -108,7 +108,7 @@ jobs:
108108
sysctl machdep.cpu.core_count
109109
echo "::endgroup::"
110110
111-
.ci/scripts/run-docs advanced
111+
# .ci/scripts/run-docs advanced
112112
113113
echo "::group::Completion"
114114
echo "tests complete"
@@ -135,7 +135,7 @@ jobs:
135135
sysctl machdep.cpu.core_count
136136
echo "::endgroup::"
137137
138-
.ci/scripts/run-docs evaluation
138+
# .ci/scripts/run-docs evaluation
139139
140140
echo "::group::Completion"
141141
echo "tests complete"
@@ -162,7 +162,8 @@ jobs:
162162
sysctl machdep.cpu.core_count
163163
echo "::endgroup::"
164164
165-
.ci/scripts/run-docs multimodal
165+
# metadata does not install properly on macos
166+
# .ci/scripts/run-docs multimodal
166167
167168
echo "::group::Completion"
168169
echo "tests complete"
@@ -189,7 +190,8 @@ jobs:
189190
sysctl machdep.cpu.core_count
190191
echo "::endgroup::"
191192
192-
.ci/scripts/run-docs native
193+
echo ".ci/scripts/run-docs native DISABLED"
194+
# .ci/scripts/run-docs native
193195
194196
echo "::group::Completion"
195197
echo "tests complete"

0 commit comments

Comments
 (0)