File tree
6 files changed
+39
-39
lines changed- backends/xnnpack
- operators
- utils
- examples/models
- llama
- source_transformation
- phi_4_mini
- third-party
6 files changed
+39
-39
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
185 | 185 | | |
186 | 186 | | |
187 | 187 | | |
188 | | - | |
| 188 | + | |
189 | 189 | | |
190 | 190 | | |
191 | 191 | | |
192 | | - | |
193 | | - | |
194 | | - | |
| 192 | + | |
195 | 193 | | |
196 | 194 | | |
197 | 195 | | |
| |||
202 | 200 | | |
203 | 201 | | |
204 | 202 | | |
205 | | - | |
206 | | - | |
207 | | - | |
208 | | - | |
209 | | - | |
| 203 | + | |
210 | 204 | | |
211 | 205 | | |
212 | 206 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
58 | 58 | | |
59 | 59 | | |
60 | 60 | | |
61 | | - | |
| 61 | + | |
| 62 | + | |
62 | 63 | | |
63 | | - | |
64 | | - | |
65 | | - | |
66 | | - | |
| 64 | + | |
| 65 | + | |
67 | 66 | | |
68 | 67 | | |
69 | | - | |
70 | | - | |
71 | | - | |
72 | | - | |
| 68 | + | |
| 69 | + | |
73 | 70 | | |
74 | | - | |
75 | | - | |
76 | | - | |
77 | | - | |
78 | | - | |
| 71 | + | |
| 72 | + | |
79 | 73 | | |
80 | 74 | | |
81 | 75 | | |
| |||
229 | 223 | | |
230 | 224 | | |
231 | 225 | | |
232 | | - | |
| 226 | + | |
233 | 227 | | |
234 | 228 | | |
235 | 229 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
416 | 416 | | |
417 | 417 | | |
418 | 418 | | |
419 | | - | |
| 419 | + | |
420 | 420 | | |
421 | 421 | | |
422 | 422 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
112 | 112 | | |
113 | 113 | | |
114 | 114 | | |
115 | | - | |
116 | | - | |
117 | | - | |
| 115 | + | |
| 116 | + | |
| 117 | + | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
118 | 122 | | |
119 | 123 | | |
120 | 124 | | |
| |||
124 | 128 | | |
125 | 129 | | |
126 | 130 | | |
127 | | - | |
128 | | - | |
| 131 | + | |
| 132 | + | |
| 133 | + | |
| 134 | + | |
| 135 | + | |
129 | 136 | | |
130 | 137 | | |
131 | 138 | | |
| |||
777 | 784 | | |
778 | 785 | | |
779 | 786 | | |
780 | | - | |
| 787 | + | |
| 788 | + | |
781 | 789 | | |
782 | 790 | | |
783 | 791 | | |
784 | 792 | | |
785 | | - | |
| 793 | + | |
786 | 794 | | |
787 | | - | |
| 795 | + | |
788 | 796 | | |
789 | 797 | | |
790 | 798 | | |
791 | 799 | | |
792 | 800 | | |
793 | 801 | | |
794 | | - | |
| 802 | + | |
795 | 803 | | |
796 | | - | |
| 804 | + | |
| 805 | + | |
| 806 | + | |
| 807 | + | |
797 | 808 | | |
798 | 809 | | |
799 | 810 | | |
800 | 811 | | |
801 | 812 | | |
802 | 813 | | |
803 | 814 | | |
804 | | - | |
| 815 | + | |
805 | 816 | | |
806 | 817 | | |
807 | 818 | | |
808 | 819 | | |
809 | 820 | | |
810 | 821 | | |
811 | | - | |
| 822 | + | |
812 | 823 | | |
813 | 824 | | |
814 | 825 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
110 | 110 | | |
111 | 111 | | |
112 | 112 | | |
113 | | - | |
| 113 | + | |
| 114 | + | |
114 | 115 | | |
115 | 116 | | |
116 | 117 | | |
| |||
- README.md+33-6
- benchmarks/microbenchmarks/utils.py+15-9
- ruff.toml+1
- scripts/hf_eval.py-253
- test/dtypes/test_affine_quantized.py+91-60
- test/integration/test_integration.py+14-9
- test/prototype/test_paretoq.py+52
- test/quantization/pt2e/test_duplicate_dq.py+324
- test/quantization/pt2e/test_graph_utils.py+134
- test/quantization/pt2e/test_metadata_porting.py+526
- test/quantization/pt2e/test_numeric_debugger.py+373
- test/quantization/pt2e/test_quantize_pt2e.py+2.8k
- test/quantization/pt2e/test_quantize_pt2e_qat.py+1.2k
- test/quantization/pt2e/test_representation.py+327
- test/quantization/pt2e/test_x86inductor_quantizer.py+2.9k
- test/quantization/pt2e/test_xnnpack_quantizer.py+1.1k
- test/quantization/test_quant_api.py+58-17
- test/quantization/test_quant_primitives.py+12-8
- torchao/_models/llama/generate.py+24-11
- torchao/dtypes/__init__.py+2-1
- torchao/dtypes/affine_quantized_tensor.py+6-1
- torchao/dtypes/affine_quantized_tensor_ops.py+21
- torchao/dtypes/uintx/__init__.py+4-2
- torchao/dtypes/uintx/int4_xpu_layout.py+445
- torchao/dtypes/uintx/packed_linear_int8_dynamic_activation_intx_weight_layout.py+158-148
- torchao/dtypes/uintx/q_dq_layout.py+182-5
- torchao/experimental/kernels/cpu/aarch64/matmul/channelwise_8bit_a_channelwise_8bit_b_1x8x16_f32_neondot-impl.h+9-5
- torchao/experimental/kernels/cpu/aarch64/matmul/channelwise_8bit_a_channelwise_8bit_b_4x8x8_f32_neondot-impl.h+411
- torchao/experimental/kernels/cpu/aarch64/matmul/fp32_a_input_channelwise_8bit_b_4x16x4_f32_impl.h+328
- torchao/experimental/kernels/cpu/aarch64/matmul/matmul.h+223
- torchao/experimental/kernels/cpu/aarch64/matmul/matmul_utils.h+83
- torchao/experimental/kernels/cpu/aarch64/tests/test_qmatmul.cpp+166-17
- torchao/experimental/kernels/cpu/interface/quantized_matmul.h+4-6
- torchao/experimental/kernels/cpu/interface/test_qmatmul_interface.cpp+28
- torchao/experimental/op_lib_utils.py+18
- torchao/experimental/ops/mps/mps_op_lib.py+46
- torchao/experimental/packed_linear_int8_dynamic_activation_intx_weight_layout.py-5
- torchao/experimental/quant_api.py+124-597
- torchao/experimental/quant_passes.py+20-17
- torchao/experimental/tests/test_embedding_xbit_quantizer.py+25-25
- torchao/experimental/tests/test_int8_dynamic_activation_intx_weight.py+133-83
- torchao/experimental/tests/test_linear_8bit_act_xbit_weight_quantizer.py-111
- torchao/experimental/tests/test_quant_passes.py+32-25
- torchao/kernel/intmm.py+2-2
- torchao/prototype/hqq/hqq_tinygemm_linear.py+3-3
- torchao/prototype/paretoq/1_run_train.sh+35
- torchao/prototype/paretoq/2_run_eval.sh+38
- torchao/prototype/paretoq/README.md+79
- torchao/prototype/paretoq/__init__.py
- torchao/prototype/paretoq/main_result_234bit.jpg
- torchao/prototype/paretoq/main_result_scaling_law.jpg
- torchao/prototype/paretoq/main_result_ternary.jpg
- torchao/prototype/paretoq/models/__init__.py
- torchao/prototype/paretoq/models/configuration_llama.py+231
- torchao/prototype/paretoq/models/modeling_llama_quant.py+1.2k
- torchao/prototype/paretoq/models/utils_quant.py+289
- torchao/prototype/paretoq/requirement.txt+5
- torchao/prototype/paretoq/train.py+122
- torchao/prototype/paretoq/utils/datautils.py+120
- torchao/prototype/paretoq/utils/process_args.py+94
- torchao/prototype/paretoq/utils/utils.py+58
- torchao/quantization/pt2e/__init__.py+175
- torchao/quantization/pt2e/fake_quantize.py+654
- torchao/quantization/pt2e/observer.py+2.1k
- torchao/quantization/pt2e/pt2e/__init__.py
- torchao/quantization/pt2e/pt2e/_affine_quantization.py+813
- torchao/quantization/pt2e/pt2e/_numeric_debugger.py+348
- torchao/quantization/pt2e/pt2e/constant_fold.py+410
- torchao/quantization/pt2e/pt2e/convert.py+1.4k
- torchao/quantization/pt2e/pt2e/duplicate_dq_pass.py+88
- torchao/quantization/pt2e/pt2e/export_utils.py+246
- torchao/quantization/pt2e/pt2e/graph_utils.py+186
- torchao/quantization/pt2e/pt2e/lowering.py+65
- torchao/quantization/pt2e/pt2e/port_metadata_pass.py+230
- torchao/quantization/pt2e/pt2e/prepare.py+668
- torchao/quantization/pt2e/pt2e/qat_utils.py+997
- torchao/quantization/pt2e/pt2e/representation/__init__.py+5
- torchao/quantization/pt2e/pt2e/representation/rewrite.py+835
- torchao/quantization/pt2e/pt2e/utils.py+611
- torchao/quantization/pt2e/quant_type.py+40
- torchao/quantization/pt2e/quantize_pt2e.py+282
- torchao/quantization/pt2e/quantizer/__init__.py+21
- torchao/quantization/pt2e/quantizer/composable_quantizer.py+84
- torchao/quantization/pt2e/quantizer/embedding_quantizer.py+103
- torchao/quantization/pt2e/quantizer/quantizer.py+189
- torchao/quantization/pt2e/quantizer/utils.py+89
- torchao/quantization/pt2e/quantizer/x86_inductor_quantizer.py+1.6k
- torchao/quantization/pt2e/quantizer/xnnpack_quantizer.py+454
- torchao/quantization/pt2e/quantizer/xnnpack_quantizer_utils.py+1.1k
- torchao/quantization/pt2e/quantizer/xpu_inductor_quantizer.py+131
- torchao/quantization/pt2e/utils.py+851
- torchao/quantization/quant_api.py+190-18
- torchao/quantization/quant_primitives.py+15-17
- torchao/quantization/subclass.py+91-9
- torchao/quantization/utils.py+60-12
- torchao/testing/pt2e/__init__.py
- torchao/testing/pt2e/utils.py+172
- torchao/utils.py+14
- tutorials/developer_api_guide/export_to_executorch.py+5-9
0 commit comments