2 files changed
+2
-2
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
430 | 430 | | |
431 | 431 | | |
432 | 432 | | |
433 | | - | |
| 433 | + | |
434 | 434 | | |
435 | 435 | | |
436 | 436 | | |
| |||
- .github/workflows/regression_test_rocm.yml+1-1
- CITATION.cff+1-1
- README.md+12-7
- benchmarks/benchmark_blockwise_scaled_linear_triton.py+1-1
- benchmarks/float8/bench_grouped_mm.py+1-34
- benchmarks/float8/float8_roofline.py+1-1
- benchmarks/inference/bench_float8_inference.py+40
- benchmarks/mx_formats/cast_bench.py+47-4
- docs/source/api_ref_qat.rst+8-5
- docs/source/finetuning.rst+11-24
- test/core/test_config.py+20
- test/dtypes/test_affine_quantized_float.py+69-39
- test/float8/test_base.py+3-7
- test/prototype/blockwise_fp8_training/test_blockwise_kernels.py+325
- test/prototype/blockwise_fp8_training/test_blockwise_linear.py+73
- test/prototype/moe_training/test_fsdp.py+3-4
- test/prototype/moe_training/test_fsdp_tp.py+6-9
- test/prototype/moe_training/test_scaled_grouped_mm.py+142-4
- test/prototype/moe_training/test_tp.py+6-9
- test/prototype/moe_training/test_training.py+10-5
- test/prototype/mx_formats/test_mx_linear.py+52-5
- test/prototype/test_awq.py+203-142
- test/prototype/test_blockwise_triton.py+1-1
- test/prototype/test_codebook_coreml.py+1-3
- test/prototype/test_dynamic_activation_lut.py+1-1
- test/prototype/test_parq.py+2-2
- test/quantization/test_qat.py+211-116
- torchao/_models/_eval.py+13-7
- torchao/_models/llama/eval.py+40
- torchao/core/config.py+14-2
- torchao/experimental/CMakeLists.txt+5-1
- torchao/experimental/kernels/cpu/aarch64/linear/groupwise_lowbit_weight/groupwise_lowbit_weight_lut.h+24-4
- torchao/experimental/kernels/cpu/aarch64/tests/test_lut.cpp+22-14
- torchao/experimental/kernels/cpu/aarch64/tests/test_utils.h+1-9
- torchao/experimental/ops/groupwise_lowbit_weight_lut/groupwise_lowbit_weight_lut.cpp+16-11
- torchao/experimental/ops/groupwise_lowbit_weight_lut/kernel_config.h+22-17
- torchao/experimental/ops/groupwise_lowbit_weight_lut/kernel_selector.h+21-15
- torchao/experimental/ops/groupwise_lowbit_weight_lut/op_groupwise_lowbit_weight_lut-impl.h-2
- torchao/experimental/ops/groupwise_lowbit_weight_lut/op_groupwise_lowbit_weight_lut_aten.cpp+80
- torchao/experimental/ops/groupwise_lowbit_weight_lut/op_groupwise_lowbit_weight_lut_executorch.cpp+32
- torchao/experimental/ops/groupwise_lowbit_weight_lut/packed_weights_format.h+1-1
- torchao/experimental/ops/tests/CMakeLists.txt+20
- torchao/experimental/ops/tests/build_and_run_tests.sh+1
- torchao/experimental/ops/tests/test_groupwise_lowbit_weight_lut.cpp+342
- torchao/experimental/tests/test_embedding_xbit_quantizer.py+2-2
- torchao/experimental/tests/test_int8_dynamic_activation_intx_weight.py+4-4
- torchao/float8/README.md+16-14
- torchao/prototype/awq/__init__.py+4-4
- torchao/prototype/awq/api.py+67-143
- torchao/prototype/awq/core.py+46-97
- torchao/prototype/awq/example.py+99-59
- torchao/prototype/blockwise_fp8_inference/README.md
- torchao/prototype/blockwise_fp8_inference/__init__.py
- torchao/prototype/blockwise_fp8_inference/blockwise_linear.py+1-1
- torchao/prototype/blockwise_fp8_inference/blockwise_quantization.py
- torchao/prototype/blockwise_fp8_training/__init__.py
- torchao/prototype/blockwise_fp8_training/kernels.py+829
- torchao/prototype/blockwise_fp8_training/linear.py+185
- torchao/prototype/inductor/int8_sdpa_lowering.py+40-1
- torchao/prototype/moe_quant/utils.py+1-12
- torchao/prototype/moe_training/kernels/jagged_float8_scales.py+6-2
- torchao/prototype/moe_training/scaled_grouped_mm.py+297-2
- torchao/prototype/moe_training/tensor.py+3-1
- torchao/prototype/moe_training/utils.py+136
- torchao/prototype/mx_formats/README.md+5-2
- torchao/prototype/mx_formats/config.py+62-1
- torchao/prototype/mx_formats/mx_linear.py+30-4
- torchao/prototype/mx_formats/mx_tensor.py+1-25
- torchao/prototype/quantization/codebook_coreml/api.py+1-2
- torchao/prototype/quantization/codebook_coreml/codebook_ops.py+98-57
- torchao/prototype/quantization/codebook_coreml/codebook_quantized_tensor.py+15-1
- torchao/quantization/linear_activation_scale.py+9-51
- torchao/quantization/qat/README.md+53-44
- torchao/quantization/qat/__init__.py+21-8
- torchao/quantization/qat/api.py+188-219
- torchao/quantization/qat/embedding.py+8-5
- torchao/quantization/qat/fake_quantize_config.py+303
- torchao/quantization/qat/fake_quantizer.py+7-3
- torchao/quantization/qat/linear.py+29-24
- torchao/quantization/quant_api.py+3-3
- torchao/quantization/quant_primitives.py-10
- torchao/quantization/transform_module.py+2-2
- torchao/testing/training/roofline_utils.py+2
- torchao/utils.py+13-1
0 commit comments