2 files changed
+2
-2
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
35 | 35 | | |
36 | 36 | | |
37 | 37 | | |
38 | | - | |
| 38 | + | |
39 | 39 | | |
40 | 40 | | |
41 | 41 | | |
| |||
- .github/scripts/torchao_model_releases/quantize_and_upload.py+15-5
- .github/scripts/torchao_model_releases/release.sh+8-3
- .github/workflows/release_model.yml+1-1
- .github/workflows/torchao_experimental_test.yml+1
- benchmarks/prototype/blockwise_fp8_training/bench_1x128_128x128_gemms.py+12-10
- benchmarks/prototype/blockwise_fp8_training/bench_1x128_128x1_gemms.py+14-13
- benchmarks/prototype/blockwise_fp8_training/bench_linear_fwd_bwd.py+196
- benchmarks/prototype/moe_training/benchmark_moe_fsdp.py+1-4
- benchmarks/prototype/moe_training/benchmark_per_group_colwise_scaling_kernels.py+65-64
- benchmarks/prototype/moe_training/benchmark_per_group_rowwise_scaling_kernels.py+251
- benchmarks/prototype/moe_training/benchmark_rowwise_3d_quant_kernels.py+59-23
- benchmarks/prototype/moe_training/benchmark_scaled_grouped_mm_dq.py+3-3
- benchmarks/utils.py+12-12
- packaging/post_build_script.sh+1
- scripts/clean_release_notes.py+2-2
- setup.py+6-8
- test/prototype/blockwise_fp8_training/test_blockwise_kernels.py+38-36
- test/prototype/inductor/test_int8_sdpa_fusion.py+1
- test/prototype/moe_training/test_fsdp.py+29-12
- test/prototype/moe_training/test_fsdp_tp.py+8
- test/prototype/moe_training/test_kernels.py+82-5
- test/prototype/moe_training/test_scaled_grouped_mm.py+20-12
- test/prototype/moe_training/test_tp.py+27-7
- test/prototype/moe_training/test_training.py+29-104
- test/prototype/mx_formats/test_mx_tensor.py+2-2
- test/prototype/mx_formats/test_nvfp4_tensor.py+2-2
- test/prototype/test_awq.py+4-22
- test/prototype/test_dynamic_activation_lut.py+7-3
- test/quantization/quantize_/workflows/int4/test_int4_opaque_tensor.py+85
- test/quantization/quantize_/workflows/int4/test_int4_tensor.py+20
- test/quantization/quantize_/workflows/intx/test_intx_opaque_tensor.py+339
- test/quantization/quantize_/workflows/intx/test_intx_unpacked_tensor.py-145
- test/quantization/quantize_/workflows/intx/test_intx_unpacked_to_int8_tensor.py+417
- test/quantization/test_qat.py+40
- test/test_ops.py+64
- torchao/csrc/cpu/scaled_embedding_bag.cpp+183
- torchao/ops.py+19
- torchao/prototype/awq/api.py+7-2
- torchao/prototype/awq/example.py+6-21
- torchao/prototype/blockwise_fp8_training/kernels.py+144-83
- torchao/prototype/blockwise_fp8_training/linear.py+47-27
- torchao/prototype/moe_training/kernels/__init__.py+3
- torchao/prototype/moe_training/kernels/float8_rowwise.py+216-20
- torchao/prototype/moe_training/kernels/jagged_float8_scales.py+2-2
- torchao/prototype/moe_training/kernels/mxfp8.py+135
- torchao/prototype/moe_training/scaled_grouped_mm.py+79-63
- torchao/prototype/moe_training/utils.py+6-6
- torchao/prototype/mx_formats/nvfp4_tensor.py+6-6
- torchao/prototype/mx_formats/utils.py+73
- torchao/quantization/__init__.py+6-2
- torchao/quantization/qat/fake_quantize_config.py+24-7
- torchao/quantization/quant_api.py+104-14
- torchao/quantization/quantize_/common/__init__.py+2
- torchao/quantization/quantize_/common/packing_format.py+8-1
- torchao/quantization/quantize_/common/protocol.py+22
- torchao/quantization/quantize_/workflows/__init__.py+11-2
- torchao/quantization/quantize_/workflows/int4/int4_opaque_tensor.py+195
- torchao/quantization/quantize_/workflows/int4/int4_tensor.py+51-9
- torchao/quantization/quantize_/workflows/intx/__init__.py-5
- torchao/quantization/quantize_/workflows/intx/intx_opaque_tensor.py+345
- torchao/quantization/quantize_/workflows/intx/intx_unpacked_to_int8_tensor.py+51-25
- torchao/utils.py+37-31
0 commit comments