Commit c50df01
committed
Add torch, torchvision and torchaudio dependencies
1 parent 12edd72 commit c50df01
File tree
6 files changed
+8
-5
lines changed- backends
- vulkan/third-party
- xnnpack/third-party
- extension/llm
- third-party
6 files changed
+8
-5
lines changedSubmodule Vulkan-Headers updated 94 files
- .cmake-format.py-34
- .gitattributes+8
- .github/ISSUE_TEMPLATE/bug_report.md+26-6
- .github/dependabot.yml+12
- .github/pull_request_template.md+18
- .github/workflows/ci.yml+132
- .gitignore+10
- .reuse/dep5+15
- BUILD.gn+33-19
- BUILD.md+25-253
- CMakeLists.txt+125-44
- CODE_OF_CONDUCT.adoc+10
- CODE_OF_CONDUCT.md-1
- CONTRIBUTING.md+40
- LICENSE.md+18
- LICENSES/Apache-2.0.txt
- LICENSES/MIT.txt+9
- Makefile.release+110
- README.md+43-68
- SECURITY.md+11
- cmake/Copyright_cmake.txt-126
- cmake/cmake_uninstall.cmake.in-21
- include/vk_video/vulkan_video_codec_av1std.h+394
- include/vk_video/vulkan_video_codec_av1std_decode.h+109
- include/vk_video/vulkan_video_codec_av1std_encode.h+143
- include/vk_video/vulkan_video_codec_h264std.h+44-37
- include/vk_video/vulkan_video_codec_h264std_decode.h+10-36
- include/vk_video/vulkan_video_codec_h264std_encode.h+46-31
- include/vk_video/vulkan_video_codec_h265std.h+144-57
- include/vk_video/vulkan_video_codec_h265std_decode.h+9-8
- include/vk_video/vulkan_video_codec_h265std_encode.h+55-33
- include/vk_video/vulkan_video_codec_vp9std.h+151
- include/vk_video/vulkan_video_codec_vp9std_decode.h+68
- include/vk_video/vulkan_video_codecs_common.h+6-1
- include/vulkan/vk_icd.h+23-24
- include/vulkan/vk_layer.h+6-27
- include/vulkan/vk_platform.h+1-1
- include/vulkan/vk_sdk_platform.h-69
- include/vulkan/vulkan.cppm+10.2k
- include/vulkan/vulkan.h+13-2
- include/vulkan/vulkan.hpp+17.4k-7.9k
- include/vulkan/vulkan_android.h+35-1
- include/vulkan/vulkan_beta.h+266-911
- include/vulkan/vulkan_core.h+15.0k-5.5k
- include/vulkan/vulkan_directfb.h+6-1
- include/vulkan/vulkan_enums.hpp+7.2k-13.2k
- include/vulkan/vulkan_extension_inspection.hpp+3.7k
- include/vulkan/vulkan_format_traits.hpp+5.7k-3.4k
- include/vulkan/vulkan_fuchsia.h+25-1
- include/vulkan/vulkan_funcs.hpp+23.7k-9.4k
- include/vulkan/vulkan_ggp.h+5-1
- include/vulkan/vulkan_handles.hpp+16.3k-8.1k
- include/vulkan/vulkan_hash.hpp+12.7k-5.2k
- include/vulkan/vulkan_hpp_macros.hpp+326
- include/vulkan/vulkan_ios.h+4-1
- include/vulkan/vulkan_macos.h+4-1
- include/vulkan/vulkan_metal.h+192-2
- include/vulkan/vulkan_ohos.h+53
- include/vulkan/vulkan_raii.hpp+22.2k-10.8k
- include/vulkan/vulkan_screen.h+61-1
- include/vulkan/vulkan_shared.hpp+1.2k
- include/vulkan/vulkan_static_assertions.hpp+9.2k
- include/vulkan/vulkan_structs.hpp+116.4k-48.1k
- include/vulkan/vulkan_to_string.hpp+10.4k
- include/vulkan/vulkan_vi.h+4-1
- include/vulkan/vulkan_video.cppm+171
- include/vulkan/vulkan_video.hpp+5.4k
- include/vulkan/vulkan_wayland.h+6-1
- include/vulkan/vulkan_win32.h+58-1
- include/vulkan/vulkan_xcb.h+6-1
- include/vulkan/vulkan_xlib.h+6-1
- include/vulkan/vulkan_xlib_xrandr.h+6-1
- registry/apiconventions.py+13-3
- registry/base_generator.py+937
- registry/cgenerator.py+166-41
- registry/generator.py+279-80
- registry/genvk.py-817
- registry/parse_dependency.py+404
- registry/profiles/VP_KHR_roadmap.json+390
- registry/reg.py+409-99
- registry/spec_tools/conventions.py+228-24
- registry/spec_tools/util.py+2-2
- registry/stripAPI.py+42
- registry/validusage.json+92.8k-29.3k
- registry/video.xml+2.0k
- registry/vk.xml+18.9k-8.1k
- registry/vkconventions.py+66-27
- registry/vulkan_object.py+484
- tests/CMakeLists.txt+33
- tests/integration/CMakeLists.txt+89
- tests/vk_hpp.cpp+13
- tests/vk_hpp_module.cpp+13
- tests/vk_icd.c+14
- tests/vk_layer.c+14
- .github/workflows/build.yml+20-3
- .github/workflows/update.yml+11-4
- CMakeLists.txt+23-33
- LICENSE.md+1-1
- README.md+3-3
- generate.py+20-2
- test/cmake_using_installed_headers/CMakeLists.txt+1-1
- test/cmake_using_source_directly/CMakeLists.txt+3-2
- test/cmake_using_subdir_headers/CMakeLists.txt+1-1
- test/cmake_using_subdir_static/CMakeLists.txt+1-1
- volk.c+877-120
- volk.h+974-110
Submodule tokenizers updated from d202b36 to f09feca
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
67 | 67 | | |
68 | 68 | | |
69 | 69 | | |
| 70 | + | |
| 71 | + | |
| 72 | + | |
70 | 73 | | |
71 | 74 | | |
72 | 75 | | |
| |||
- .github/workflows/1xH100_tests.yml+53
- .github/workflows/1xL4_tests.yml+3-8
- .github/workflows/4xH100_tests.yml+51
- .github/workflows/build_wheels_linux.yml+3-3
- .github/workflows/regression_test.yml+2-2
- .github/workflows/regression_test_rocm.yml+3-1
- .github/workflows/torchao_experimental_test.yml+53-53
- README.md+30-21
- benchmarks/dashboard/ci_microbenchmark_runner.py+50-4
- benchmarks/dashboard/microbenchmark_quantization_config.yml+1
- benchmarks/float8/bench_linear_float8.py+1-1
- benchmarks/float8/bench_padding.py+1-1
- benchmarks/float8/float8_roofline.py+3-15
- benchmarks/microbenchmarks/benchmark_inference.py+9-7
- benchmarks/microbenchmarks/profiler.py+10-1
- benchmarks/microbenchmarks/test/test_benchmark_profiler.py+2-2
- benchmarks/mx_formats/cast_bench.py+69-11
- docs/source/api_ref_qat.rst+58
- docs/source/api_ref_quantization.rst-18
- docs/source/finetuning.rst+115-2
- docs/source/index.rst+1
- setup.py+37
- test/float8/test_base.py+5-5
- test/float8/test_compile.py+11-7
- test/float8/test_dtensor.py+5-5
- test/float8/test_everything_multi_gpu.sh+21
- test/float8/test_everything_single_gpu.sh+16
- test/float8/test_fsdp2/test_fsdp2.py+1-1
- test/prototype/moe_training/test_scaled_grouped_mm.py+1-1
- test/prototype/mx_formats/test_kernels.py+106-1
- test/prototype/mx_formats/test_mx_dtensor.py+12-1
- test/prototype/mx_formats/test_mx_linear.py+24-11
- test/quantization/quantize_/workflows/int4/test_int4_preshuffled_tensor.py+73-26
- test/quantization/test_qat.py+4-4
- torchao/__init__.py+3-1
- torchao/csrc/cuda/mx_kernels/mxfp8_cuda.cu+112
- torchao/csrc/cuda/mx_kernels/mxfp8_extension.cpp+128
- torchao/csrc/cuda/mx_kernels/mxfp8_quantize.cuh+1.0k
- torchao/csrc/cuda/mx_kernels/ptx.cuh+290
- torchao/dtypes/nf4tensor.py+1-1
- torchao/experimental/CMakeLists.txt+1-1
- torchao/experimental/Utils.cmake+1-1
- torchao/experimental/kernels/cpu/aarch64/linear/groupwise_lowbit_weight/groupwise_lowbit_weight_lut.h+26-4
- torchao/experimental/kernels/cpu/aarch64/tests/test_lut.cpp+3-3
- torchao/experimental/kernels/cpu/aarch64/tests/test_utils.h+1-9
- torchao/experimental/ops/groupwise_lowbit_weight_lut/groupwise_lowbit_weight_lut.cpp+235
- torchao/experimental/ops/groupwise_lowbit_weight_lut/groupwise_lowbit_weight_lut.h+126
- torchao/experimental/ops/groupwise_lowbit_weight_lut/kernel_config.h+229
- torchao/experimental/ops/groupwise_lowbit_weight_lut/kernel_selector.h+240
- torchao/experimental/ops/groupwise_lowbit_weight_lut/packed_weights_format.h+110
- torchao/float8/__init__.py+5-5
- torchao/float8/distributed_utils.py+2-2
- torchao/float8/float8_linear.py+1-1
- torchao/float8/float8_ops.py+54-47
- torchao/float8/float8_scaling_utils.py+4-4
- torchao/float8/float8_tensor_parallel.py+7-7
- torchao/float8/float8_training_tensor.py+12-12
- torchao/float8/fsdp_utils.py+13-9
- torchao/float8/inference.py+1-1
- torchao/prototype/float8nocompile/float8nocompile_linear.py+5-1
- torchao/prototype/float8nocompile/float8nocompile_scaling_utils.py+1-1
- torchao/prototype/float8nocompile/kernels/fp8_dynamic_tensorwise.py+30-26
- torchao/prototype/float8nocompile/kernels/fp8_dynamic_tensorwise_test.py+1-1
- torchao/prototype/mx_formats/config.py+16-4
- torchao/prototype/mx_formats/kernels.py+138-5
- torchao/prototype/mx_formats/mx_linear.py+42-15
- torchao/prototype/quantization/autoquant_v2.py+2-2
- torchao/prototype/spinquant/hadamard_utils.py+2-2
- torchao/prototype/spinquant/spinquant.py+58-35
- torchao/quantization/__init__.py+3-3
- torchao/quantization/prototype/qat/affine_fake_quantized_tensor.py+4-4
- torchao/quantization/pt2e/prepare.py+7-13
- torchao/quantization/pt2e/quantizer/composable_quantizer.py+5-4
- torchao/quantization/pt2e/quantizer/duplicate_dq_pass.py+4-7
- torchao/quantization/pt2e/quantizer/embedding_quantizer.py+2-1
- torchao/quantization/pt2e/quantizer/port_metadata_pass.py+8-13
- torchao/quantization/pt2e/quantizer/quantizer.py+3
- torchao/quantization/pt2e/quantizer/utils.py+6-8
- torchao/quantization/qat/__init__.py+6
- torchao/quantization/qat/affine_fake_quantized_tensor.py+18-22
- torchao/quantization/qat/api.py+2-2
- torchao/quantization/qat/linear.py+23-23
- torchao/quantization/qat/utils.py-26
- torchao/quantization/quant_api.py+24-7
- torchao/quantization/quantize_/__init__.py-9
- torchao/quantization/quantize_/workflows/__init__.py+7
- torchao/quantization/quantize_/workflows/int4/__init__.py
- torchao/quantization/quantize_/workflows/int4/int4_preshuffled_tensor.py+166-81
- version.txt+1-1
0 commit comments