Commit e532f3a
File tree
187 files changed
+11645
-3242
lines changed- .github
- scripts
- workflows
- 3rdparty
- aiter
- aot
- configs
- model_configs
- dist
- device_communicators
- jit
- ops
- triton
- _triton_kernels
- configs/gemm
- gluon
- utils
- _triton
- utility
- csrc
- ck_batched_gemm_a8w8
- ck_batched_gemm_bf16
- ck_gemm_a4w4_blockscale
- ck_gemm_a8w8_blockscale_bpreshuffle
- ck_gemm_a8w8_blockscale
- ck_gemm_a8w8_bpreshuffle
- ck_gemm_a8w8
- ck_gemm_moe_2stages_codegen
- ck_tile_gemm_moe_2stages/include
- cpp_itfs
- gluon_aot_tools
- pa_gluon_aot
- sampling
- include
- torch
- kernels
- mla
- metadata
- py_itfs_ck
- py_itfs_cu
- pybind
- gradlib
- gradlib
- hsa
- gfx942
- bf16gemm
- fmha_v3_fwd
- MI300
- MI308
- fmoe_2stages
- mla
- topk_per_row_decode
- topk_per_row_prefill
- gfx950
- bf16gemm
- fmha_v3_fwd
- fmoe_2stages
- mla
- op_tests
- cpp/mha
- multigpu_tests
- op_benchmarks/triton
- triton_tests
- gemm
- basic
- fused
- quant
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
187 files changed
+11645
-3242
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
12 | 12 | | |
13 | 13 | | |
14 | 14 | | |
| 15 | + | |
15 | 16 | | |
16 | 17 | | |
17 | 18 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
| 59 | + | |
| 60 | + | |
| 61 | + | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
133 | 133 | | |
134 | 134 | | |
135 | 135 | | |
136 | | - | |
| 136 | + | |
| 137 | + | |
| 138 | + | |
| 139 | + | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
137 | 144 | | |
138 | 145 | | |
139 | 146 | | |
| |||
242 | 249 | | |
243 | 250 | | |
244 | 251 | | |
245 | | - | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
103 | 103 | | |
104 | 104 | | |
105 | 105 | | |
| 106 | + | |
106 | 107 | | |
107 | 108 | | |
108 | 109 | | |
| |||
Submodule composable_kernel updated 70 files
- CHANGELOG.md+1
- CMakeLists.txt-16
- Jenkinsfile+46-3
- example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py+29-12
- example/ck_tile/01_fmha/codegen/ops/fmha_fwd.py+1-1
- example/ck_tile/01_fmha/fmha_bwd_runner.hpp+23-10
- example/ck_tile/01_fmha/fmha_fwd.hpp+9
- example/ck_tile/01_fmha/script/fmha_bwd_known_fails_gfx1201.txt
- example/ck_tile/17_grouped_gemm/CMakeLists.txt+12-1
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm.cpp+116-320
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm_bf8_aquant.cpp+7
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm_bf8_bquant.cpp+7
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm_bf8_rowcol.cpp+7
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm_bf8_tensor.cpp+7
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm_config.hpp+2-52
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm_fp8_aquant.cpp+7
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm_fp8_bquant.cpp+7
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm_fp8_rowcol.cpp+7
- example/ck_tile/17_grouped_gemm/quant_grouped_gemm_fp8_tensor.cpp+7
- example/ck_tile/17_grouped_gemm/quant_invoke_grouped_gemm_kernel.hpp+313
- example/ck_tile/17_grouped_gemm/quant_run_grouped_gemm_example.hpp+76-95
- example/ck_tile/38_block_scale_gemm/run_gemm_quant_example.inc-35
- experimental/builder/include/ck_tile/builder/conv_signature_concepts.hpp+16-4
- experimental/builder/include/ck_tile/builder/conv_signature_utils.hpp+192
- experimental/builder/test/test_conv_description.cpp+92
- include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp+10-2
- include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle_v3.hpp+10-2
- include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_large_tensor_cshuffle.hpp+10-2
- include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_splitk_xdl_cshuffle_two_stage.hpp+41-16
- include/ck_tile/core/config.hpp-4
- include/ck_tile/host/reference/reference_gemm.hpp+69-70
- include/ck_tile/ops/flatmm/pipeline/flatmm_pipeline_agmem_bgmem_creg_v1.hpp+1-1
- include/ck_tile/ops/flatmm/pipeline/mx_flatmm_pipeline_agmem_bgmem_creg_v1.hpp+1-1
- include/ck_tile/ops/flatmm/pipeline/mx_flatmm_pipeline_agmem_bgmem_creg_v1_policy.hpp+5-3
- include/ck_tile/ops/fmha/kernel/fmha_batch_prefill_kernel.hpp+117-70
- include/ck_tile/ops/fmha/kernel/fmha_fwd_kernel.hpp+18-7
- include/ck_tile/ops/gemm/kernel/universal_gemm_kernel.hpp+45-11
- include/ck_tile/ops/gemm_quant/kernel/gemm_quant_kernel.hpp-26
- include/ck_tile/ops/gemm_quant/pipeline/gemm_bquant_pipeline_ag_bg_cr_v3.hpp+2-2
- include/ck_tile/ops/grouped_convolution/kernel/grouped_convolution_backward_data_kernel.hpp+52-23
- include/ck_tile/ops/grouped_convolution/kernel/grouped_convolution_backward_weight_kernel.hpp+14-32
- library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_cshufflev3_scaleadd_ab_instance.hpp+124-16
- library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp+94-4
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeLists.txt+8-2
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp+16-16
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp+52
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp+52
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp+52
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp+16-16
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp+52
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp+52
- library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp+52
- profiler/src/profile_grouped_conv_fwd_bilinear.cpp-4
- test/ck_tile/gemm_block_scale/CMakeLists.txt+10-4
- test/ck_tile/gemm_block_scale/test_gemm_quant_abquant_base.cpp
- test/ck_tile/gemm_block_scale/test_gemm_quant_abquant_padding.cpp+39
- test/ck_tile/gemm_block_scale/test_gemm_quant_base.hpp+4-3
- test/ck_tile/gemm_block_scale/test_gemm_quant_fixtures.hpp+6
- test/ck_tile/gemm_weight_preshuffle/test_gemm_pipeline_ut_cases.inc-19
- test/ck_tile/grouped_conv/test_ck_tile_grouped_conv_bwd_weight.cpp+26-2
- test/ck_tile/grouped_gemm_quant/CMakeLists.txt+14-11
- test/ck_tile/grouped_gemm_quant/test_grouped_gemm_quant_aquant.cpp+17-1
- test/ck_tile/grouped_gemm_quant/test_grouped_gemm_quant_bquant.cpp+8-3
- test/ck_tile/grouped_gemm_quant/test_grouped_gemm_quant_bquant_preshuffleb.cpp+38
- test/ck_tile/grouped_gemm_quant/test_grouped_gemm_quant_ut_cases.inc+1-1
- test/ck_tile/grouped_gemm_quant/test_grouped_gemm_util_quant.hpp+7-4
- test/grouped_convnd_fwd/test_grouped_convnd_fwd_scaleadd_ab.cpp-4
- tile_engine/ops/gemm/gemm_multi_d/configs/default_ci_config.json+89
- tile_engine/ops/gemm/gemm_preshuffle/configs/default_ci_config.json+89
- tile_engine/ops/gemm/gemm_universal/configs/default_ci_config.json+89
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
43 | 43 | | |
44 | 44 | | |
45 | 45 | | |
46 | | - | |
47 | | - | |
48 | | - | |
49 | | - | |
50 | | - | |
51 | | - | |
52 | | - | |
53 | | - | |
54 | | - | |
55 | | - | |
56 | | - | |
57 | | - | |
58 | | - | |
59 | | - | |
60 | | - | |
61 | | - | |
62 | | - | |
63 | | - | |
64 | | - | |
65 | | - | |
66 | | - | |
67 | | - | |
68 | | - | |
69 | | - | |
70 | | - | |
71 | | - | |
72 | | - | |
73 | | - | |
74 | | - | |
75 | | - | |
76 | | - | |
77 | | - | |
78 | | - | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
| 59 | + | |
| 60 | + | |
| 61 | + | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
| 71 | + | |
| 72 | + | |
| 73 | + | |
| 74 | + | |
| 75 | + | |
| 76 | + | |
| 77 | + | |
| 78 | + | |
| 79 | + | |
79 | 80 | | |
80 | 81 | | |
81 | 82 | | |
82 | 83 | | |
83 | | - | |
84 | | - | |
85 | | - | |
86 | | - | |
87 | | - | |
88 | | - | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
89 | 90 | | |
90 | 91 | | |
91 | 92 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| 2 | + | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
| 10 | + | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
| 21 | + | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
| 37 | + | |
| 38 | + | |
| 39 | + | |
| 40 | + | |
| 41 | + | |
| 42 | + | |
| 43 | + | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
| 50 | + | |
| 51 | + | |
| 52 | + | |
| 53 | + | |
| 54 | + | |
| 55 | + | |
| 56 | + | |
| 57 | + | |
| 58 | + | |
| 59 | + | |
| 60 | + | |
| 61 | + | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| 69 | + | |
| 70 | + | |
| 71 | + | |
| 72 | + | |
| 73 | + | |
| 74 | + | |
| 75 | + | |
| 76 | + | |
| 77 | + | |
| 78 | + | |
| 79 | + | |
| 80 | + | |
| 81 | + | |
| 82 | + | |
| 83 | + | |
| 84 | + | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
0 commit comments