|
160 | 160 | - [x] 📖 [sgemv_k16_f32_kernel](./sgemv/sgemv.cu)
|
161 | 161 | - [X] 📖 [warp_reduce_fp32/fp16/bf16_kernel](./reduce/block_all_reduce.cu)
|
162 | 162 | - [X] 📖 [block_reduce_fp32_kernel](./reduce/block_all_reduce.cu)
|
163 |
| -- [X] 📖 [block_all_reduce_sum_f32_acc_with_f32_kernel](./reduce/block_all_reduce.cu) |
164 |
| -- [X] 📖 [block_all_reduce_sum_f32x4_acc_with_f32_kernel](./reduce/block_all_reduce.cu) |
165 |
| -- [X] 📖 [block_all_reduce_sum_f16_acc_with_f16_kernel](./reduce/block_all_reduce.cu) |
166 |
| -- [X] 📖 [block_all_reduce_sum_f16_acc_with_f32_kernel](./reduce/block_all_reduce.cu) |
167 |
| -- [X] 📖 [block_all_reduce_sum_f16x2_acc_with_f16_kernel](./reduce/block_all_reduce.cu) |
168 |
| -- [X] 📖 [block_all_reduce_sum_f16x2_acc_with_f32_kernel](./reduce/block_all_reduce.cu) |
169 |
| -- [X] 📖 [block_all_reduce_sum_bf16_acc_with_bf16_kernel](./reduce/block_all_reduce.cu) |
170 |
| -- [X] 📖 [block_all_reduce_sum_bf16_acc_with_f32_kernel](./reduce/block_all_reduce.cu) |
171 |
| -- [X] 📖 [block_all_reduce_sum_bf16x2_acc_with_bf16_kernel](./reduce/block_all_reduce.cu) |
172 |
| -- [X] 📖 [block_all_reduce_sum_bf16x2_acc_with_f32_kernel](./reduce/block_all_reduce.cu) |
173 |
| -- [X] 📖 [block_all_reduce_sum_fp8_e4m3_acc_with_f16_kernel](./reduce/block_all_reduce.cu) |
| 163 | +- [X] 📖 [block_all_reduce_sum_f32_f32_kernel](./reduce/block_all_reduce.cu) |
| 164 | +- [X] 📖 [block_all_reduce_sum_f32x4_f32_kernel](./reduce/block_all_reduce.cu) |
| 165 | +- [X] 📖 [block_all_reduce_sum_f16_f16_kernel](./reduce/block_all_reduce.cu) |
| 166 | +- [X] 📖 [block_all_reduce_sum_f16_f32_kernel](./reduce/block_all_reduce.cu) |
| 167 | +- [X] 📖 [block_all_reduce_sum_f16x2_f16_kernel](./reduce/block_all_reduce.cu) |
| 168 | +- [X] 📖 [block_all_reduce_sum_f16x2_f32_kernel](./reduce/block_all_reduce.cu) |
| 169 | +- [X] 📖 [block_all_reduce_sum_bf16_bf16_kernel](./reduce/block_all_reduce.cu) |
| 170 | +- [X] 📖 [block_all_reduce_sum_bf16_f32_kernel](./reduce/block_all_reduce.cu) |
| 171 | +- [X] 📖 [block_all_reduce_sum_bf16x2_bf16_kernel](./reduce/block_all_reduce.cu) |
| 172 | +- [X] 📖 [block_all_reduce_sum_bf16x2_f32_kernel](./reduce/block_all_reduce.cu) |
| 173 | +- [X] 📖 [block_all_reduce_sum_fp8_e4m3_f16_kernel](./reduce/block_all_reduce.cu) |
174 | 174 | - [x] 📖 [dot_product_f32_kernel](./dot-product/dot_product.cu)
|
175 | 175 | - [x] 📖 [dot_product_f32x4_kernel](./dot-product/dot_product.cu)
|
176 | 176 | - [x] 📖 [elementwise_f32_kernel](./elementwise/elementwise.cu)
|
|
0 commit comments