@@ -172,6 +172,7 @@ cc_library_static {
172172 proprietary : true ,
173173 local_include_dirs : [" build/android-arm64v8a/src/core" ,
174174 " build/android-arm64v8a/src/core/CL" ,
175+ " compute_kernel_writer/include" ,
175176 " src/core/common" ,
176177 " src/core/helpers" ,
177178 " src/core/NEON/kernels/arm_gemm" ,
@@ -323,14 +324,17 @@ cc_library_static {
323324 " src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp" ,
324325 " src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp" ,
325326 " src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp" ,
327+ " src/core/NEON/kernels/arm_gemm/gemm_bf16bf16.cpp" ,
326328 " src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp" ,
327329 " src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp" ,
328330 " src/core/NEON/kernels/arm_gemm/gemm_int16.cpp" ,
329331 " src/core/NEON/kernels/arm_gemm/gemm_int8.cpp" ,
330332 " src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp" ,
331333 " src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp" ,
334+ " src/core/NEON/kernels/arm_gemm/gemm_s8fp32.cpp" ,
332335 " src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp" ,
333336 " src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp" ,
337+ " src/core/NEON/kernels/arm_gemm/interleave-8way.cpp" ,
334338 " src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp" ,
335339 " src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp" ,
336340 " src/core/NEON/kernels/arm_gemm/mergeresults-fp16.cpp" ,
@@ -621,7 +625,6 @@ cc_library_static {
621625 " src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp" ,
622626 " src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp" ,
623627 " src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp" ,
624- " src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp" ,
625628 " src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp" ,
626629 " src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp" ,
627630 " src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp" ,
@@ -634,8 +637,6 @@ cc_library_static {
634637 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp" ,
635638 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp" ,
636639 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp" ,
637- " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp" ,
638- " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp" ,
639640 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp" ,
640641 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp" ,
641642 " src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp" ,
@@ -657,19 +658,6 @@ cc_library_static {
657658 " src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp" ,
658659 " src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp" ,
659660 " src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp" ,
660- " src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp" ,
661- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp" ,
662- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp" ,
663- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp" ,
664- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp" ,
665- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp" ,
666- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp" ,
667- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp" ,
668- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp" ,
669- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp" ,
670- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp" ,
671- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp" ,
672- " src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp" ,
673661 " src/gpu/cl/ClContext.cpp" ,
674662 " src/gpu/cl/ClKernelLibrary.cpp" ,
675663 " src/gpu/cl/ClQueue.cpp" ,
@@ -720,6 +708,7 @@ cc_library_static {
720708 " src/gpu/cl/kernels/ClQuantizeKernel.cpp" ,
721709 " src/gpu/cl/kernels/ClReshapeKernel.cpp" ,
722710 " src/gpu/cl/kernels/ClScaleKernel.cpp" ,
711+ " src/gpu/cl/kernels/ClScatterKernel.cpp" ,
723712 " src/gpu/cl/kernels/ClSoftmaxKernel.cpp" ,
724713 " src/gpu/cl/kernels/ClTransposeKernel.cpp" ,
725714 " src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp" ,
@@ -771,6 +760,7 @@ cc_library_static {
771760 " src/gpu/cl/operators/ClQuantize.cpp" ,
772761 " src/gpu/cl/operators/ClReshape.cpp" ,
773762 " src/gpu/cl/operators/ClScale.cpp" ,
763+ " src/gpu/cl/operators/ClScatter.cpp" ,
774764 " src/gpu/cl/operators/ClSoftmax.cpp" ,
775765 " src/gpu/cl/operators/ClSub.cpp" ,
776766 " src/gpu/cl/operators/ClTranspose.cpp" ,
@@ -869,6 +859,7 @@ cc_library_static {
869859 " src/runtime/CL/functions/CLReshapeLayer.cpp" ,
870860 " src/runtime/CL/functions/CLReverse.cpp" ,
871861 " src/runtime/CL/functions/CLScale.cpp" ,
862+ " src/runtime/CL/functions/CLScatter.cpp" ,
872863 " src/runtime/CL/functions/CLSelect.cpp" ,
873864 " src/runtime/CL/functions/CLSlice.cpp" ,
874865 " src/runtime/CL/functions/CLSoftmaxLayer.cpp" ,
@@ -1224,6 +1215,7 @@ cc_library_static {
12241215 " src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32/generic.cpp" ,
12251216 " src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16/generic.cpp" ,
12261217 " src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_4x24/generic.cpp" ,
1218+ " src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_6x16/generic.cpp" ,
12271219 " src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12/generic.cpp" ,
12281220 " src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_mmla_8x12/generic.cpp" ,
12291221 " src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24/generic.cpp" ,
@@ -1311,6 +1303,9 @@ cc_library_static {
13111303 " src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL/generic.cpp" ,
13121304 " src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL/generic.cpp" ,
13131305 " src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL/generic.cpp" ,
1306+ " src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8qfp32_mopa_1VLx4VL/generic.cpp" ,
1307+ " src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8qfp32_mopa_2VLx2VL/generic.cpp" ,
1308+ " src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8qfp32_mopa_4VLx1VL/generic.cpp" ,
13141309 " src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL/generic.cpp" ,
13151310 " src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL/generic.cpp" ,
13161311 " src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL/generic.cpp" ,
0 commit comments