Skip to content

Commit 874e0c7

Browse files
author
Jenkins
committed
Compute Library v23.08
1 parent 2b2ffe7 commit 874e0c7

File tree

1,303 files changed

+157667
-134628
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,303 files changed

+157667
-134628
lines changed

Android.bp

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ opencl_srcs = [
5151
"src/core/CL/cl_kernels/common/instance_normalization.cl",
5252
"src/core/CL/cl_kernels/common/l2_normalize.cl",
5353
"src/core/CL/cl_kernels/common/mat_mul.cl",
54+
"src/core/CL/cl_kernels/common/mat_mul_mmul.cl",
5455
"src/core/CL/cl_kernels/common/mat_mul_quantized.cl",
5556
"src/core/CL/cl_kernels/common/mean_stddev_normalization.cl",
5657
"src/core/CL/cl_kernels/common/memset.cl",
@@ -165,7 +166,9 @@ arm_compute_library_defaults {
165166
"-DARM_COMPUTE_ENABLE_NEON",
166167
"-Wno-unused-parameter",
167168
"-DNO_DOT_IN_TOOLCHAIN",
168-
"-Wno-implicit-fallthrough"
169+
"-Wno-implicit-fallthrough",
170+
"-fPIC",
171+
"-DACL_INTERNAL_TEST_CKW_IN_DF"
169172
],
170173
rtti: true,
171174
}
@@ -176,6 +179,8 @@ cc_library_static {
176179
proprietary: true,
177180
local_include_dirs: ["build/android-arm64v8a/src/core",
178181
"build/android-arm64v8a/src/core/CL",
182+
"compute_kernel_writer/prototype/include",
183+
"compute_kernel_writer/prototype",
179184
"src/core/common",
180185
"src/core/helpers",
181186
"src/core/NEON/kernels/arm_gemm",
@@ -185,6 +190,15 @@ cc_library_static {
185190
"src/cpu/kernels/assembly"],
186191
export_include_dirs: [".", "./include"],
187192
srcs: [
193+
"compute_kernel_writer/prototype/src/Kernel.cpp",
194+
"compute_kernel_writer/prototype/src/KernelArgument.cpp",
195+
"compute_kernel_writer/prototype/src/KernelWriter.cpp",
196+
"compute_kernel_writer/prototype/src/OperandBase.cpp",
197+
"compute_kernel_writer/prototype/src/TensorInfo.cpp",
198+
"compute_kernel_writer/prototype/src/TensorOperand.cpp",
199+
"compute_kernel_writer/prototype/src/TensorTileSampler.cpp",
200+
"compute_kernel_writer/prototype/src/TileInfo.cpp",
201+
"compute_kernel_writer/prototype/src/TileOperand.cpp",
188202
"src/c/AclContext.cpp",
189203
"src/c/AclOperator.cpp",
190204
"src/c/AclQueue.cpp",
@@ -313,9 +327,9 @@ cc_library_static {
313327
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_strategies_common.cpp",
314328
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp",
315329
"src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp",
316-
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp",
317330
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/generic.cpp",
318331
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/generic_quantized_dot_product.cpp",
332+
"src/core/NEON/kernels/arm_conv/depthwise/premultiply.cpp",
319333
"src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp",
320334
"src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp",
321335
"src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp",
@@ -363,7 +377,6 @@ cc_library_static {
363377
"src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_4x4_3x3.cpp",
364378
"src/core/NEON/kernels/convolution/winograd/output_transforms_fp16.cpp",
365379
"src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp",
366-
"src/core/NEON/kernels/convolution/winograd/padding.cpp",
367380
"src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_2x2_3x3.cpp",
368381
"src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_2x2_5x5.cpp",
369382
"src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_4x4_3x3.cpp",
@@ -384,8 +397,14 @@ cc_library_static {
384397
"src/core/Version.cpp",
385398
"src/core/helpers/SoftmaxHelpers.cpp",
386399
"src/core/helpers/WindowHelpers.cpp",
400+
"src/core/utils/ActivationFunctionUtils.cpp",
387401
"src/core/utils/AssemblyUtils.cpp",
402+
"src/core/utils/DataLayoutUtils.cpp",
403+
"src/core/utils/DataTypeUtils.cpp",
404+
"src/core/utils/FormatUtils.cpp",
405+
"src/core/utils/InterpolationPolicyUtils.cpp",
388406
"src/core/utils/ScaleUtils.cpp",
407+
"src/core/utils/StringUtils.cpp",
389408
"src/core/utils/helpers/fft.cpp",
390409
"src/core/utils/helpers/tensor_transform.cpp",
391410
"src/core/utils/io/FileHandler.cpp",
@@ -604,6 +623,7 @@ cc_library_static {
604623
"src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp",
605624
"src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp",
606625
"src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp",
626+
"src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp",
607627
"src/dynamic_fusion/sketch/attributes/CastAttributes.cpp",
608628
"src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
609629
"src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
@@ -620,6 +640,16 @@ cc_library_static {
620640
"src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp",
621641
"src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp",
622642
"src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp",
643+
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp",
644+
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp",
645+
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp",
646+
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp",
647+
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp",
648+
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
649+
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
650+
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp",
651+
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
652+
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
623653
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
624654
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
625655
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
@@ -700,6 +730,7 @@ cc_library_static {
700730
"src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp",
701731
"src/gpu/cl/kernels/ClMatMulLowpNativeKernel.cpp",
702732
"src/gpu/cl/kernels/ClMatMulNativeKernel.cpp",
733+
"src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp",
703734
"src/gpu/cl/kernels/ClMulKernel.cpp",
704735
"src/gpu/cl/kernels/ClPermuteKernel.cpp",
705736
"src/gpu/cl/kernels/ClPool2dKernel.cpp",
@@ -1020,7 +1051,6 @@ cc_library_static {
10201051
srcs: [
10211052
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
10221053
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp",
1023-
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_8b_mla.cpp",
10241054
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp",
10251055
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp",
10261056
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",

BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,7 @@ cc_library(
382382
}),
383383
includes = [
384384
"arm_compute/runtime",
385+
"src/core/NEON/kernels/arm_gemm",
385386
"src/core/NEON/kernels/assembly",
386387
"src/core/NEON/kernels/convolution/common",
387388
"src/core/NEON/kernels/convolution/winograd",

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2828
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
2929
project(
3030
ArmCompute
31-
VERSION 31.0.1
31+
VERSION 32.0.0
3232
DESCRIPTION
3333
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
3434
LANGUAGES C CXX ASM)
@@ -197,6 +197,7 @@ target_include_directories(
197197
${CMAKE_CURRENT_SOURCE_DIR}
198198
PRIVATE src
199199
src/cpu/kernels/assembly
200+
src/core/NEON/kernels/arm_gemm
200201
src/core/NEON/kernels/assembly
201202
src/core/NEON/kernels/convolution/common
202203
src/core/NEON/kernels/arm_conv/depthwise
@@ -223,6 +224,7 @@ target_include_directories(
223224
${CMAKE_CURRENT_SOURCE_DIR}
224225
PRIVATE src
225226
src/cpu/kernels/assembly
227+
src/core/NEON/kernels/arm_gemm
226228
src/core/NEON/kernels/assembly
227229
src/core/NEON/kernels/convolution/common
228230
src/core/NEON/kernels/arm_conv/depthwise

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Please read https://arm-software.github.io/ComputeLibrary/v23.02.1/contribution_guidelines.xhtml
1+
Please read https://arm-software.github.io/ComputeLibrary/latest/contribution_guidelines.xhtml
22

33
Here on github we only publish a snapshot of the main development branch for each release, that's the reason why we don't accept pull requests.
44

0 commit comments

Comments
 (0)