Skip to content

Commit a53ffdc

Browse files
author
Jenkins
committed
Compute Library v24.05
1 parent 4fda7a8 commit a53ffdc

File tree

85 files changed

+6114
-2664
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+6114
-2664
lines changed

Android.bp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ opencl_srcs = [
6565
"src/core/CL/cl_kernels/common/roi_align_layer.cl",
6666
"src/core/CL/cl_kernels/common/roi_align_layer_quantized.cl",
6767
"src/core/CL/cl_kernels/common/roi_pooling_layer.cl",
68+
"src/core/CL/cl_kernels/common/scatter.cl",
6869
"src/core/CL/cl_kernels/common/select.cl",
6970
"src/core/CL/cl_kernels/common/slice_ops.cl",
7071
"src/core/CL/cl_kernels/common/softmax_layer.cl",
@@ -488,6 +489,8 @@ cc_library_static {
488489
"src/cpu/kernels/depthwiseconv2d/generic/neon/impl.cpp",
489490
"src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8.cpp",
490491
"src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8_signed.cpp",
492+
"src/cpu/kernels/dequantize/generic/neon/fp16.cpp",
493+
"src/cpu/kernels/dequantize/generic/neon/fp32.cpp",
491494
"src/cpu/kernels/directconv2d/nchw/all.cpp",
492495
"src/cpu/kernels/directconv2d/nchw/fp16.cpp",
493496
"src/cpu/kernels/directconv2d/nhwc/neon/fp16.cpp",
@@ -553,9 +556,17 @@ cc_library_static {
553556
"src/cpu/kernels/pool3d/neon/fp32.cpp",
554557
"src/cpu/kernels/pool3d/neon/qasymm8.cpp",
555558
"src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp",
559+
"src/cpu/kernels/quantize/generic/neon/fp16.cpp",
560+
"src/cpu/kernels/quantize/generic/neon/fp32.cpp",
561+
"src/cpu/kernels/quantize/generic/neon/integer.cpp",
556562
"src/cpu/kernels/range/generic/neon/fp16.cpp",
557563
"src/cpu/kernels/range/generic/neon/fp32.cpp",
558564
"src/cpu/kernels/range/generic/neon/integer.cpp",
565+
"src/cpu/kernels/reduction_layer/generic/neon/fp16.cpp",
566+
"src/cpu/kernels/reduction_layer/generic/neon/fp32.cpp",
567+
"src/cpu/kernels/reduction_layer/generic/neon/integer.cpp",
568+
"src/cpu/kernels/reduction_layer/generic/neon/qasymm8.cpp",
569+
"src/cpu/kernels/reduction_layer/generic/neon/qasymm8_signed.cpp",
559570
"src/cpu/kernels/roialign/generic/neon/fp16.cpp",
560571
"src/cpu/kernels/roialign/generic/neon/fp32.cpp",
561572
"src/cpu/kernels/roialign/generic/neon/qasymm8.cpp",

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2828
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
2929
project(
3030
ArmCompute
31-
VERSION 36.0.0
31+
VERSION 37.0.0
3232
DESCRIPTION
3333
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
3434
LANGUAGES C CXX ASM)

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
1010
</div>
1111

12-
# Compute Library ![](https://img.shields.io/badge/latest_release-24.04-green)
12+
# Compute Library ![](https://img.shields.io/badge/latest_release-24.05-green)
1313

1414

1515
The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPU architectures.<br>
@@ -37,7 +37,7 @@ Key Features:
3737
<br>
3838

3939
## Documentation
40-
[![Documentation](https://img.shields.io/badge/documentation-24.04-green)](https://arm-software.github.io/ComputeLibrary/latest)
40+
[![Documentation](https://img.shields.io/badge/documentation-24.05-green)](https://arm-software.github.io/ComputeLibrary/latest)
4141

4242
> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
4343
@@ -50,24 +50,24 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
5050

5151
| Platform | Operating System | Release archive (Download) |
5252
| -------------- | ---------------- | -------------------------- |
53-
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-armv7a-neon.tar.gz) |
54-
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon.tar.gz) |
55-
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon-cl.tar.gz) |
56-
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon-cl.tar.gz) |
53+
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-armv7a-neon.tar.gz) |
54+
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon.tar.gz) |
55+
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon-cl.tar.gz) |
56+
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon-cl.tar.gz) |
5757

5858
<br>
5959

6060
| Architecture | Operating System | Release archive (Download) |
6161
| ------------ | ---------------- | -------------------------- |
62-
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-armv7a-neon-cl.tar.gz) |
63-
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8a-neon-cl.tar.gz) |
64-
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8a-neon-cl.tar.gz) |
65-
| arm64-v8.2-a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
66-
| arm64-v8.2-a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.04/arm_compute-v24.04-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
62+
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-armv7a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-armv7a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-armv7a-neon-cl.tar.gz) |
63+
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8a-neon-cl.tar.gz) |
64+
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8a-neon-cl.tar.gz) |
65+
| arm64-v8.2-a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-android-arm64-v8.2-a-neon-cl.tar.gz) |
66+
| arm64-v8.2-a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8.2-a-neon.tar.gz) [![](https://img.shields.io/badge/build-opencl-blue)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8.2-a-cl.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.05/arm_compute-v24.05-bin-linux-arm64-v8.2-a-neon-cl.tar.gz) |
6767

6868
<br>
6969

70-
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.04-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.04)
70+
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.05-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.05)
7171

7272
Pre-built binaries are generated with the following security / good coding practices related flags:
7373
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong

SConscript

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ import json
3232
import codecs
3333
import platform
3434

35-
VERSION = "v24.04"
36-
LIBRARY_VERSION_MAJOR = 36
35+
VERSION = "v24.05"
36+
LIBRARY_VERSION_MAJOR = 37
3737
LIBRARY_VERSION_MINOR = 0
3838
LIBRARY_VERSION_PATCH = 0
3939
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)
@@ -429,6 +429,7 @@ if env['opencl'] and env['embed_kernels']:
429429
'src/core/CL/cl_kernels/common/fill_border.cl',
430430
'src/core/CL/cl_kernels/common/floor.cl',
431431
'src/core/CL/cl_kernels/common/gather.cl',
432+
'src/core/CL/cl_kernels/common/scatter.cl',
432433
'src/core/CL/cl_kernels/common/gemm.cl',
433434
'src/core/CL/cl_kernels/common/gemm_reshaped_only_rhs_mmul.cl',
434435
'src/core/CL/cl_kernels/common/gemm_utils.cl',

arm_compute/core/CPP/CPPTypes.h

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2022 Arm Limited.
2+
* Copyright (c) 2017-2022, 2024 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -21,8 +21,8 @@
2121
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
* SOFTWARE.
2323
*/
24-
#ifndef ARM_COMPUTE_CPP_TYPES_H
25-
#define ARM_COMPUTE_CPP_TYPES_H
24+
#ifndef ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
25+
#define ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H
2626

2727
#include "arm_compute/core/Error.h"
2828

@@ -170,6 +170,17 @@ class CPUInfo final
170170
* @return Number of CPUs
171171
*/
172172
unsigned int get_cpu_num() const;
173+
/** Return the maximum number of CPUs present excluding the little cores
174+
* in case of an Android device
175+
*
176+
* @return Number of CPUs excluding little
177+
*/
178+
unsigned int get_cpu_num_excluding_little() const;
179+
/** Return the vector length in bytes for sme2
180+
*
181+
* @return Vector length if sme2 is enabled, otherwise returns 0.
182+
*/
183+
unsigned long get_sme2_vector_length() const;
173184

174185
private:
175186
struct Impl;
@@ -184,4 +195,4 @@ struct ThreadInfo
184195
const CPUInfo *cpu_info{nullptr};
185196
};
186197
} // namespace arm_compute
187-
#endif /* ARM_COMPUTE_CPP_TYPES_H */
198+
#endif // ACL_ARM_COMPUTE_CORE_CPP_CPPTYPES_H

arm_compute/runtime/CL/functions/CLScatter.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,16 @@ class CLScatter : public IFunction
5454
/** Default destructor */
5555
~CLScatter();
5656
/** Initialise the kernel's inputs and outputs
57+
*
58+
* @note Negative indices are treated as out of bounds.
5759
*
5860
* Valid data layouts:
5961
* - All
6062
*
61-
*
6263
* @param[in] compile_context The compile context to be used.
6364
* @param[in] src Source tensor. Values used to fill output. Can be nullptr when zero initialization is true.
6465
* @param[in] updates Tensor containing values used to update output tensor. Data types supported: same as @p src
65-
* @param[in] indices Tensor containing Indices to change in the output Tensor. Data types supported : U32
66+
* @param[in] indices Tensor containing Indices to change in the output Tensor. Data types supported : S32
6667
* @param[out] output Destination tensor. Data types supported: same as @p src.
6768
* @param[in] info Scatter info object.
6869
*/
@@ -85,7 +86,7 @@ class CLScatter : public IFunction
8586
*
8687
* @param[in] src Source tensor.
8788
* @param[in] updates Tensor containing values used for updating the output Tensor. Data types supported : same as @p src
88-
* @param[in] indices Tensor containing Indices to change in the output Tensor. Data types supported : U32
89+
* @param[in] indices Tensor containing Indices to change in the output Tensor. Data types supported : S32
8990
* @param[in] output Destination tensor. Data types supported: same as @p src.
9091
* @param[in] info Scatter info containing type of scatter.
9192
*

arm_compute/runtime/OMP/OMPScheduler.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017-2021 Arm Limited.
2+
* Copyright (c) 2017-2021, 2024 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -21,8 +21,8 @@
2121
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
* SOFTWARE.
2323
*/
24-
#ifndef ARM_COMPUTE_OMPSCHEDULER_H
25-
#define ARM_COMPUTE_OMPSCHEDULER_H
24+
#ifndef ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H
25+
#define ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H
2626

2727
#include "arm_compute/runtime/IScheduler.h"
2828

@@ -79,6 +79,7 @@ class OMPScheduler final : public IScheduler
7979

8080
private:
8181
unsigned int _num_threads;
82+
unsigned int _nonlittle_num_cpus;
8283
};
8384
} // namespace arm_compute
84-
#endif /* ARM_COMPUTE_OMPSCHEDULER_H */
85+
#endif // ACL_ARM_COMPUTE_RUNTIME_OMP_OMPSCHEDULER_H

docs/Doxyfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ PROJECT_NAME = "Compute Library"
3838
# could be handy for archiving the generated documentation or if some version
3939
# control system is used.
4040

41-
PROJECT_NUMBER = 24.04
41+
PROJECT_NUMBER = 24.05
4242

4343
# Using the PROJECT_BRIEF tag one can provide an optional one line description
4444
# for a project that appears at the top of each page and should give viewer a

docs/user_guide/release_version_and_change_log.dox

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,14 @@ If there is more than one release in a month then an extra sequential number is
4141

4242
@section S2_2_changelog Changelog
4343

44+
v24.05 Public major release
45+
- Add @ref CLScatter operator for FP32/16, S32/16/8, U32/16/8 data types
46+
- Various fixes to enable FP16 kernels in armv8a multi_isa builds.
47+
- Updated logic in the OpenMP scheduler to exclude LITTLE cores.
48+
4449
v24.04 Public major release
4550
- Add Bfloat16 data type support for @ref NEMatMul.
46-
- Add support for SoftMax in SME2 for FP32 and FP16.
51+
- Add support for SoftMax in SME2 for FP32, FP16, QASYMM8 and QASYMM8_SIGNED.
4752
- Add support for in place accumulation to CPU GEMM kernels.
4853
- Add low-precision Int8 * Int8 -> FP32 CPU GEMM which dequantizes after multiplication
4954
- Add is_dynamic flag to QuantizationInfo to signal to operators that it may change after configuration

0 commit comments

Comments
 (0)