Skip to content

Commit f45d5a9

Browse files
arm_compute v18.01
Change-Id: I9bfa178c2e38bfd5fc812e62aab6760d87748e05
1 parent 6943bb0 commit f45d5a9

File tree

6,713 files changed

+194224
-133159
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

6,713 files changed

+194224
-133159
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Related projects:
99

1010
Documentation available here:
1111

12+
- [v18.01](https://arm-software.github.io/ComputeLibrary/v18.01/)
1213
- [v17.12](https://arm-software.github.io/ComputeLibrary/v17.12/)
1314
- [v17.10](https://arm-software.github.io/ComputeLibrary/v17.10/)
1415
- [v17.09](https://arm-software.github.io/ComputeLibrary/v17.09/)
@@ -19,6 +20,7 @@ Documentation available here:
1920

2021
Binaries available here:
2122

23+
- [v18.01](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.01/arm_compute-v18.01-bin.tar.gz)
2224
- [v17.12](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.12/arm_compute-v17.12-bin.tar.gz)
2325
- [v17.10](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.10/arm_compute-v17.10-bin.tar.gz)
2426
- [v17.09](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.09/arm_compute-v17.09-bin.tar.gz)

SConscript

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import os.path
2424
import re
2525
import subprocess
2626

27-
VERSION = "v17.12"
28-
SONAME_VERSION="6.0.0"
27+
VERSION = "v18.01"
28+
SONAME_VERSION="7.0.0"
2929

3030
Import('env')
3131
Import('vars')
@@ -175,6 +175,11 @@ if env['neon']:
175175
core_files += Glob('src/core/NEON/*.cpp')
176176
core_files += Glob('src/core/NEON/kernels/*.cpp')
177177

178+
# build winograd sources for both v7a and v8a
179+
core_files += Glob('src/core/NEON/kernels/winograd/*.cpp')
180+
core_files += Glob('src/core/NEON/kernels/winograd/transforms/*.cpp')
181+
arm_compute_env.Append(CPPPATH = ["arm_compute/core/NEON/kernels/winograd/"])
182+
178183
if env['arch'] == "armv7a":
179184
core_files += Glob('src/core/NEON/kernels/arm32/*.cpp')
180185

@@ -235,7 +240,7 @@ if env['neon'] and env['opencl']:
235240
Export('arm_compute_graph_a')
236241

237242
arm_compute_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']])
238-
arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core", "OpenCL" ])
243+
arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core"])
239244
Depends(arm_compute_graph_so, arm_compute_so)
240245
Depends(arm_compute_graph_so, opencl)
241246
Export('arm_compute_graph_so')

SConstruct

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ vars.AddVariables(
4949
BoolVariable("opencl", "Enable OpenCL support", True),
5050
BoolVariable("neon", "Enable Neon support", False),
5151
BoolVariable("gles_compute", "Enable OpenGL ES Compute Shader support", False),
52-
BoolVariable("embed_kernels", "Embed OpenCL kernels and OpenGL ES compute shaders in library binary", False),
52+
BoolVariable("embed_kernels", "Embed OpenCL kernels and OpenGL ES compute shaders in library binary", True),
5353
BoolVariable("set_soname", "Set the library's soname and shlibversion (requires SCons 2.4 or above)", False),
5454
BoolVariable("openmp", "Enable OpenMP backend", False),
5555
BoolVariable("cppthreads", "Enable C++11 threads backend", True),
@@ -86,7 +86,15 @@ env.Append(CXXFLAGS = ['-Wno-deprecated-declarations','-Wall','-DARCH_ARM',
8686

8787
env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP'])
8888

89-
if os.environ.get('CXX', 'g++') == 'clang++':
89+
default_cpp_compiler = 'g++' if env['os'] != 'android' else 'clang++'
90+
default_c_compiler = 'gcc' if env['os'] != 'android' else 'clang'
91+
cpp_compiler = os.environ.get('CXX', default_cpp_compiler)
92+
c_compiler = os.environ.get('CC', default_c_compiler)
93+
94+
if env['os'] == 'android' and ( cpp_compiler != 'clang++' or c_compiler != 'clang'):
95+
print "WARNING: Only clang is officially supported to build the Compute Library for Android"
96+
97+
if cpp_compiler == 'clang++':
9098
env.Append(CXXFLAGS = ['-Wno-format-nonliteral','-Wno-deprecated-increment-bool','-Wno-vla-extension','-Wno-mismatched-tags'])
9199
else:
92100
env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel'])
@@ -95,7 +103,7 @@ if env['cppthreads']:
95103
env.Append(CPPDEFINES = [('ARM_COMPUTE_CPP_SCHEDULER', 1)])
96104

97105
if env['openmp']:
98-
if os.environ.get('CXX', 'g++') == 'clang++':
106+
if cpp_compiler == 'clang++':
99107
print "Clang does not support OpenMP. Use scheduler=cpp."
100108
Exit(1)
101109

@@ -128,7 +136,7 @@ elif env['arch'] == 'arm64-v8a':
128136
elif env['arch'] == 'arm64-v8.2-a':
129137
env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2'])
130138

131-
if os.environ.get('CXX', 'g++') == 'clang++':
139+
if cpp_compiler == 'clang++':
132140
env.Append(CXXFLAGS = ['-fno-integrated-as'])
133141

134142
if env['os'] == 'linux':
@@ -147,8 +155,8 @@ elif env['arch'] == 'x86_64':
147155
if env['build'] == 'native':
148156
prefix = ""
149157

150-
env['CC'] = prefix + os.environ.get('CC', 'gcc')
151-
env['CXX'] = prefix + os.environ.get('CXX', 'g++')
158+
env['CC'] = prefix + c_compiler
159+
env['CXX'] = prefix + cpp_compiler
152160
env['LD'] = prefix + "ld"
153161
env['AS'] = prefix + "as"
154162
env['AR'] = prefix + "ar"
@@ -161,7 +169,7 @@ if not GetOption("help"):
161169
print("ERROR: Compiler '%s' not found" % env['CXX'])
162170
Exit(1)
163171

164-
if os.environ.get('CXX','g++') == 'g++':
172+
if cpp_compiler == 'g++':
165173
if env['arch'] == 'arm64-v8.2-a' and not version_at_least(compiler_ver, '6.2.1'):
166174
print "GCC 6.2.1 or newer is required to compile armv8.2-a code"
167175
Exit(1)

arm_compute/core/CL/CLKernelLibrary.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016, 2017 ARM Limited.
2+
* Copyright (c) 2016-2018 ARM Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -286,6 +286,14 @@ class CLKernelLibrary
286286
*/
287287
cl::NDRange default_ndrange() const;
288288

289+
/** Clear the library's cache of binary programs
290+
*/
291+
void clear_programs_cache()
292+
{
293+
_programs_map.clear();
294+
_built_programs_map.clear();
295+
}
296+
289297
private:
290298
/** Load program and its dependencies.
291299
*

arm_compute/core/CL/CLKernels.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016, 2017 ARM Limited.
2+
* Copyright (c) 2016-2018 ARM Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -42,6 +42,7 @@
4242
#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
4343
#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
4444
#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
45+
#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
4546
#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
4647
#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
4748
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
@@ -87,6 +88,7 @@
8788
#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
8889
#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
8990
#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
91+
#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
9092
#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
9193
#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
9294
#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"

arm_compute/core/CL/OpenCL.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ class CLSymbols final
5959
#define DECLARE_FUNCTION_PTR(func_name) \
6060
std::function<decltype(func_name)> func_name##_ptr = nullptr
6161

62+
DECLARE_FUNCTION_PTR(clCreateContextFromType);
63+
DECLARE_FUNCTION_PTR(clCreateCommandQueue);
64+
DECLARE_FUNCTION_PTR(clGetContextInfo);
6265
DECLARE_FUNCTION_PTR(clBuildProgram);
6366
DECLARE_FUNCTION_PTR(clEnqueueNDRangeKernel);
6467
DECLARE_FUNCTION_PTR(clSetKernelArg);
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Copyright (c) 2017, 2018 ARM Limited.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__
25+
#define __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__
26+
27+
#include "arm_compute/core/CL/ICLKernel.h"
28+
29+
namespace arm_compute
30+
{
31+
class ICLTensor;
32+
33+
/** Interface for the Deconvolution layer kernel on OpenCL.
34+
*/
35+
class CLDeconvolutionLayerUpsampleKernel : public ICLKernel
36+
{
37+
public:
38+
/** Constructor */
39+
CLDeconvolutionLayerUpsampleKernel();
40+
/** Prevent instances of this class from being copied (As this class contains pointers) */
41+
CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete;
42+
/** Prevent instances of this class from being copied (As this class contains pointers) */
43+
CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete;
44+
/** Default Move Constructor. */
45+
CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default;
46+
/** Default move assignment operator. */
47+
CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default;
48+
/** Default destructor */
49+
~CLDeconvolutionLayerUpsampleKernel() = default;
50+
51+
/** Initialise the kernel's input and output.
52+
*
53+
* @param[in] input Source tensor. Data types supported: F32.
54+
* @param[out] output Destination tensor. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
55+
* @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
56+
* @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
57+
*/
58+
void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, const PadStrideInfo &info);
59+
/** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsampleKernel
60+
*
61+
* @param[in] input Source tensor info. Data types supported: F32.
62+
* @param[in] output Destination tensor info. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
63+
* @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
64+
* @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
65+
*
66+
* @return a status
67+
*/
68+
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border, const PadStrideInfo &info);
69+
70+
// Inherited methods overridden:
71+
void run(const Window &window, cl::CommandQueue &queue) override;
72+
73+
private:
74+
const ICLTensor *_input;
75+
ICLTensor *_output;
76+
BorderSize _inner_border;
77+
PadStrideInfo _info;
78+
};
79+
} // namespace arm_compute
80+
#endif /*__ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__ */

arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,14 @@ class CLGEMMInterleave4x4Kernel : public ICLKernel
6868
* @param[out] output Output tensor. Data type supported: same as @p input
6969
*/
7070
void configure(const ICLTensor *input, ICLTensor *output);
71+
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMInterleave4x4Kernel
72+
*
73+
* @param[in] input Input tensor info. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
74+
* @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
75+
*
76+
* @return a status
77+
*/
78+
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
7179

7280
// Inherited methods overridden
7381
void run(const Window &window, cl::CommandQueue &queue) override;

arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,16 @@ class CLGEMMLowpMatrixMultiplyKernel : public ICLKernel
6161
* @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
6262
*/
6363
void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, bool is_interleaved_transposed = true);
64+
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyKernel
65+
*
66+
* @param[in] input0 Input tensor info containing the interleaved Matrix A. Data type supported: QASYMM8
67+
* @param[in] input1 Input tensor info containing the transposed Matrix B. Data type supported: same as @p input0
68+
* @param[in] output Output tensor info to store the result of matrix multiplication. Data type supported: S32
69+
* @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
70+
*
71+
* @return a status
72+
*/
73+
static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, bool is_interleaved_transposed = true);
6474

6575
// Inherited methods overridden:
6676
void run(const Window &window, cl::CommandQueue &queue) override;

arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,19 @@ class CLGEMMLowpOffsetContributionKernel : public ICLKernel
6868
* @param[in] b_offset Offset to be added to each element of the matrix B.
6969
*/
7070
void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset);
71+
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
72+
*
73+
* @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
74+
* @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
75+
* Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
76+
* @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
77+
* Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
78+
* @param[in] a_offset Offset to be added to each element of the matrix A.
79+
* @param[in] b_offset Offset to be added to each element of the matrix B.
80+
*
81+
* @return a status
82+
*/
83+
static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset);
7184

7285
// Inherited methods overridden:
7386
void run(const Window &window, cl::CommandQueue &queue) override;

0 commit comments

Comments
 (0)