Skip to content

Commit bf8b01d

Browse files
KaizenAnthonyBarbier
authored and committed
arm_compute v17.10
Change-Id: If1489af40eccd0219ede8946577afbf04db31b29
1 parent 8938bd3 commit bf8b01d

File tree

4,335 files changed

+78187
-40390
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

4,335 files changed

+78187
-40390
lines changed

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Related projects:
99

1010
Documentation available here:
1111

12+
- [v17.10](https://arm-software.github.io/ComputeLibrary/v17.10/)
1213
- [v17.09](https://arm-software.github.io/ComputeLibrary/v17.09/)
1314
- [v17.06](https://arm-software.github.io/ComputeLibrary/v17.06/)
1415
- [v17.05](https://arm-software.github.io/ComputeLibrary/v17.05/)
@@ -17,6 +18,7 @@ Documentation available here:
1718

1819
Binaries available here:
1920

21+
- [v17.10](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.10/arm_compute-v17.10-bin.tar.gz)
2022
- [v17.09](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.09/arm_compute-v17.09-bin.tar.gz)
2123
- [v17.06](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.06/arm_compute-v17.06-bin.tar.gz)
2224
- [v17.05](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.05/arm_compute-v17.05-bin.tar.gz)

SConscript

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,18 @@ import os.path
2424
import re
2525
import subprocess
2626

27-
VERSION = "v17.09"
28-
SONAME_VERSION="4.0.0"
27+
VERSION = "v17.10"
28+
SONAME_VERSION="5.0.0"
2929

3030
Import('env')
3131
Import('vars')
3232

33-
def build_library(name, sources, static=False):
33+
def build_library(name, sources, static=False, libs=[]):
3434
if static:
35-
obj = arm_compute_env.StaticLibrary(name, source=sources)
35+
obj = arm_compute_env.StaticLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs)
3636
else:
3737
if env['set_soname']:
38-
obj = arm_compute_env.SharedLibrary(name, source=sources, SHLIBVERSION = SONAME_VERSION)
38+
obj = arm_compute_env.SharedLibrary(name, source=sources, SHLIBVERSION = SONAME_VERSION, LIBS = arm_compute_env["LIBS"] + libs)
3939

4040
symlinks = []
4141
# Manually delete symlinks or SCons will get confused:
@@ -51,7 +51,7 @@ def build_library(name, sources, static=False):
5151
Default(clean)
5252
Depends(obj, clean)
5353
else:
54-
obj = arm_compute_env.SharedLibrary(name, source=sources)
54+
obj = arm_compute_env.SharedLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs)
5555

5656
Default(obj)
5757
return obj
@@ -191,11 +191,12 @@ if env['os'] != 'bare_metal' and not env['standalone']:
191191
shared_runtime_objects = [arm_compute_env.SharedObject(f) for f in runtime_files]
192192
static_runtime_objects = [arm_compute_env.StaticObject(f) for f in runtime_files]
193193

194-
arm_compute_a = build_library('arm_compute-static', static_core_objects + static_runtime_objects, static=True)
194+
arm_compute_a = build_library('arm_compute-static', static_runtime_objects, static=True, libs = [ arm_compute_core_a ])
195195
Export('arm_compute_a')
196196

197197
if env['os'] != 'bare_metal' and not env['standalone']:
198-
arm_compute_so = build_library('arm_compute', shared_core_objects + shared_runtime_objects, static=False)
198+
arm_compute_so = build_library('arm_compute', shared_runtime_objects, static=False, libs = [ "arm_compute_core" ])
199+
Depends(arm_compute_so, arm_compute_core_so)
199200
Export('arm_compute_so')
200201

201202
if env['neon'] and env['opencl']:
@@ -208,10 +209,11 @@ if env['neon'] and env['opencl']:
208209
shared_graph_objects = [arm_compute_env.SharedObject(f) for f in graph_files]
209210
static_graph_objects = [arm_compute_env.StaticObject(f) for f in graph_files]
210211

211-
arm_compute_graph_a = build_library('arm_compute_graph-static', static_core_objects + static_runtime_objects + static_graph_objects, static=True)
212+
arm_compute_graph_a = build_library('arm_compute_graph-static', static_graph_objects, static=True, libs = [ arm_compute_a ])
212213
Export('arm_compute_graph_a')
213214

214-
arm_compute_graph_so = build_library('arm_compute_graph', shared_core_objects + shared_runtime_objects + shared_graph_objects, static=False)
215+
arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core" ])
216+
Depends( arm_compute_graph_so, arm_compute_so)
215217
Export('arm_compute_graph_so')
216218

217219
graph_alias = arm_compute_env.Alias("arm_compute_graph", [arm_compute_graph_a, arm_compute_graph_so])

SConstruct

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ vars.AddVariables(
5656
)
5757

5858
env = Environment(platform="posix", variables=vars, ENV = os.environ)
59+
env.Append(LIBPATH = ["#build/%s" % env['build_dir']])
5960

6061
SConsignFile('build/.%s' % env['build_dir'])
6162

@@ -107,7 +108,7 @@ if env['arch'] == 'armv7a':
107108
prefix = "arm-linux-gnueabihf-"
108109
env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
109110
elif env['os'] == 'bare_metal':
110-
prefix = "arm-none-eabi-"
111+
prefix = "arm-eabi-"
111112
env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
112113
elif env['os'] == 'android':
113114
prefix = "arm-linux-androideabi-"
@@ -118,7 +119,7 @@ elif env['arch'] == 'arm64-v8a':
118119
if env['os'] == 'linux':
119120
prefix = "aarch64-linux-gnu-"
120121
elif env['os'] == 'bare_metal':
121-
prefix = "aarch64-none-elf-"
122+
prefix = "aarch64-elf-"
122123
elif env['os'] == 'android':
123124
prefix = "aarch64-linux-android-"
124125
elif env['arch'] == 'arm64-v8.2-a':
@@ -216,7 +217,7 @@ SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate
216217
if env['opencl']:
217218
SConscript("./opencl-1.2-stubs/SConscript", variant_dir="build/%s/opencl-1.2-stubs" % env['build_dir'], duplicate=0)
218219

219-
if env['examples']:
220+
if env['examples'] and env['os'] != 'bare_metal':
220221
SConscript('./examples/SConscript', variant_dir='#build/%s/examples' % env['build_dir'], duplicate=0)
221222

222223
if env['os'] != 'bare_metal':

arm_compute/core/CL/CLKernelLibrary.h

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -162,11 +162,9 @@ class CLKernelLibrary
162162
*/
163163
void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault())
164164
{
165-
_kernel_path = std::move(kernel_path);
166-
_context = std::move(context);
167-
_device = std::move(device);
168-
_max_workgroup_size = 0;
169-
max_local_workgroup_size();
165+
_kernel_path = std::move(kernel_path);
166+
_context = std::move(context);
167+
_device = std::move(device);
170168
}
171169
/** Sets the path that the kernels reside in.
172170
*
@@ -208,20 +206,15 @@ class CLKernelLibrary
208206
{
209207
_device = cl_devices[0];
210208
}
211-
212-
_max_workgroup_size = 0;
213-
max_local_workgroup_size();
214-
};
209+
}
215210
/** Sets the CL device for which the programs are created.
216211
*
217212
* @param[in] device A CL device.
218213
*/
219214
void set_device(cl::Device device)
220215
{
221-
_device = std::move(device);
222-
_max_workgroup_size = 0;
223-
max_local_workgroup_size();
224-
};
216+
_device = std::move(device);
217+
}
225218
/** Creates a kernel from the kernel library.
226219
*
227220
* @param[in] kernel_name Kernel name.
@@ -238,15 +231,14 @@ class CLKernelLibrary
238231
*
239232
*/
240233
void load_binary();
241-
/** Find the maximum number of local work items in a workgroup can be supported by the device
234+
/** Find the maximum number of local work items in a workgroup that can be supported for the kernel.
242235
*
243236
*/
244-
size_t max_local_workgroup_size();
245-
246-
/** Return the default NDRange that is suitable for the device.
237+
size_t max_local_workgroup_size(const cl::Kernel &kernel) const;
238+
/** Return the default NDRange for the device.
247239
*
248240
*/
249-
cl::NDRange default_ndrange();
241+
cl::NDRange default_ndrange() const;
250242

251243
private:
252244
/** Load program and its dependencies.
@@ -270,7 +262,6 @@ class CLKernelLibrary
270262
static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */
271263
static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
272264
Used for compile-time kernel inclusion. >*/
273-
size_t _max_workgroup_size; /** Maximum local workgroup size supported on the device */
274265
};
275266
}
276267
#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */

arm_compute/core/CL/ICLKernel.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,12 @@ class ICLKernel : public IKernel
175175
*/
176176
GPUTarget get_target() const;
177177

178+
/** Get the maximum workgroup size for the device the CLKernelLibrary uses.
179+
*
180+
* @return The maximum workgroup size value.
181+
*/
182+
size_t get_max_workgroup_size();
183+
178184
private:
179185
/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
180186
*
@@ -208,10 +214,11 @@ class ICLKernel : public IKernel
208214
unsigned int num_arguments_per_tensor() const;
209215

210216
protected:
211-
cl::Kernel _kernel; /**< OpenCL kernel to run */
212-
cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
213-
GPUTarget _target; /**< The targeted GPU */
214-
std::string _config_id; /**< Configuration ID */
217+
cl::Kernel _kernel; /**< OpenCL kernel to run */
218+
cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
219+
GPUTarget _target; /**< The targeted GPU */
220+
std::string _config_id; /**< Configuration ID */
221+
size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
215222
};
216223

217224
/** Add the kernel to the command queue with the given window.
@@ -223,7 +230,7 @@ class ICLKernel : public IKernel
223230
* @param[in,out] queue OpenCL command queue.
224231
* @param[in] kernel Kernel to enqueue
225232
* @param[in] window Window the kernel has to process.
226-
* @param[in] lws_hint Local workgroup size requested, by default (128,1)
233+
* @param[in] lws_hint Local workgroup size requested, by default (128,1).
227234
*
228235
* @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
229236
*/

arm_compute/core/CL/OpenCL.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ class CLSymbols final
8383
using clGetDeviceInfo_func = cl_int (*)(cl_device_id, cl_device_info, size_t, void *, size_t *);
8484
using clGetDeviceIDs_func = cl_int (*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *);
8585
using clRetainEvent_func = cl_int (*)(cl_event);
86+
using clGetPlatformIDs_func = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *);
87+
using clGetKernelWorkGroupInfo_func = cl_int (*)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
8688

8789
clBuildProgram_func clBuildProgram = nullptr;
8890
clEnqueueNDRangeKernel_func clEnqueueNDRangeKernel = nullptr;
@@ -113,6 +115,8 @@ class CLSymbols final
113115
clGetDeviceInfo_func clGetDeviceInfo = nullptr;
114116
clGetDeviceIDs_func clGetDeviceIDs = nullptr;
115117
clRetainEvent_func clRetainEvent = nullptr;
118+
clGetPlatformIDs_func clGetPlatformIDs = nullptr;
119+
clGetKernelWorkGroupInfo_func clGetKernelWorkGroupInfo = nullptr;
116120

117121
private:
118122
std::pair<bool, bool> _loaded{ false, false };

arm_compute/core/Logger.h

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Copyright (c) 2017 ARM Limited.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
#ifndef __ARM_COMPUTE_LOGGER_H__
26+
#define __ARM_COMPUTE_LOGGER_H__
27+
28+
#include <iostream>
29+
#include <memory>
30+
31+
#ifdef ARM_COMPUTE_DEBUG_ENABLED
32+
#define ARM_COMPUTE_LOG(x) (arm_compute::Logger::get().log_info() << x)
33+
#else /* ARM_COMPUTE_DEBUG_ENABLED */
34+
#define ARM_COMPUTE_LOG(...)
35+
#endif /* ARM_COMPUTE_DEBUG_ENABLED */
36+
37+
namespace arm_compute
38+
{
39+
/** Verbosity of the logger */
40+
enum class LoggerVerbosity
41+
{
42+
NONE, /**< No info */
43+
INFO /**< Log info */
44+
};
45+
46+
/** Logger singleton class */
47+
class Logger
48+
{
49+
public:
50+
static Logger &get();
51+
void set_logger(std::ostream &ostream, LoggerVerbosity verbosity);
52+
std::ostream &log_info();
53+
54+
private:
55+
/** Default constructor */
56+
Logger();
57+
/** Allow instances of this class to be moved */
58+
Logger(Logger &&) = default;
59+
/** Prevent instances of this class from being copied (As this class contains pointers) */
60+
Logger(const Logger &) = delete;
61+
/** Prevent instances of this class from being copied (As this class contains pointers) */
62+
Logger &operator=(const Logger &) = delete;
63+
/** Allow instances of this class to be moved */
64+
Logger &operator=(Logger &&) = default;
65+
66+
std::ostream *_ostream;
67+
std::ostream _nullstream;
68+
LoggerVerbosity _verbosity;
69+
};
70+
} // arm_compute
71+
#endif /* __ARM_COMPUTE_LOGGER_H__ */

arm_compute/core/NEON/NEFixedPoint.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
* SOFTWARE.
2323
*/
24+
#include <array>
2425
#include <limits>
2526

2627
namespace arm_compute

arm_compute/core/NEON/kernels/NELKTrackerKernel.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ class NELKTrackerKernel : public INEKernel
109109
*
110110
* @return Values A11, A12, A22
111111
*/
112-
std::tuple<int, int, int> compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int *bilinear_ix, int *bilinear_iy);
112+
std::tuple<int, int, int> compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy);
113113
/** Compute the vector A^T * b, i.e. -sum(I_d * I_t) for d in {x,y}
114114
*
115115
* @param[in] old_keypoint Old keypoint for which gradient is computed
@@ -119,7 +119,7 @@ class NELKTrackerKernel : public INEKernel
119119
*
120120
* @return Values b1, b2
121121
*/
122-
std::pair<int, int> compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int *bilinear_ix, const int *bilinear_iy);
122+
std::pair<int, int> compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy);
123123

124124
const ITensor *_input_old;
125125
const ITensor *_input_new;

arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ class NEPoolingLayerKernel : public INEKernel
4747
/** Default destructor */
4848
~NEPoolingLayerKernel() = default;
4949
/** Set the input and output tensors.
50+
*
51+
* @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
5052
*
5153
* @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32.
5254
* @param[out] output Destination tensor. Data types supported: Same as @p input.
@@ -123,6 +125,13 @@ class NEPoolingLayerKernel : public INEKernel
123125
*/
124126
template <PoolingType pooling_type>
125127
void pooling7_f32(const Window &window_input, const Window &window);
128+
/** Function to perform NxN pooling.
129+
*
130+
* @param[in] window_input Input region on which to execute the kernel.
131+
* @param[in] window Output region on which to execute the kernel.
132+
*/
133+
template <PoolingType pooling_type>
134+
void poolingN_f32(const Window &window_input, const Window &window);
126135
/** Common signature for all the specialised Pooling functions
127136
*
128137
* @param[in] window_input Input region on which to execute the kernel.

0 commit comments

Comments
 (0)