Skip to content

Commit 8938bd3

Browse files
KaizenAnthonyBarbier
authored and committed
arm_compute v17.09
Change-Id: I4bf8f4e6e5f84ce0d5b6f5ba570d276879f42a81
1 parent f4a254c commit 8938bd3

File tree

9,142 files changed

+660413
-194420
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

9,142 files changed

+660413
-194420
lines changed

LICENSE

Lines changed: 0 additions & 21 deletions
This file was deleted.

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,23 @@
11

22
Please report issues here: https://github.com/ARM-software/ComputeLibrary/issues
3+
Make sure you are using the latest version of the library before opening an issue. Thanks
34

45
Related projects:
56

67
- [Caffe on Compute Library](https://github.com/OAID/caffeOnACL)
8+
- [Tutorial: Cartoonifying Images on Raspberry Pi with the Compute Library](https://community.arm.com/graphics/b/blog/posts/cartoonifying-images-on-raspberry-pi-with-the-compute-library)
79

810
Documentation available here:
911

12+
- [v17.09](https://arm-software.github.io/ComputeLibrary/v17.09/)
1013
- [v17.06](https://arm-software.github.io/ComputeLibrary/v17.06/)
1114
- [v17.05](https://arm-software.github.io/ComputeLibrary/v17.05/)
1215
- [v17.04](https://arm-software.github.io/ComputeLibrary/v17.04/)
1316
- [v17.03.1](https://arm-software.github.io/ComputeLibrary/v17.03.1/)
1417

1518
Binaries available here:
1619

20+
- [v17.09](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.09/arm_compute-v17.09-bin.tar.gz)
1721
- [v17.06](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.06/arm_compute-v17.06-bin.tar.gz)
1822
- [v17.05](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.05/arm_compute-v17.05-bin.tar.gz)
1923
- [v17.04](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.04/arm_compute-v17.04-bin.tar.gz)

SConscript

Lines changed: 51 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import os.path
2424
import re
2525
import subprocess
2626

27-
VERSION = "v17.06"
28-
SONAME_VERSION="3.0.0"
27+
VERSION = "v17.09"
28+
SONAME_VERSION="4.0.0"
2929

3030
Import('env')
3131
Import('vars')
@@ -138,22 +138,22 @@ core_files = Glob('src/core/*.cpp')
138138
core_files += Glob('src/core/CPP/*.cpp')
139139
core_files += Glob('src/core/CPP/kernels/*.cpp')
140140

141-
files = Glob('src/runtime/*.cpp')
141+
runtime_files = Glob('src/runtime/*.cpp')
142142
# CLHarrisCorners uses the Scheduler to run CPP kernels
143-
files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp')
143+
runtime_files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp')
144144

145145
if env['cppthreads']:
146-
files += Glob('src/runtime/CPP/CPPScheduler.cpp')
146+
runtime_files += Glob('src/runtime/CPP/CPPScheduler.cpp')
147147

148148
if env['openmp']:
149-
files += Glob('src/runtime/OMP/OMPScheduler.cpp')
149+
runtime_files += Glob('src/runtime/OMP/OMPScheduler.cpp')
150150

151151
if env['opencl']:
152152
core_files += Glob('src/core/CL/*.cpp')
153153
core_files += Glob('src/core/CL/kernels/*.cpp')
154154

155-
files += Glob('src/runtime/CL/*.cpp')
156-
files += Glob('src/runtime/CL/functions/*.cpp')
155+
runtime_files += Glob('src/runtime/CL/*.cpp')
156+
runtime_files += Glob('src/runtime/CL/functions/*.cpp')
157157

158158
# Generate embed files
159159
if env['embed_kernels']:
@@ -169,31 +169,64 @@ if env['neon']:
169169
core_files += Glob('src/core/NEON/*.cpp')
170170
core_files += Glob('src/core/NEON/kernels/*.cpp')
171171

172-
files += Glob('src/runtime/NEON/*.cpp')
173-
files += Glob('src/runtime/NEON/functions/*.cpp')
172+
if env['arch'] == "armv7a":
173+
core_files += Glob('src/core/NEON/kernels/arm32/*.cpp')
174+
175+
if "arm64-v8" in env['arch']:
176+
core_files += Glob('src/core/NEON/kernels/arm64/*.cpp')
177+
178+
runtime_files += Glob('src/runtime/NEON/*.cpp')
179+
runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
174180

175181
static_core_objects = [arm_compute_env.StaticObject(f) for f in core_files]
176182
shared_core_objects = [arm_compute_env.SharedObject(f) for f in core_files]
177183

178184
arm_compute_core_a = build_library('arm_compute_core-static', static_core_objects, static=True)
179185
Export('arm_compute_core_a')
180186

181-
if env['os'] != 'bare_metal':
187+
if env['os'] != 'bare_metal' and not env['standalone']:
182188
arm_compute_core_so = build_library('arm_compute_core', shared_core_objects, static=False)
183189
Export('arm_compute_core_so')
184190

185-
shared_objects = [arm_compute_env.SharedObject(f) for f in files]
186-
static_objects = [arm_compute_env.StaticObject(f) for f in files]
191+
shared_runtime_objects = [arm_compute_env.SharedObject(f) for f in runtime_files]
192+
static_runtime_objects = [arm_compute_env.StaticObject(f) for f in runtime_files]
187193

188-
arm_compute_a = build_library('arm_compute-static', static_core_objects + static_objects, static=True)
194+
arm_compute_a = build_library('arm_compute-static', static_core_objects + static_runtime_objects, static=True)
189195
Export('arm_compute_a')
190196

191-
if env['os'] != 'bare_metal':
192-
arm_compute_so = build_library('arm_compute', shared_core_objects + shared_objects, static=False)
197+
if env['os'] != 'bare_metal' and not env['standalone']:
198+
arm_compute_so = build_library('arm_compute', shared_core_objects + shared_runtime_objects, static=False)
193199
Export('arm_compute_so')
194200

195-
alias = arm_compute_env.Alias("arm_compute", [arm_compute_a, arm_compute_so])
201+
if env['neon'] and env['opencl']:
202+
graph_files = Glob('src/graph/*.cpp')
203+
graph_files += Glob('src/graph/nodes/*.cpp')
204+
205+
graph_files += Glob('src/graph/CL/*.cpp')
206+
graph_files += Glob('src/graph/NEON/*.cpp')
207+
208+
shared_graph_objects = [arm_compute_env.SharedObject(f) for f in graph_files]
209+
static_graph_objects = [arm_compute_env.StaticObject(f) for f in graph_files]
210+
211+
arm_compute_graph_a = build_library('arm_compute_graph-static', static_core_objects + static_runtime_objects + static_graph_objects, static=True)
212+
Export('arm_compute_graph_a')
213+
214+
arm_compute_graph_so = build_library('arm_compute_graph', shared_core_objects + shared_runtime_objects + shared_graph_objects, static=False)
215+
Export('arm_compute_graph_so')
216+
217+
graph_alias = arm_compute_env.Alias("arm_compute_graph", [arm_compute_graph_a, arm_compute_graph_so])
218+
Default(graph_alias)
219+
220+
if env['standalone']:
221+
alias = arm_compute_env.Alias("arm_compute", [arm_compute_a])
222+
else:
223+
alias = arm_compute_env.Alias("arm_compute", [arm_compute_a, arm_compute_so])
224+
196225
Default(alias)
197226

198227
Default(generate_embed)
199-
Depends([alias,arm_compute_core_so, arm_compute_core_a], generate_embed)
228+
229+
if env['standalone']:
230+
Depends([alias,arm_compute_core_a], generate_embed)
231+
else:
232+
Depends([alias,arm_compute_core_so, arm_compute_core_a], generate_embed)

SConstruct

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ vars.AddVariables(
4444
EnumVariable("build", "Build type", "cross_compile", allowed_values=("native", "cross_compile")),
4545
BoolVariable("examples", "Build example programs", True),
4646
BoolVariable("Werror", "Enable/disable the -Werror compilation flag", True),
47+
BoolVariable("standalone", "Builds the tests as standalone executables, links statically with libgcc, libstdc++ and libarm_compute", False),
4748
BoolVariable("opencl", "Enable OpenCL support", True),
4849
BoolVariable("neon", "Enable Neon support", False),
4950
BoolVariable("embed_kernels", "Embed OpenCL kernels in library binary", False),
@@ -102,25 +103,31 @@ prefix = ""
102103
if env['arch'] == 'armv7a':
103104
env.Append(CXXFLAGS = ['-march=armv7-a', '-mthumb', '-mfpu=neon'])
104105

105-
if env['os'] in ['linux', 'bare_metal']:
106+
if env['os'] == 'linux':
106107
prefix = "arm-linux-gnueabihf-"
107108
env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
109+
elif env['os'] == 'bare_metal':
110+
prefix = "arm-none-eabi-"
111+
env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
108112
elif env['os'] == 'android':
109113
prefix = "arm-linux-androideabi-"
110114
env.Append(CXXFLAGS = ['-mfloat-abi=softfp'])
111115
elif env['arch'] == 'arm64-v8a':
112116
env.Append(CXXFLAGS = ['-march=armv8-a'])
113117

114-
if env['os'] in ['linux', 'bare_metal']:
118+
if env['os'] == 'linux':
115119
prefix = "aarch64-linux-gnu-"
120+
elif env['os'] == 'bare_metal':
121+
prefix = "aarch64-none-elf-"
116122
elif env['os'] == 'android':
117123
prefix = "aarch64-linux-android-"
118124
elif env['arch'] == 'arm64-v8.2-a':
119125
env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16+simd'])
120126
env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16'])
121-
122-
if env['os'] in ['linux', 'bare_metal']:
127+
if env['os'] == 'linux':
123128
prefix = "aarch64-linux-gnu-"
129+
elif env['os'] == 'bare_metal':
130+
prefix = "aarch64-elf-"
124131
elif env['os'] == 'android':
125132
prefix = "aarch64-linux-android-"
126133
elif env['arch'] == 'x86_32':
@@ -161,6 +168,10 @@ if not GetOption("help"):
161168
if compiler_ver == '4.8.3':
162169
env.Append(CXXFLAGS = ['-Wno-array-bounds'])
163170

171+
if env['standalone']:
172+
env.Append(CXXFLAGS = ['-fPIC'])
173+
env.Append(LINKFLAGS = ['-static-libgcc','-static-libstdc++'])
174+
164175
if env['Werror']:
165176
env.Append(CXXFLAGS = ['-Werror'])
166177

@@ -169,8 +180,10 @@ if env['os'] == 'android':
169180
env.Append(LINKFLAGS = ['-pie', '-static-libstdc++'])
170181
elif env['os'] == 'bare_metal':
171182
env.Append(LINKFLAGS = ['-static'])
183+
env.Append(LINKFLAGS = ['-specs=rdimon.specs'])
172184
env.Append(CXXFLAGS = ['-fPIC'])
173185
env.Append(CPPDEFINES = ['NO_MULTI_THREADING'])
186+
env.Append(CPPDEFINES = ['BARE_METAL'])
174187

175188
if env['opencl']:
176189
if env['os'] == 'bare_metal':
@@ -189,6 +202,7 @@ else:
189202

190203
if env['asserts']:
191204
env.Append(CPPDEFINES = ['ARM_COMPUTE_ASSERTS_ENABLED'])
205+
env.Append(CXXFLAGS = ['-fstack-protector-strong'])
192206

193207
env.Append(CPPPATH = ['#/include', "#"])
194208
env.Append(CXXFLAGS = env['extra_cxx_flags'])
@@ -205,4 +219,5 @@ if env['opencl']:
205219
if env['examples']:
206220
SConscript('./examples/SConscript', variant_dir='#build/%s/examples' % env['build_dir'], duplicate=0)
207221

208-
SConscript('./tests/SConscript', variant_dir='#build/%s/tests' % env['build_dir'], duplicate=0)
222+
if env['os'] != 'bare_metal':
223+
SConscript('./tests/SConscript', variant_dir='#build/%s/tests' % env['build_dir'], duplicate=0)

arm_compute/core/CL/CLHelpers.h

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#include "arm_compute/core/CL/OpenCL.h"
2828
#include "arm_compute/core/Helpers.h"
29+
#include "support/ToolchainSupport.h"
2930

3031
#include <string>
3132

@@ -52,6 +53,22 @@ static constexpr const unsigned int max_cl_vector_width = 16;
5253
*/
5354
std::string get_cl_type_from_data_type(const DataType &dt);
5455

56+
/** Get the size of a data type in number of bits.
57+
*
58+
* @param[in] dt @ref DataType.
59+
*
60+
* @return Number of bits in the data type specified.
61+
*/
62+
std::string get_data_size_from_data_type(const DataType &dt);
63+
64+
/** Translates fixed point tensor data type to the underlying OpenCL type.
65+
*
66+
* @param[in] dt @ref DataType to be translated to OpenCL type.
67+
*
68+
* @return The string specifying the underlying OpenCL type to be used.
69+
*/
70+
std::string get_underlying_cl_type_from_data_type(const DataType &dt);
71+
5572
/** Translates a given gpu device target to string.
5673
*
5774
* @param[in] target Given gpu target.
@@ -70,7 +87,7 @@ const std::string &string_from_target(GPUTarget target);
7087
template <typename Kernel, typename... T>
7188
std::unique_ptr<Kernel> create_configure_kernel(T &&... args)
7289
{
73-
std::unique_ptr<Kernel> k = arm_compute::cpp14::make_unique<Kernel>();
90+
std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
7491
k->configure(std::forward<T>(args)...);
7592
return k;
7693
}
@@ -82,7 +99,7 @@ std::unique_ptr<Kernel> create_configure_kernel(T &&... args)
8299
template <typename Kernel>
83100
std::unique_ptr<Kernel> create_kernel()
84101
{
85-
std::unique_ptr<Kernel> k = arm_compute::cpp14::make_unique<Kernel>();
102+
std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
86103
return k;
87104
}
88105

@@ -101,5 +118,20 @@ GPUTarget get_target_from_device(cl::Device &device);
101118
* @return the GPU target which shows the arch
102119
*/
103120
GPUTarget get_arch_from_target(GPUTarget target);
121+
122+
/** Helper function to get the highest OpenCL version supported
123+
*
124+
* @param[in] device A CL device
125+
*
126+
* @return the highest OpenCL version supported
127+
*/
128+
CLVersion get_cl_version(const cl::Device &device);
129+
/** Helper function to check whether the arm_non_uniform_work_group_size extension is supported
130+
*
131+
* @param[in] device A CL device
132+
*
133+
* @return True if the extension is supported
134+
*/
135+
bool non_uniform_workgroup_support(const cl::Device &device);
104136
}
105-
#endif
137+
#endif /* __ARM_COMPUTE_CLHELPERS_H__ */

arm_compute/core/CL/CLKernelLibrary.h

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,11 @@ class CLKernelLibrary
162162
*/
163163
void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault())
164164
{
165-
_kernel_path = std::move(kernel_path);
166-
_context = std::move(context);
167-
_device = std::move(device);
165+
_kernel_path = std::move(kernel_path);
166+
_context = std::move(context);
167+
_device = std::move(device);
168+
_max_workgroup_size = 0;
169+
max_local_workgroup_size();
168170
}
169171
/** Sets the path that the kernels reside in.
170172
*
@@ -174,6 +176,17 @@ class CLKernelLibrary
174176
{
175177
_kernel_path = kernel_path;
176178
};
179+
/** Gets the path that the kernels reside in.
180+
*/
181+
std::string get_kernel_path()
182+
{
183+
return _kernel_path;
184+
};
185+
/** Gets the source of the selected program
186+
*
187+
* @param[in] program_name Program name.
188+
*/
189+
std::string get_program_source(const std::string &program_name);
177190
/** Sets the CL context used to create programs.
178191
*
179192
* @note Setting the context also resets the device to the
@@ -195,14 +208,19 @@ class CLKernelLibrary
195208
{
196209
_device = cl_devices[0];
197210
}
211+
212+
_max_workgroup_size = 0;
213+
max_local_workgroup_size();
198214
};
199215
/** Sets the CL device for which the programs are created.
200216
*
201217
* @param[in] device A CL device.
202218
*/
203219
void set_device(cl::Device device)
204220
{
205-
_device = std::move(device);
221+
_device = std::move(device);
222+
_max_workgroup_size = 0;
223+
max_local_workgroup_size();
206224
};
207225
/** Creates a kernel from the kernel library.
208226
*
@@ -220,6 +238,15 @@ class CLKernelLibrary
220238
*
221239
*/
222240
void load_binary();
241+
/** Find the maximum number of local work items in a workgroup that can be supported by the device
242+
*
243+
*/
244+
size_t max_local_workgroup_size();
245+
246+
/** Return the default NDRange that is suitable for the device.
247+
*
248+
*/
249+
cl::NDRange default_ndrange();
223250

224251
private:
225252
/** Load program and its dependencies.
@@ -243,6 +270,7 @@ class CLKernelLibrary
243270
static const std::map<std::string, std::string> _kernel_program_map; /**< Map that associates kernel names with programs. */
244271
static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
245272
Used for compile-time kernel inclusion. >*/
273+
size_t _max_workgroup_size; /**< Maximum local workgroup size supported on the device */
246274
};
247275
}
248276
#endif /* __ARM_COMPUTE_CLKERNELLIBRARY_H__ */

0 commit comments

Comments
 (0)