Skip to content

Commit 48bc34e

Browse files
author
BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
committed
arm_compute v17.12
1 parent 8a3da6f commit 48bc34e

File tree

10,812 files changed

+535620
-332812
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

10,812 files changed

+535620
-332812
lines changed

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2017 ARM Software
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Related projects:
99

1010
Documentation available here:
1111

12+
- [v17.12](https://arm-software.github.io/ComputeLibrary/v17.12/)
1213
- [v17.10](https://arm-software.github.io/ComputeLibrary/v17.10/)
1314
- [v17.09](https://arm-software.github.io/ComputeLibrary/v17.09/)
1415
- [v17.06](https://arm-software.github.io/ComputeLibrary/v17.06/)
@@ -18,6 +19,7 @@ Documentation available here:
1819

1920
Binaries available here:
2021

22+
- [v17.12](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.12/arm_compute-v17.12-bin.tar.gz)
2123
- [v17.10](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.10/arm_compute-v17.10-bin.tar.gz)
2224
- [v17.09](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.09/arm_compute-v17.09-bin.tar.gz)
2325
- [v17.06](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.06/arm_compute-v17.06-bin.tar.gz)

SConscript

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import os.path
2424
import re
2525
import subprocess
2626

27-
VERSION = "v17.10"
28-
SONAME_VERSION="5.0.0"
27+
VERSION = "v17.12"
28+
SONAME_VERSION="6.0.0"
2929

3030
Import('env')
3131
Import('vars')
@@ -125,6 +125,8 @@ def create_version_file(target, source, env):
125125

126126

127127
arm_compute_env = env.Clone()
128+
# Don't allow undefined references in the libraries:
129+
arm_compute_env.Append(LINKFLAGS=['-Wl,--no-undefined','-Wl,--no-allow-shlib-undefined'])
128130

129131
generate_embed = [ arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file) ]
130132
arm_compute_env.Append(CPPPATH =[Dir("./src/core/").path] )
@@ -137,8 +139,12 @@ arm_compute_env.Append(LIBS = ['dl'])
137139
core_files = Glob('src/core/*.cpp')
138140
core_files += Glob('src/core/CPP/*.cpp')
139141
core_files += Glob('src/core/CPP/kernels/*.cpp')
142+
core_files += Glob('src/core/utils/*/*.cpp')
140143

141144
runtime_files = Glob('src/runtime/*.cpp')
145+
runtime_files += Glob('src/runtime/CPP/ICPPSimpleFunction.cpp')
146+
runtime_files += Glob('src/runtime/CPP/functions/*.cpp')
147+
142148
# CLHarrisCorners uses the Scheduler to run CPP kernels
143149
runtime_files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp')
144150

@@ -178,30 +184,46 @@ if env['neon']:
178184
runtime_files += Glob('src/runtime/NEON/*.cpp')
179185
runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
180186

181-
static_core_objects = [arm_compute_env.StaticObject(f) for f in core_files]
182-
shared_core_objects = [arm_compute_env.SharedObject(f) for f in core_files]
187+
if env['gles_compute']:
188+
if env['os'] != 'android':
189+
arm_compute_env.Append(CPPPATH = ["#opengles-3.1/include", "#opengles-3.1/mali_include"])
190+
191+
core_files += Glob('src/core/GLES_COMPUTE/*.cpp')
192+
core_files += Glob('src/core/GLES_COMPUTE/kernels/*.cpp')
193+
194+
runtime_files += Glob('src/runtime/GLES_COMPUTE/*.cpp')
195+
runtime_files += Glob('src/runtime/GLES_COMPUTE/functions/*.cpp')
183196

184-
arm_compute_core_a = build_library('arm_compute_core-static', static_core_objects, static=True)
197+
# Generate embed files
198+
if env['embed_kernels']:
199+
cs_files = Glob('src/core/GLES_COMPUTE/cs_shaders/*.cs')
200+
cs_files += Glob('src/core/GLES_COMPUTE/cs_shaders/*.h')
201+
202+
embed_files = [ f.get_path()+"embed" for f in cs_files ]
203+
arm_compute_env.Append(CPPPATH =[Dir("./src/core/GLES_COMPUTE/").path] )
204+
205+
generate_embed.append(arm_compute_env.Command(embed_files, cs_files, action=resolve_includes))
206+
207+
arm_compute_core_a = build_library('arm_compute_core-static', core_files, static=True)
185208
Export('arm_compute_core_a')
186209

187210
if env['os'] != 'bare_metal' and not env['standalone']:
188-
arm_compute_core_so = build_library('arm_compute_core', shared_core_objects, static=False)
211+
arm_compute_core_so = build_library('arm_compute_core', core_files, static=False)
189212
Export('arm_compute_core_so')
190213

191-
shared_runtime_objects = [arm_compute_env.SharedObject(f) for f in runtime_files]
192-
static_runtime_objects = [arm_compute_env.StaticObject(f) for f in runtime_files]
193-
194-
arm_compute_a = build_library('arm_compute-static', static_runtime_objects, static=True, libs = [ arm_compute_core_a ])
214+
arm_compute_a = build_library('arm_compute-static', runtime_files, static=True, libs = [ arm_compute_core_a ])
195215
Export('arm_compute_a')
196216

197217
if env['os'] != 'bare_metal' and not env['standalone']:
198-
arm_compute_so = build_library('arm_compute', shared_runtime_objects, static=False, libs = [ "arm_compute_core" ])
218+
arm_compute_so = build_library('arm_compute', runtime_files, static=False, libs = [ "arm_compute_core" ])
199219
Depends(arm_compute_so, arm_compute_core_so)
200220
Export('arm_compute_so')
201221

202222
if env['neon'] and env['opencl']:
223+
Import('opencl')
203224
graph_files = Glob('src/graph/*.cpp')
204225
graph_files += Glob('src/graph/nodes/*.cpp')
226+
graph_files += Glob('src/graph/operations/*.cpp')
205227

206228
graph_files += Glob('src/graph/CL/*.cpp')
207229
graph_files += Glob('src/graph/NEON/*.cpp')
@@ -212,8 +234,10 @@ if env['neon'] and env['opencl']:
212234
arm_compute_graph_a = build_library('arm_compute_graph-static', static_graph_objects, static=True, libs = [ arm_compute_a ])
213235
Export('arm_compute_graph_a')
214236

215-
arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core" ])
216-
Depends( arm_compute_graph_so, arm_compute_so)
237+
arm_compute_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']])
238+
arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core", "OpenCL" ])
239+
Depends(arm_compute_graph_so, arm_compute_so)
240+
Depends(arm_compute_graph_so, opencl)
217241
Export('arm_compute_graph_so')
218242

219243
graph_alias = arm_compute_env.Alias("arm_compute_graph", [arm_compute_graph_a, arm_compute_graph_so])

SConstruct

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ vars = Variables("scons")
3939
vars.AddVariables(
4040
BoolVariable("debug", "Debug", False),
4141
BoolVariable("asserts", "Enable asserts (this flag is forced to 1 for debug=1)", False),
42+
BoolVariable("logging", "Logging (this flag is forced to 1 for debug=1)", False),
4243
EnumVariable("arch", "Target Architecture", "armv7a", allowed_values=("armv7a", "arm64-v8a", "arm64-v8.2-a", "x86_32", "x86_64")),
4344
EnumVariable("os", "Target OS", "linux", allowed_values=("linux", "android", "bare_metal")),
4445
EnumVariable("build", "Build type", "cross_compile", allowed_values=("native", "cross_compile")),
@@ -47,7 +48,8 @@ vars.AddVariables(
4748
BoolVariable("standalone", "Builds the tests as standalone executables, links statically with libgcc, libstdc++ and libarm_compute", False),
4849
BoolVariable("opencl", "Enable OpenCL support", True),
4950
BoolVariable("neon", "Enable Neon support", False),
50-
BoolVariable("embed_kernels", "Embed OpenCL kernels in library binary", False),
51+
BoolVariable("gles_compute", "Enable OpenGL ES Compute Shader support", False),
52+
BoolVariable("embed_kernels", "Embed OpenCL kernels and OpenGL ES compute shaders in library binary", False),
5153
BoolVariable("set_soname", "Set the library's soname and shlibversion (requires SCons 2.4 or above)", False),
5254
BoolVariable("openmp", "Enable OpenMP backend", False),
5355
BoolVariable("cppthreads", "Enable C++11 threads backend", True),
@@ -81,6 +83,7 @@ env.Append(CXXFLAGS = ['-Wno-deprecated-declarations','-Wall','-DARCH_ARM',
8183
'-Winit-self','-Wstrict-overflow=2','-Wswitch-default',
8284
'-fpermissive','-std=gnu++11','-Wno-vla','-Woverloaded-virtual',
8385
'-Wctor-dtor-privacy','-Wsign-promo','-Weffc++','-Wno-format-nonliteral','-Wno-overlength-strings','-Wno-strict-overflow'])
86+
8487
env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP'])
8588

8689
if os.environ.get('CXX', 'g++') == 'clang++':
@@ -115,16 +118,19 @@ if env['arch'] == 'armv7a':
115118
env.Append(CXXFLAGS = ['-mfloat-abi=softfp'])
116119
elif env['arch'] == 'arm64-v8a':
117120
env.Append(CXXFLAGS = ['-march=armv8-a'])
118-
121+
env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8A'])
119122
if env['os'] == 'linux':
120123
prefix = "aarch64-linux-gnu-"
121124
elif env['os'] == 'bare_metal':
122125
prefix = "aarch64-elf-"
123126
elif env['os'] == 'android':
124127
prefix = "aarch64-linux-android-"
125128
elif env['arch'] == 'arm64-v8.2-a':
126-
env.Append(CXXFLAGS = ['-march=armv8.2-a+fp16+simd'])
127-
env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16'])
129+
env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2'])
130+
131+
if os.environ.get('CXX', 'g++') == 'clang++':
132+
env.Append(CXXFLAGS = ['-fno-integrated-as'])
133+
128134
if env['os'] == 'linux':
129135
prefix = "aarch64-linux-gnu-"
130136
elif env['os'] == 'bare_metal':
@@ -172,6 +178,8 @@ if not GetOption("help"):
172178
if env['standalone']:
173179
env.Append(CXXFLAGS = ['-fPIC'])
174180
env.Append(LINKFLAGS = ['-static-libgcc','-static-libstdc++'])
181+
if env['cppthreads']:
182+
env.Append(LINKFLAGS = ['-lpthread'])
175183

176184
if env['Werror']:
177185
env.Append(CXXFLAGS = ['-Werror'])
@@ -187,15 +195,17 @@ elif env['os'] == 'bare_metal':
187195
env.Append(CPPDEFINES = ['BARE_METAL'])
188196

189197
if env['opencl']:
190-
if env['os'] == 'bare_metal':
198+
if env['os'] in ['bare_metal'] or env['standalone']:
191199
print("Cannot link OpenCL statically, which is required on bare metal")
192200
Exit(1)
193201

202+
if env['opencl'] or env['gles_compute']:
194203
if env['embed_kernels']:
195204
env.Append(CPPDEFINES = ['EMBEDDED_KERNELS'])
196205

197206
if env['debug']:
198207
env['asserts'] = True
208+
env['logging'] = True
199209
env.Append(CXXFLAGS = ['-O0','-g','-gdwarf-2'])
200210
env.Append(CPPDEFINES = ['ARM_COMPUTE_DEBUG_ENABLED'])
201211
else:
@@ -205,18 +215,26 @@ if env['asserts']:
205215
env.Append(CPPDEFINES = ['ARM_COMPUTE_ASSERTS_ENABLED'])
206216
env.Append(CXXFLAGS = ['-fstack-protector-strong'])
207217

218+
if env['logging']:
219+
env.Append(CPPDEFINES = ['ARM_COMPUTE_LOGGING_ENABLED'])
220+
208221
env.Append(CPPPATH = ['#/include', "#"])
209222
env.Append(CXXFLAGS = env['extra_cxx_flags'])
210223

211224
Export('vars')
212225
Export('env')
213226
Export('version_at_least')
214227

215-
SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate=0)
216-
217228
if env['opencl']:
218229
SConscript("./opencl-1.2-stubs/SConscript", variant_dir="build/%s/opencl-1.2-stubs" % env['build_dir'], duplicate=0)
219230

231+
if env['gles_compute'] and env['os'] != 'android':
232+
env.Append(CPPPATH = ['#/include/linux'])
233+
env.Append(LIBPATH = ["#build/%s/opengles-3.1-stubs" % env['build_dir']])
234+
SConscript("./opengles-3.1-stubs/SConscript", variant_dir="build/%s/opengles-3.1-stubs" % env['build_dir'], duplicate=0)
235+
236+
SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate=0)
237+
220238
if env['examples'] and env['os'] != 'bare_metal':
221239
SConscript('./examples/SConscript', variant_dir='#build/%s/examples' % env['build_dir'], duplicate=0)
222240

arm_compute/core/CL/CLHelpers.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ struct enable_bitwise_ops<arm_compute::GPUTarget>
4343
};
4444

4545
/** Max vector width of an OpenCL vector */
46-
static constexpr const unsigned int max_cl_vector_width = 16;
46+
static constexpr unsigned int max_cl_vector_width = 16;
4747

4848
/** Translates a tensor data type to the appropriate OpenCL type.
4949
*
@@ -126,6 +126,13 @@ GPUTarget get_arch_from_target(GPUTarget target);
126126
* @return the highest OpenCL version supported
127127
*/
128128
CLVersion get_cl_version(const cl::Device &device);
129+
/** Helper function to check whether the cl_khr_fp16 extension is supported
130+
*
131+
* @param[in] device A CL device
132+
*
133+
* @return True if the extension is supported
134+
*/
135+
bool fp16_support(const cl::Device &device);
129136
/** Helper function to check whether the arm_non_uniform_work_group_size extension is supported
130137
*
131138
* @param[in] device A CL device

arm_compute/core/CL/CLKernelLibrary.h

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,52 @@
3333

3434
namespace arm_compute
3535
{
36+
/** Build options */
37+
class CLBuildOptions
38+
{
39+
using StringSet = std::set<std::string>;
40+
41+
public:
42+
/** Default constructor. */
43+
CLBuildOptions();
44+
/** Adds option to the existing build option list
45+
*
46+
* @param[in] option Option to add
47+
*/
48+
void add_option(std::string option);
49+
/** Adds option if a given condition is true.
50+
*
51+
* @param[in] cond Condition to check
52+
* @param[in] option Option to add if condition is true
53+
*/
54+
void add_option_if(bool cond, std::string option);
55+
/** Adds first option if condition is true else the second one
56+
*
57+
* @param[in] cond Condition to check
58+
* @param[in] option_true Option to add if condition is true
59+
* @param[in] option_false Option to add if condition is false
60+
*/
61+
void add_option_if_else(bool cond, std::string option_true, std::string option_false);
62+
/** Appends given build options to the current object's options.
63+
*
64+
* @param[in] options Build options to append
65+
*/
66+
void add_options(const StringSet &options);
67+
/** Appends given build options to the current object's options if a given condition is true.
68+
*
69+
* @param[in] cond Condition to check
70+
* @param[in] options Options to add if condition is true
71+
*/
72+
void add_options_if(bool cond, const StringSet &options);
73+
/** Gets the current options list set
74+
*
75+
* @return Build options set
76+
*/
77+
const StringSet &options() const;
78+
79+
private:
80+
StringSet _build_opts; /**< Build options set */
81+
};
3682
/** Program class */
3783
class Program
3884
{
@@ -181,8 +227,8 @@ class CLKernelLibrary
181227
return _kernel_path;
182228
};
183229
/** Gets the source of the selected program
184-
*
185-
* @param[in] program_name Program name.
230+
*
231+
* @param[in] program_name Program name.
186232
*/
187233
std::string get_program_source(const std::string &program_name);
188234
/** Sets the CL context used to create programs.

arm_compute/core/CL/CLKernels.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@
4242
#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
4343
#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
4444
#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
45-
#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
46-
#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h"
47-
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h"
45+
#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
46+
#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
47+
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
4848
#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
4949
#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
5050
#include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h"
@@ -58,6 +58,10 @@
5858
#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
5959
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
6060
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
61+
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
62+
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
63+
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.h"
64+
#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
6165
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
6266
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
6367
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
@@ -72,7 +76,7 @@
7276
#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
7377
#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
7478
#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
75-
#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h"
79+
#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
7680
#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
7781
#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
7882
#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"

arm_compute/core/CL/ICLKernel.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,13 @@ class ICLKernel : public IKernel
180180
* @return The maximum workgroup size value.
181181
*/
182182
size_t get_max_workgroup_size();
183+
/** Get the global work size given an execution window
184+
*
185+
* @param[in] window Execution window
186+
*
187+
* @return Global work size of the given execution window
188+
*/
189+
static cl::NDRange gws_from_window(const Window &window);
183190

184191
private:
185192
/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.

arm_compute/core/CL/ICLMultiHOG.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ class ICLMultiHOG : public IMultiHOG
3535
public:
3636
/** Return a pointer to the requested OpenCL HOG model
3737
*
38-
* @param[in] index The index of the wanted OpenCL HOG model.
38+
* @param[in] index The index of the wanted OpenCL HOG model.
3939
*
4040
* @return A pointer pointed to the HOG model
4141
*/
4242
virtual ICLHOG *cl_model(size_t index) = 0;
4343
/** Return a constant pointer to the requested OpenCL HOG model
4444
*
45-
* @param[in] index The index of the wanted OpenCL HOG model.
45+
* @param[in] index The index of the wanted OpenCL HOG model.
4646
*
4747
* @return A constant pointer to the OpenCL HOG model
4848
*/

0 commit comments

Comments
 (0)