Skip to content

Commit 52ba29e

Browse files
Jenkins authored and mdigiorgio committed
arm_compute v18.08
1 parent e2542c9 commit 52ba29e

File tree

7,385 files changed

+238553
-206814
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

7,385 files changed

+238553
-206814
lines changed

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2017 ARM Software
3+
Copyright (c) 2017-2018 ARM Software
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,10 @@
11

2-
:warning: **Deprecation notice: QS8 and QS16 data types will be removed in the next release** (As far as we know nobody uses these data types, if you do or think they are useful please open an Issue or send us an email):warning:
3-
42
Please report issues here: https://github.com/ARM-software/ComputeLibrary/issues
53
**Make sure you are using the latest version of the library before opening an issue. Thanks**
64

75
News:
86

9-
- We're hiring: Senior Machine Learning C++ Software Engineer in Cambridge (UK)
7+
- We're hiring: Staff Machine Learning C++ Software Engineer in Cambridge (UK)
108
- Required skills:
119
- Proficient in C++11.
1210
- Preferred skills:
@@ -16,7 +14,7 @@ News:
1614
- Experience programming in assembly language.
1715

1816
Interested ? Contact us: [email protected]
19-
- Come talk to us: [Gian Marco will be presenting his work at the EVS](https://www.embedded-vision.com/summit/even-faster-cnns-exploring-new-class-winograd-algorithms)
17+
- [Gian Marco's talk on optimizing CNNs with Winograd algorithms at the EVS](https://www.embedded-vision.com/platinum-members/arm/embedded-vision-training/videos/pages/may-2018-embedded-vision-summit-iodice)
2018

2119
Related projects:
2220

@@ -27,6 +25,7 @@ Related projects:
2725

2826
Documentation available here:
2927

28+
- [v18.08](https://arm-software.github.io/ComputeLibrary/v18.08/)
3029
- [v18.05](https://arm-software.github.io/ComputeLibrary/v18.05/)
3130
- [v18.03](https://arm-software.github.io/ComputeLibrary/v18.03/)
3231
- [v18.02](https://arm-software.github.io/ComputeLibrary/v18.02/)
@@ -41,6 +40,8 @@ Documentation available here:
4140

4241
Binaries available here:
4342

43+
- [v18.08-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.08/arm_compute-v18.08-bin-linux.tar.gz)
44+
- [v18.08-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.08/arm_compute-v18.08-bin-android.tar.gz)
4445
- [v18.05-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.05/arm_compute-v18.05-bin-linux.tar.gz)
4546
- [v18.05-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.05/arm_compute-v18.05-bin-android.tar.gz)
4647
- [v18.03-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.03/arm_compute-v18.03-bin-linux.tar.gz)

SConscript

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,8 @@ import os.path
2424
import re
2525
import subprocess
2626

27-
VERSION = "v18.05"
28-
SONAME_VERSION="11.0.0"
27+
VERSION = "v18.08"
28+
SONAME_VERSION="12.0.0"
2929

3030
Import('env')
3131
Import('vars')
@@ -43,7 +43,7 @@ def build_library(name, sources, static=False, libs=[]):
4343
library_prefix = obj[0].path[:-(1 + len(SONAME_VERSION))]
4444
real_lib = "%s.%s" % (library_prefix, SONAME_VERSION)
4545

46-
for f in Glob("#%s*" % library_prefix):
46+
for f in Glob("#%s.*" % library_prefix):
4747
if str(f) != real_lib:
4848
symlinks.append("%s/%s" % (directory,str(f)))
4949

@@ -118,15 +118,16 @@ def create_version_file(target, source, env):
118118
except (OSError, subprocess.CalledProcessError):
119119
git_hash="unknown"
120120

121-
version_filename = "%s/arm_compute_version.embed" % Dir("src/core").path
122121
build_info = "\"arm_compute_version=%s Build options: %s Git hash=%s\"" % (VERSION, vars.args, git_hash.strip())
123122
with open(target[0].get_path(), "w") as fd:
124123
fd.write(build_info)
125124

126125
arm_compute_env = env.Clone()
126+
version_file = arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file)
127+
arm_compute_env.AlwaysBuild(version_file)
127128

128129
# Generate embed files
129-
generate_embed = [ arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file) ]
130+
generate_embed = [ version_file ]
130131
if env['opencl'] and env['embed_kernels']:
131132
cl_files = Glob('src/core/CL/cl_kernels/*.cl')
132133
cl_files += Glob('src/core/CL/cl_kernels/*.h')
@@ -190,6 +191,7 @@ if env['opencl']:
190191
if env['neon']:
191192
core_files += Glob('src/core/NEON/*.cpp')
192193
core_files += Glob('src/core/NEON/kernels/*.cpp')
194+
core_files += Glob('src/core/NEON/kernels/assembly/*.cpp')
193195

194196
core_files += Glob('src/core/NEON/kernels/arm_gemm/*.cpp')
195197

@@ -209,6 +211,7 @@ if env['neon']:
209211

210212
runtime_files += Glob('src/runtime/NEON/*.cpp')
211213
runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
214+
runtime_files += Glob('src/runtime/NEON/functions/assembly/*.cpp')
212215

213216
if env['gles_compute']:
214217
if env['os'] != 'android':

SConstruct

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ env.Append(CXXFLAGS = ['-Wno-deprecated-declarations','-Wall','-DARCH_ARM',
8989
'-Wextra','-Wno-unused-parameter','-pedantic','-Wdisabled-optimization','-Wformat=2',
9090
'-Winit-self','-Wstrict-overflow=2','-Wswitch-default',
9191
'-fpermissive','-std=gnu++11','-Wno-vla','-Woverloaded-virtual',
92-
'-Wctor-dtor-privacy','-Wsign-promo','-Weffc++','-Wno-format-nonliteral','-Wno-overlength-strings','-Wno-strict-overflow','-Wno-implicit-fallthrough'])
92+
'-Wctor-dtor-privacy','-Wsign-promo','-Weffc++','-Wno-format-nonliteral','-Wno-overlength-strings','-Wno-strict-overflow'])
9393

9494
env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP'])
9595

@@ -104,7 +104,7 @@ if env['os'] == 'android' and ( 'clang++' not in cpp_compiler or 'clang' not in
104104
if 'clang++' in cpp_compiler:
105105
env.Append(CXXFLAGS = ['-Wno-format-nonliteral','-Wno-deprecated-increment-bool','-Wno-vla-extension','-Wno-mismatched-tags'])
106106
else:
107-
env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel'])
107+
env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel','-Wno-implicit-fallthrough'])
108108

109109
if env['cppthreads']:
110110
env.Append(CPPDEFINES = [('ARM_COMPUTE_CPP_SCHEDULER', 1)])

arm_compute/core/CL/CLHelpers.h

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ std::string get_underlying_cl_type_from_data_type(const DataType &dt);
6969
*
7070
* @return the GPU target
7171
*/
72-
GPUTarget get_target_from_device(cl::Device &device);
72+
GPUTarget get_target_from_device(const cl::Device &device);
7373

7474
/** Helper function to get the highest OpenCL version supported
7575
*
@@ -102,5 +102,30 @@ bool fp16_supported(const cl::Device &device);
102102
* @return True if the extension is supported
103103
*/
104104
bool arm_non_uniform_workgroup_supported(const cl::Device &device);
105+
/** Helper function to check whether the cl_arm_integer_dot_product_int8 extension is supported
106+
*
107+
* @param[in] device A CL device
108+
*
109+
* @return True if the extension is supported
110+
*/
111+
bool dot8_supported(const cl::Device &device);
112+
113+
/** Helper function to check whether the cl_arm_integer_dot_product_accumulate_int8 extension is supported
114+
*
115+
* @param[in] device A CL device
116+
*
117+
* @return True if the extension is supported
118+
*/
119+
bool dot8_acc_supported(const cl::Device &device);
120+
121+
/** This function checks if the Winograd configuration (defined through the output tile, kernel size and the data layout) is supported on OpenCL
122+
*
123+
* @param[in] output_tile Output tile for the Winograd filtering algorithm
124+
* @param[in] kernel_size Kernel size for the Winograd filtering algorithm
125+
* @param[in] data_layout Data layout of the input tensor
126+
*
127+
* @return True if the configuration is supported
128+
*/
129+
bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Size2D &kernel_size, DataLayout data_layout);
105130
}
106131
#endif /* __ARM_COMPUTE_CLHELPERS_H__ */

arm_compute/core/CL/CLKernelLibrary.h

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -208,11 +208,11 @@ class CLKernelLibrary
208208
static CLKernelLibrary &get();
209209
/** Initialises the kernel library.
210210
*
211-
* @param[in] kernel_path (Optional) Path of the directory from which kernel sources are loaded.
212-
* @param[in] context (Optional) CL context used to create programs.
213-
* @param[in] device (Optional) CL device for which the programs are created.
211+
* @param[in] kernel_path Path of the directory from which kernel sources are loaded.
212+
* @param[in] context CL context used to create programs.
213+
* @param[in] device CL device for which the programs are created.
214214
*/
215-
void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault())
215+
void init(std::string kernel_path, cl::Context context, cl::Device device)
216216
{
217217
_kernel_path = std::move(kernel_path);
218218
_context = std::move(context);
@@ -277,6 +277,12 @@ class CLKernelLibrary
277277
return _context;
278278
}
279279

280+
/** Gets the CL device for which the programs are created. */
281+
cl::Device &get_device()
282+
{
283+
return _device;
284+
}
285+
280286
/** Sets the CL device for which the programs are created.
281287
*
282288
* @param[in] device A CL device.
@@ -329,6 +335,18 @@ class CLKernelLibrary
329335
*/
330336
void add_built_program(const std::string &built_program_name, cl::Program program);
331337

338+
/** Returns true if FP16 is supported by the CL device
339+
*
340+
* @return true if the CL device supports FP16
341+
*/
342+
bool fp16_supported() const;
343+
344+
/** Returns true if int64_base_atomics extension is supported by the CL device
345+
*
346+
* @return true if the CL device supports int64_base_atomics extension
347+
*/
348+
bool int64_base_atomics_supported() const;
349+
332350
private:
333351
/** Load program and its dependencies.
334352
*

arm_compute/core/CL/CLKernels.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
3030
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
3131
#include "arm_compute/core/CL/kernels/CLArithmeticAdditionKernel.h"
32+
#include "arm_compute/core/CL/kernels/CLArithmeticDivisionKernel.h"
3233
#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionKernel.h"
3334
#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
3435
#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
@@ -61,6 +62,7 @@
6162
#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
6263
#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
6364
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
65+
#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
6466
#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
6567
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
6668
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"

arm_compute/core/CL/CLValidate.h

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Copyright (c) 2018 ARM Limited.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
#ifndef __ARM_COMPUTE_CL_VALIDATE_H__
25+
#define __ARM_COMPUTE_CL_VALIDATE_H__
26+
27+
#include "arm_compute/core/Validate.h"
28+
29+
namespace arm_compute
30+
{
31+
#define ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(tensor) \
32+
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))
33+
34+
#define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor) \
35+
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))
36+
37+
/** Return an error if int64_base_atomics extension is not supported by the device.
38+
*
39+
* @param[in] function Function in which the error occurred.
40+
* @param[in] file Name of the file where the error occurred.
41+
* @param[in] line Line on which the error occurred.
42+
*
43+
* @return Status
44+
*/
45+
inline arm_compute::Status error_on_unsupported_int64_base_atomics(const char *function, const char *file, const int line)
46+
{
47+
if(!CLKernelLibrary::get().int64_base_atomics_supported())
48+
{
49+
return ARM_COMPUTE_CREATE_ERROR_LOC(arm_compute::ErrorCode::UNSUPPORTED_EXTENSION_USE, function, file, line, "Atomic functions are not supported");
50+
}
51+
return arm_compute::Status{};
52+
}
53+
54+
#define ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
55+
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));
56+
57+
#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
58+
ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__));
59+
60+
} // namespace arm_compute
61+
#endif /* __ARM_COMPUTE_CL_VALIDATE_H__ */

arm_compute/core/CL/ICLKernel.h

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,11 +61,23 @@ class ICLKernel : public IKernel
6161
{
6262
return 2 + 2 * dimension_size;
6363
}
64+
using IKernel::configure; //Prevent children from calling IKernel::configure() directly
65+
protected:
66+
/** Configure the kernel's window and local workgroup size hint.
67+
*
68+
* @param[in] window The maximum window which will be returned by window()
69+
* @param[in] lws_hint (Optional) Local-Workgroup-Size to use.
70+
*/
71+
void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange())
72+
{
73+
_lws_hint = lws_hint;
74+
IKernel::configure(window);
75+
}
6476

6577
public:
6678
/** Constructor */
6779
ICLKernel()
68-
: _kernel(nullptr), _lws_hint(CLKernelLibrary::get().default_ndrange()), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0)
80+
: _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint()
6981
{
7082
}
7183
/** Returns a reference to the OpenCL kernel of this object.
@@ -196,6 +208,7 @@ class ICLKernel : public IKernel
196208
*/
197209
void set_lws_hint(const cl::NDRange &lws_hint)
198210
{
211+
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure()
199212
_lws_hint = lws_hint;
200213
}
201214

@@ -282,10 +295,11 @@ class ICLKernel : public IKernel
282295

283296
protected:
284297
cl::Kernel _kernel; /**< OpenCL kernel to run */
285-
cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
286298
GPUTarget _target; /**< The targeted GPU */
287299
std::string _config_id; /**< Configuration ID */
288300
size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
301+
private:
302+
cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
289303
};
290304

291305
/** Add the kernel to the command queue with the given window.

arm_compute/core/CL/OpenCL.h

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,14 @@
3131
#ifndef ARM_COMPUTE_NO_EXCEPTIONS
3232
#define CL_HPP_ENABLE_EXCEPTIONS
3333
#endif // ARM_COMPUTE_NO_EXCEPTIONS
34-
#define CL_HPP_CL_1_2_DEFAULT_BUILD
34+
#define CL_TARGET_OPENCL_VERSION 200
3535
#define CL_HPP_TARGET_OPENCL_VERSION 110
3636
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
37+
#pragma GCC diagnostic push
38+
#pragma GCC diagnostic ignored "-Weffc++"
39+
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
3740
#include <CL/cl2.hpp>
41+
#pragma GCC diagnostic pop
3842

3943
namespace cl
4044
{
@@ -78,6 +82,7 @@ class CLSymbols final
7882
#define DECLARE_FUNCTION_PTR(func_name) \
7983
std::function<decltype(func_name)> func_name##_ptr = nullptr
8084

85+
DECLARE_FUNCTION_PTR(clCreateContext);
8186
DECLARE_FUNCTION_PTR(clCreateContextFromType);
8287
DECLARE_FUNCTION_PTR(clCreateCommandQueue);
8388
DECLARE_FUNCTION_PTR(clGetContextInfo);

0 commit comments

Comments
 (0)