Skip to content

Commit 4ba87db

Browse files
author
Jenkins
committed
arm_compute v19.05
1 parent 29f6788 commit 4ba87db

File tree

9,844 files changed

+287103
-143071
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

9,844 files changed

+287103
-143071
lines changed

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
MIT License
22

3-
Copyright (c) 2017-2018 ARM Software
3+
Copyright (c) 2017-2019 ARM Software
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ Related projects:
2323

2424
Documentation available here:
2525

26-
- [v19.02](https://arm-software.github.io/ComputeLibrary/v19.02/index.xhtml)
26+
- [v19.05](https://arm-software.github.io/ComputeLibrary/v19.05/)
27+
- [v19.02](https://arm-software.github.io/ComputeLibrary/v19.02/)
2728
- [v18.11](https://arm-software.github.io/ComputeLibrary/v18.11/index.xhtml)
2829
- [v18.08](https://arm-software.github.io/ComputeLibrary/v18.08/)
2930
- [v18.05](https://arm-software.github.io/ComputeLibrary/v18.05/)
@@ -40,6 +41,8 @@ Documentation available here:
4041

4142
Binaries available here:
4243

44+
- [v19.05-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.05/arm_compute-v19.05-bin-linux.tar.gz)
45+
- [v19.05-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.05/arm_compute-v19.05-bin-android.tar.gz)
4346
- [v19.02-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.02/arm_compute-v19.02-bin-linux.tar.gz)
4447
- [v19.02-android](https://github.com/ARM-software/ComputeLibrary/releases/download/v19.02/arm_compute-v19.02-bin-android.tar.gz)
4548
- [v18.11-linux](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.11/arm_compute-v18.11-bin-linux.tar.gz)

SConscript

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import os.path
2424
import re
2525
import subprocess
2626

27-
VERSION = "v19.02"
28-
SONAME_VERSION="14.0.0"
27+
VERSION = "v19.05"
28+
SONAME_VERSION="15.0.0"
2929

3030
Import('env')
3131
Import('vars')
@@ -70,7 +70,7 @@ def resolve_includes(target, source, env):
7070
for i in range(len(source)):
7171
src = source[i]
7272
dst = target[i]
73-
contents = src.get_contents().splitlines()
73+
contents = src.get_contents().decode('utf-8').splitlines()
7474
entry = FileEntry(target_name=dst, file_contents=contents)
7575
files.append((os.path.basename(src.get_path()),entry))
7676

@@ -186,11 +186,13 @@ if env['openmp']:
186186
if env['opencl']:
187187
core_files += Glob('src/core/CL/*.cpp')
188188
core_files += Glob('src/core/CL/kernels/*.cpp')
189+
core_files += Glob('src/core/CL/gemm/*.cpp')
190+
core_files += Glob('src/core/CL/gemm/reshaped/*.cpp')
191+
core_files += Glob('src/core/CL/gemm/reshaped_only_rhs/*.cpp')
189192

190193
runtime_files += Glob('src/runtime/CL/*.cpp')
191194
runtime_files += Glob('src/runtime/CL/functions/*.cpp')
192195
runtime_files += Glob('src/runtime/CL/tuners/*.cpp')
193-
runtime_files += Glob('src/runtime/CL/gemm_reshaped/*.cpp')
194196

195197
graph_files += Glob('src/graph/backends/CL/*.cpp')
196198

@@ -205,7 +207,7 @@ if env['neon']:
205207
# build winograd sources for either v7a / v8a
206208
core_files += Glob('src/core/NEON/kernels/convolution/*/*.cpp')
207209
core_files += Glob('src/core/NEON/kernels/convolution/winograd/*/*.cpp')
208-
arm_compute_env.Append(CPPPATH = ["arm_compute/core/NEON/kernels/winograd/", "arm_compute/core/NEON/kernels/assembly/"])
210+
arm_compute_env.Append(CPPPATH = ["arm_compute/core/NEON/kernels/convolution/winograd/","arm_compute/core/NEON/kernels/convolution/common/" , "arm_compute/core/NEON/kernels/assembly/"])
209211

210212
graph_files += Glob('src/graph/backends/NEON/*.cpp')
211213

SConstruct

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ elif env['arch'] == 'x86_32':
204204
env.Append(CCFLAGS = ['-m32'])
205205
env.Append(LINKFLAGS = ['-m32'])
206206
elif env['arch'] == 'x86_64':
207+
env.Append(CXXFLAGS = ['-fPIC'])
207208
env.Append(CCFLAGS = ['-m64'])
208209
env.Append(LINKFLAGS = ['-m64'])
209210

@@ -296,6 +297,9 @@ env.Append(LINKFLAGS = env['extra_link_flags'])
296297

297298
Default( install_include("arm_compute"))
298299
Default( install_include("support"))
300+
Default( install_include("utils"))
301+
for dirname in os.listdir("./include"):
302+
Default( install_include("include/%s" % dirname))
299303

300304
Export('version_at_least')
301305

arm_compute/core/CL/CLHelpers.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2018 ARM Limited.
2+
* Copyright (c) 2016-2019 ARM Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -144,5 +144,14 @@ bool cl_winograd_convolution_layer_supported(const Size2D &output_tile, const Si
144144
* @return preferred vector width
145145
*/
146146
size_t preferred_vector_width(const cl::Device &device, DataType dt);
147+
148+
/** Helper function to check if "dummy work-items" are preferred to have a power of two NDRange
149+
* If dummy work-items are enabled, it is the OpenCL kernel's responsibility to check whether the work-item is out of range
150+
*
151+
* @param[in] device A CL device
152+
*
153+
* @return True if dummy work-items should be preferred to dispatch the NDRange
154+
*/
155+
bool preferred_dummy_work_items_support(const cl::Device &device);
147156
}
148157
#endif /* __ARM_COMPUTE_CLHELPERS_H__ */

arm_compute/core/CL/CLKernelLibrary.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2018 ARM Limited.
2+
* Copyright (c) 2016-2019 ARM Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -333,7 +333,7 @@ class CLKernelLibrary
333333
* @param[in] built_program_name Name of the program
334334
* @param[in] program Built program to add to the cache
335335
*/
336-
void add_built_program(const std::string &built_program_name, cl::Program program);
336+
void add_built_program(const std::string &built_program_name, const cl::Program &program);
337337

338338
/** Returns true if FP16 is supported by the CL device
339339
*

arm_compute/core/CL/CLKernels.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@
4646
#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
4747
#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
4848
#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
49+
#include "arm_compute/core/CL/kernels/CLCropKernel.h"
4950
#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
51+
#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
5052
#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
5153
#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
5254
#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
@@ -63,6 +65,9 @@
6365
#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
6466
#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
6567
#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
68+
#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
69+
#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
70+
#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
6671
#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
6772
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
6873
#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
@@ -71,6 +76,7 @@
7176
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
7277
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
7378
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
79+
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
7480
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
7581
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
7682
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
@@ -81,6 +87,7 @@
8187
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
8288
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
8389
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
90+
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
8491
#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
8592
#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
8693
#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
@@ -93,6 +100,7 @@
93100
#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
94101
#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
95102
#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
103+
#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
96104
#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
97105
#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
98106
#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright (c) 2019 ARM Limited.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
#ifndef __ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H__
25+
#define __ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H__
26+
27+
#include "arm_compute/core/GPUTarget.h"
28+
#include "arm_compute/core/Types.h"
29+
30+
namespace arm_compute
31+
{
32+
/** Basic interface for the GEMM kernel configuration */
33+
class ICLGEMMKernelConfiguration
34+
{
35+
public:
36+
/** Constructor
37+
*
38+
* @param[in] arch GPU target
39+
*/
40+
ICLGEMMKernelConfiguration(GPUTarget arch)
41+
: _target(arch)
42+
{
43+
}
44+
/** Prevent instances of this class from being copy-constructed */
45+
ICLGEMMKernelConfiguration(const ICLGEMMKernelConfiguration &) = delete;
46+
/** Prevent instances of this class from being copy-assigned */
47+
ICLGEMMKernelConfiguration &operator=(const ICLGEMMKernelConfiguration &) = delete;
48+
/** Default Move Constructor. */
49+
ICLGEMMKernelConfiguration(ICLGEMMKernelConfiguration &&) = default;
50+
/** Default move assignment operator */
51+
ICLGEMMKernelConfiguration &operator=(ICLGEMMKernelConfiguration &&) = default;
52+
/** Virtual destructor */
53+
virtual ~ICLGEMMKernelConfiguration() = default;
54+
/** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used
55+
*
56+
* @param[in] m Number of rows LHS matrix
57+
* @param[in] n Number of columns RHS matrix
58+
* @param[in] k Number of columns LHS matrix or number of rows RHS matrix
59+
* @param[in] b Batch size
60+
* @param[in] data_type Data type
61+
*/
62+
virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
63+
64+
protected:
65+
GPUTarget _target;
66+
};
67+
} // namespace arm_compute
68+
#endif /*__ARM_COMPUTE_ICLGEMMKERNELCONFIGURATION_H__ */

arm_compute/core/CL/ICLKernel.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2018 ARM Limited.
2+
* Copyright (c) 2016-2019 ARM Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -308,14 +308,16 @@ class ICLKernel : public IKernel
308308
*
309309
* @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
310310
*
311-
* @param[in,out] queue OpenCL command queue.
312-
* @param[in] kernel Kernel to enqueue
313-
* @param[in] window Window the kernel has to process.
314-
* @param[in] lws_hint Local workgroup size requested. Default is based on the device target.
311+
* @param[in,out] queue OpenCL command queue.
312+
* @param[in] kernel Kernel to enqueue
313+
* @param[in] window Window the kernel has to process.
314+
* @param[in] lws_hint (Optional) Local workgroup size requested. Default is based on the device target.
315+
* @param[in]     use_dummy_work_items (Optional) Use dummy work items in order to have a two-dimensional, power-of-two NDRange. Default is false
316+
*                                      Note: it is the kernel's responsibility to check whether the work-item is out of range
315317
*
316318
* @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
317319
*/
318-
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange());
320+
void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items = false);
319321

320322
/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
321323
*

arm_compute/core/CL/OpenCL.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2018 ARM Limited.
2+
* Copyright (c) 2016-2019 ARM Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -131,6 +131,9 @@ class CLSymbols final
131131
DECLARE_FUNCTION_PTR(clEnqueueMarker);
132132
DECLARE_FUNCTION_PTR(clWaitForEvents);
133133

134+
// Third-party extensions
135+
DECLARE_FUNCTION_PTR(clImportMemoryARM);
136+
134137
#undef DECLARE_FUNCTION_PTR
135138

136139
private:

0 commit comments

Comments
 (0)