ARM-software
diff --git a/‎README.md‎
Lines changed: 2 additions & 0 deletions b/‎README.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎SConscript‎
Lines changed: 8 additions & 3 deletions b/‎SConscript‎
Lines changed: 8 additions & 3 deletions
diff --git a/‎SConstruct‎
Lines changed: 15 additions & 7 deletions b/‎SConstruct‎
Lines changed: 15 additions & 7 deletions
diff --git a/‎arm_compute/core/CL/CLKernelLibrary.h‎
Lines changed: 9 additions & 1 deletion b/‎arm_compute/core/CL/CLKernelLibrary.h‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎arm_compute/core/CL/CLKernels.h‎
Lines changed: 3 additions & 1 deletion b/‎arm_compute/core/CL/CLKernels.h‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎arm_compute/core/CL/OpenCL.h‎
Lines changed: 3 additions & 0 deletions b/‎arm_compute/core/CL/OpenCL.h‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h‎
Lines changed: 80 additions & 0 deletions b/‎arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h‎
Lines changed: 8 additions & 0 deletions b/‎arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h‎
Lines changed: 10 additions & 0 deletions b/‎arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h‎
Lines changed: 13 additions & 0 deletions b/‎arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h‎
Lines changed: 13 additions & 0 deletions
@@ -9,6 +9,7 @@ Related projects:
 
 Documentation available here:
 
+- [v18.01](https://arm-software.github.io/ComputeLibrary/v18.01/)
 - [v17.12](https://arm-software.github.io/ComputeLibrary/v17.12/)
 - [v17.10](https://arm-software.github.io/ComputeLibrary/v17.10/)
 - [v17.09](https://arm-software.github.io/ComputeLibrary/v17.09/)
@@ -19,6 +20,7 @@ Documentation available here:
 
 Binaries available here:
 
+- [v18.01](https://github.com/ARM-software/ComputeLibrary/releases/download/v18.01/arm_compute-v18.01-bin.tar.gz)
 - [v17.12](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.12/arm_compute-v17.12-bin.tar.gz)
 - [v17.10](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.10/arm_compute-v17.10-bin.tar.gz)
 - [v17.09](https://github.com/ARM-software/ComputeLibrary/releases/download/v17.09/arm_compute-v17.09-bin.tar.gz)
 
@@ -24,8 +24,8 @@ import os.path
 import re
 import subprocess
 
-VERSION = "v17.12"
-SONAME_VERSION="6.0.0"
+VERSION = "v18.01"
+SONAME_VERSION="7.0.0"
 
 Import('env')
 Import('vars')
@@ -175,6 +175,11 @@ if env['neon']:
     core_files += Glob('src/core/NEON/*.cpp')
     core_files += Glob('src/core/NEON/kernels/*.cpp')
 
+    # build winograd sources for both v7a and v8a
+    core_files += Glob('src/core/NEON/kernels/winograd/*.cpp')
+    core_files += Glob('src/core/NEON/kernels/winograd/transforms/*.cpp')
+    arm_compute_env.Append(CPPPATH = ["arm_compute/core/NEON/kernels/winograd/"])
+
     if env['arch'] == "armv7a":
         core_files += Glob('src/core/NEON/kernels/arm32/*.cpp')
 
@@ -235,7 +240,7 @@ if env['neon'] and env['opencl']:
     Export('arm_compute_graph_a')
 
     arm_compute_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']])
-    arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core", "OpenCL" ])
+    arm_compute_graph_so = build_library('arm_compute_graph', shared_graph_objects, static=False, libs = [ "arm_compute", "arm_compute_core"])
     Depends(arm_compute_graph_so, arm_compute_so)
     Depends(arm_compute_graph_so, opencl)
     Export('arm_compute_graph_so')
 
@@ -49,7 +49,7 @@ vars.AddVariables(
     BoolVariable("opencl", "Enable OpenCL support", True),
     BoolVariable("neon", "Enable Neon support", False),
     BoolVariable("gles_compute", "Enable OpenGL ES Compute Shader support", False),
-    BoolVariable("embed_kernels", "Embed OpenCL kernels and OpenGL ES compute shaders in library binary", False),
+    BoolVariable("embed_kernels", "Embed OpenCL kernels and OpenGL ES compute shaders in library binary", True),
     BoolVariable("set_soname", "Set the library's soname and shlibversion (requires SCons 2.4 or above)", False),
     BoolVariable("openmp", "Enable OpenMP backend", False),
     BoolVariable("cppthreads", "Enable C++11 threads backend", True),
@@ -86,7 +86,15 @@ env.Append(CXXFLAGS = ['-Wno-deprecated-declarations','-Wall','-DARCH_ARM',
 
 env.Append(CPPDEFINES = ['_GLIBCXX_USE_NANOSLEEP'])
 
-if os.environ.get('CXX', 'g++') == 'clang++':
+default_cpp_compiler = 'g++' if env['os'] != 'android' else 'clang++'
+default_c_compiler = 'gcc' if env['os'] != 'android' else 'clang'
+cpp_compiler = os.environ.get('CXX', default_cpp_compiler)
+c_compiler = os.environ.get('CC', default_c_compiler)
+
+if env['os'] == 'android' and ( cpp_compiler != 'clang++' or c_compiler != 'clang'):
+    print "WARNING: Only clang is officially supported to build the Compute Library for Android"
+
+if cpp_compiler == 'clang++':
     env.Append(CXXFLAGS = ['-Wno-format-nonliteral','-Wno-deprecated-increment-bool','-Wno-vla-extension','-Wno-mismatched-tags'])
 else:
     env.Append(CXXFLAGS = ['-Wlogical-op','-Wnoexcept','-Wstrict-null-sentinel'])
@@ -95,7 +103,7 @@ if env['cppthreads']:
     env.Append(CPPDEFINES = [('ARM_COMPUTE_CPP_SCHEDULER', 1)])
 
 if env['openmp']:
-    if os.environ.get('CXX', 'g++') == 'clang++':
+    if cpp_compiler == 'clang++':
         print "Clang does not support OpenMP. Use scheduler=cpp."
         Exit(1)
 
@@ -128,7 +136,7 @@ elif env['arch'] == 'arm64-v8a':
 elif env['arch'] == 'arm64-v8.2-a':
     env.Append(CPPDEFINES = ['ARM_COMPUTE_AARCH64_V8_2'])
 
-    if os.environ.get('CXX', 'g++') == 'clang++':
+    if cpp_compiler == 'clang++':
         env.Append(CXXFLAGS = ['-fno-integrated-as'])
 
     if env['os'] == 'linux':
@@ -147,8 +155,8 @@ elif env['arch'] == 'x86_64':
 if env['build'] == 'native':
     prefix = ""
 
-env['CC'] = prefix + os.environ.get('CC', 'gcc')
-env['CXX'] = prefix + os.environ.get('CXX', 'g++')
+env['CC'] = prefix + c_compiler
+env['CXX'] = prefix + cpp_compiler
 env['LD'] = prefix + "ld"
 env['AS'] = prefix + "as"
 env['AR'] = prefix + "ar"
@@ -161,7 +169,7 @@ if not GetOption("help"):
         print("ERROR: Compiler '%s' not found" % env['CXX'])
         Exit(1)
 
-    if os.environ.get('CXX','g++') == 'g++':
+    if cpp_compiler == 'g++':
         if env['arch'] == 'arm64-v8.2-a' and not version_at_least(compiler_ver, '6.2.1'):
             print "GCC 6.2.1 or newer is required to compile armv8.2-a code"
             Exit(1)
 
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -286,6 +286,14 @@ class CLKernelLibrary
      */
     cl::NDRange default_ndrange() const;
 
+    /** Clear the library's cache of binary programs
+     */
+    void clear_programs_cache()
+    {
+        _programs_map.clear();
+        _built_programs_map.clear();
+    }
+
 private:
     /** Load program and its dependencies.
      *
 
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,7 @@
 #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
 #include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
 #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
@@ -87,6 +88,7 @@
 #include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
 #include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
 #include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
 #include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
 #include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
 
@@ -59,6 +59,9 @@ class CLSymbols final
 #define DECLARE_FUNCTION_PTR(func_name) \
     std::function<decltype(func_name)> func_name##_ptr = nullptr
 
+    DECLARE_FUNCTION_PTR(clCreateContextFromType);
+    DECLARE_FUNCTION_PTR(clCreateCommandQueue);
+    DECLARE_FUNCTION_PTR(clGetContextInfo);
     DECLARE_FUNCTION_PTR(clBuildProgram);
     DECLARE_FUNCTION_PTR(clEnqueueNDRangeKernel);
     DECLARE_FUNCTION_PTR(clSetKernelArg);
 
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017, 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__
+#define __ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Deconvolution layer kernel on OpenCL.
+ */
+class CLDeconvolutionLayerUpsampleKernel : public ICLKernel
+{
+public:
+    /** Constructor */
+    CLDeconvolutionLayerUpsampleKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete;
+    /** Default Move Constructor. */
+    CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default;
+    /** Default move assignment operator. */
+    CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default;
+    /** Default destructor */
+    ~CLDeconvolutionLayerUpsampleKernel() = default;
+
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input        Source tensor. Data types supported: F32.
+     * @param[out] output       Destination tensor. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]  inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
+     * @param[in]  info         Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, const BorderSize &inner_border, const PadStrideInfo &info);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsampleKernel
+     *
+     * @param[in] input        Source tensor info. Data types supported: F32.
+     * @param[in] output       Destination tensor info. Data types supported: F32. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in] inner_border Top and right inner border sizes. These rows and columns will be filled with zero.
+     * @param[in] info         Contains padding and stride information described in @ref PadStrideInfo.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const BorderSize &inner_border, const PadStrideInfo &info);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+    BorderSize       _inner_border;
+    PadStrideInfo    _info;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H__ */
@@ -68,6 +68,14 @@ class CLGEMMInterleave4x4Kernel : public ICLKernel
      * @param[out] output Output tensor. Data type supported: same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMInterleave4x4Kernel
+     *
+     * @param[in] input  Input tensor info. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32
+     * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
 
     // Inherited methods overridden
     void run(const Window &window, cl::CommandQueue &queue) override;
 
@@ -61,6 +61,16 @@ class CLGEMMLowpMatrixMultiplyKernel : public ICLKernel
      * @param[in]  is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, bool is_interleaved_transposed = true);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyKernel
+     *
+     * @param[in] input0                    Input tensor info containing the interleaved Matrix A. Data type supported: QASYMM8
+     * @param[in] input1                    Input tensor info containing the transposed Matrix B. Data type supported: same as @p input0
+     * @param[in] output                    Output tensor info to store the result of matrix multiplication. Data type supported: S32
+     * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, bool is_interleaved_transposed = true);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
 
@@ -68,6 +68,19 @@ class CLGEMMLowpOffsetContributionKernel : public ICLKernel
      * @param[in]      b_offset       Offset to be added to each element of the matrix B.
      */
     void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
+     *
+     * @param[in] mm_result      Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+     * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+     *                           Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+     * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+     *                           Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+     * @param[in] a_offset       Offset to be added to each element of the matrix A.
+     * @param[in] b_offset       Offset to be added to each element of the matrix B.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;