-
Notifications
You must be signed in to change notification settings - Fork 0
Description
I noticed that when TensorFlow is built with --config=mkl or --config=mkl_threadpool, it just uses the CPU and not the Intel GPU.
intel_gpu_top says 0% usage while running a test program.
With this patch against the tensorflow recipe:
diff --git a/third_party/mkl_dnn/mkldnn.BUILD b/third_party/mkl_dnn/mkldnn.BUILD
index 11b9b917fa0..8a395b431a8 100644
--- a/third_party/mkl_dnn/mkldnn.BUILD
+++ b/third_party/mkl_dnn/mkldnn.BUILD
@@ -19,7 +19,7 @@ template_rule(
out = "include/mkldnn_config.h",
substitutions = {
"#cmakedefine MKLDNN_CPU_BACKEND MKLDNN_BACKEND_${MKLDNN_CPU_BACKEND}": "#define MKLDNN_CPU_BACKEND MKLDNN_BACKEND_NATIVE",
- "#cmakedefine MKLDNN_GPU_BACKEND MKLDNN_BACKEND_${MKLDNN_GPU_BACKEND}": "#define MKLDNN_GPU_BACKEND MKLDNN_BACKEND_NONE",
+ "#cmakedefine MKLDNN_GPU_BACKEND MKLDNN_BACKEND_${MKLDNN_GPU_BACKEND}": "#define MKLDNN_GPU_BACKEND MKLDNN_BACKEND_OCL",
},
)
diff --git a/third_party/mkl_dnn/mkldnn_v1.BUILD b/third_party/mkl_dnn/mkldnn_v1.BUILD
index f88d50dfc19..cc3f4376941 100644
--- a/third_party/mkl_dnn/mkldnn_v1.BUILD
+++ b/third_party/mkl_dnn/mkldnn_v1.BUILD
@@ -21,13 +21,13 @@ load(
_DNNL_RUNTIME_OMP = {
"#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}": "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_OMP",
"#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}": "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_OMP",
- "#cmakedefine DNNL_GPU_RUNTIME DNNL_RUNTIME_${DNNL_GPU_RUNTIME}": "#define DNNL_GPU_RUNTIME DNNL_RUNTIME_NONE",
+ "#cmakedefine DNNL_GPU_RUNTIME DNNL_RUNTIME_${DNNL_GPU_RUNTIME}": "#define DNNL_GPU_RUNTIME DNNL_RUNTIME_OCL",
}
_DNNL_RUNTIME_THREADPOOL = {
"#cmakedefine DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_${DNNL_CPU_THREADING_RUNTIME}": "#define DNNL_CPU_THREADING_RUNTIME DNNL_RUNTIME_THREADPOOL",
"#cmakedefine DNNL_CPU_RUNTIME DNNL_RUNTIME_${DNNL_CPU_RUNTIME}": "#define DNNL_CPU_RUNTIME DNNL_RUNTIME_THREADPOOL",
- "#cmakedefine DNNL_GPU_RUNTIME DNNL_RUNTIME_${DNNL_GPU_RUNTIME}": "#define DNNL_GPU_RUNTIME DNNL_RUNTIME_NONE",
+ "#cmakedefine DNNL_GPU_RUNTIME DNNL_RUNTIME_${DNNL_GPU_RUNTIME}": "#define DNNL_GPU_RUNTIME DNNL_RUNTIME_OCL",
}
template_rule(
the build eventually fails with:
/home/zozo/sicom-manifest-3.3/tmp-sicom-glibc/hosttools/gcc @bazel-out/host/bin/tensorflow/python/gen_tensor_forest_ops_py_wrappers_cc-2.params)
Execution platform: @local_execution_config_platform//:platform
bazel-out/host/bin/_solib_k8/_U_S_Stensorflow_Spython_Cgen_Utensor_Uforest_Uops_Upy_Uwrappers_Ucc___Utensorflow/libtensorflow_framework.so.2: error: undefined reference to 'dnnl::impl::gpu::ocl::ocl_gpu_device_info_t::get_llc_cache_size() const'
bazel-out/host/bin/_solib_k8/_U_S_Stensorflow_Spython_Cgen_Utensor_Uforest_Uops_Upy_Uwrappers_Ucc___Utensorflow/libtensorflow_framework.so.2: error: undefined reference to 'dnnl::impl::gpu::ocl::get_ocl_devices(std::vector<_cl_device_id*, std::allocator<_cl_device_id*> >*, unsigned long)'
bazel-out/host/bin/_solib_k8/_U_S_Stensorflow_Spython_Cgen_Utensor_Uforest_Uops_Upy_Uwrappers_Ucc___Utensorflow/libtensorflow_framework.so.2: error: undefined reference to 'dnnl::impl::gpu::ocl::ocl_gpu_engine_t::init()'
bazel-out/host/bin/_solib_k8/_U_S_Stensorflow_Spython_Cgen_Utensor_Uforest_Uops_Upy_Uwrappers_Ucc___Utensorflow/libtensorflow_framework.so.2: error: undefined reference to 'vtable for dnnl::impl::gpu::ocl::ocl_gpu_engine_t'
/home/zozo/sicom-manifest-3.3/tmp-sicom-glibc/hosttools/ld.gold: the vtable symbol may be undefined because the class is missing its key function
bazel-out/host/bin/_solib_k8/_U_S_Stensorflow_Spython_Cgen_Utensor_Uforest_Uops_Upy_Uwrappers_Ucc___Utensorflow/libtensorflow_framework.so.2: error: undefined reference to 'clReleaseContext'
bazel-out/host/bin/_solib_k8/_U_S_Stensorflow_Spython_Cgen_Utensor_Uforest_Uops_Upy_Uwrappers_Ucc___Utensorflow/libtensorflow_framework.so.2: error: undefined reference to 'clGetDeviceInfo'
collect2: error: ld returned 1 exit status
apparently because libtensorflow_framework.so.2 doesn't link with -lOpenCL
I wonder why Tensorflow doesn't use a proper build framework like autotools, CMake or meson instead of this homegrown Bazel.
Also, why does it insist on using custom downloaded and custom built dependencies instead of external ones?
Can you please add a proper patch to allow building TensorFlow with either the external oneDNN from meta-intel that allows OpenCL GPU usage to be enabled, or one that enables OpenCL internally in TensorFlow with --config=mkl?