diff --git a/CMakeLists.txt b/CMakeLists.txt index 62b99d136..144c8b343 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,7 +112,7 @@ set(SOURCES src/cpu/backend.cc src/cpu/cpu_info.cc src/cpu/cpu_isa.cc - src/cpu/kernels.cc + #src/cpu/kernels.cc src/cpu/parallel.cc src/cpu/primitives.cc src/decoding.cc @@ -242,7 +242,10 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(aarch64)" elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(amd64)|(AMD64)") add_definitions(-DCT2_X86_BUILD) set(CT2_BUILD_ARCH "x86_64") - +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(ppc64le)|(PPC64LE)") + add_definitions(-DCT2_PPC64LE_BUILD) + set(CT2_BUILD_ARCH "ppc64le") + if(BUILD_SHARED_LIBS) set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() @@ -269,6 +272,8 @@ if(ENABLE_CPU_DISPATCH) endif() elseif(CT2_BUILD_ARCH STREQUAL "arm64") ct2_compile_kernels_for_isa(neon "-DUSE_NEON") + elseif(CT2_BUILD_ARCH STREQUAL "ppc64le") + ct2_compile_kernels_for_isa(ppc64le "-mcpu=power10 -O3 -flto") endif() endif() diff --git a/docker/Dockerfile.ppc64le b/docker/Dockerfile.ppc64le new file mode 100644 index 000000000..e608d359a --- /dev/null +++ b/docker/Dockerfile.ppc64le @@ -0,0 +1,91 @@ +FROM ppc64le/ubuntu:22.04 as builder + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + python3-dev \ + python3-pip \ + wget \ + git \ + build-essential \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + + +WORKDIR /root + +RUN python3 -m pip --no-cache-dir install cmake==3.22.* + +RUN wget -qO- https://public.dhe.ibm.com/software/server/POWER/Linux/toolchain/at/ubuntu/dists/jammy/615d762f.gpg.key | tee /etc/apt/trusted.gpg.d/615d762f.asc && \ + echo "deb [signed-by=/etc/apt/trusted.gpg.d/615d762f.asc] https://public.dhe.ibm.com/software/server/POWER/Linux/toolchain/at/ubuntu jammy at17.0" >> /etc/apt/sources.list && \ + cat /etc/apt/sources.list && \ + cat /etc/apt/trusted.gpg.d/615d762f.asc && \ + apt update && \ + cat /etc/apt/sources.list && \ + apt install -y 
advance-toolchain-at17.0-runtime advance-toolchain-at17.0-devel advance-toolchain-at17.0-perf advance-toolchain-at17.0-mcore-libs + +ENV SLEEF_VERSION=3.6.1 +RUN wget -q https://github.com/shibatch/sleef/archive/refs/tags/${SLEEF_VERSION}.tar.gz && \ + tar xf *.tar.gz && \ + rm *.tar.gz && \ + cd sleef* && \ + mkdir build && \ + cd build && \ + cmake -DSLEEF_BUILD_INLINE_HEADERS=TRUE -DCMAKE_CXX_FLAGS='-mcpu=power10 -mtune=power10 -O3 -std=gnu++11 -maltivec -mabi=altivec -mstrict-align ' -DCMAKE_C_COMPILER=/opt/at17.0/bin/gcc -DCMAKE_CXX_COMPILER=/opt/at17.0/bin/g++ -DAT_PATH=/opt/at17.0/ -DBUILD_SHARED_LIBS=FALSE -DBUILD_TESTS=FALSE -DENFORCE_VSX3=TRUE -DSLEEF_SHOW_CONFIG=1 -DCMAKE_BUILD_TYPE=Release .. && \ + cd .. && \ + cmake --build build -j --clean-first && \ + cmake --install build --prefix=/usr/ + + +ENV ONEDNN_VERSION=3.1.1 +RUN wget -q https://github.com/oneapi-src/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz && \ + tar xf *.tar.gz && \ + rm *.tar.gz && \ + cd oneDNN-* && \ + cmake -DCMAKE_BUILD_TYPE=Release -DONEDNN_LIBRARY_TYPE=STATIC -DONEDNN_BUILD_EXAMPLES=OFF -DONEDNN_BUILD_TESTS=OFF -DONEDNN_ENABLE_WORKLOAD=INFERENCE -DONEDNN_ENABLE_PRIMITIVE="CONVOLUTION;REORDER" -DONEDNN_BUILD_GRAPH=OFF -DCMAKE_CXX_FLAGS='-mcpu=power10 -mtune=power10 -O3 -maltivec' -DOPENMP_RUNTIME=COMP . && \ + make -j$(nproc) install && \ + cd .. && \ + rm -r oneDNN-* + +COPY third_party third_party +COPY cli cli +COPY include include +COPY src src +COPY cmake cmake +COPY python python +COPY CMakeLists.txt . + +ARG CXX_FLAGS +ENV CXX_FLAGS=${CXX_FLAGS:-"-mcpu=power10 -mtune=power10 -O3 -ffp-contract=off"} + +ENV CTRANSLATE2_ROOT=/opt/ctranslate2 + +RUN mkdir build && \ + cd build && \ + cmake -DCMAKE_INSTALL_PREFIX=${CTRANSLATE2_ROOT} \ + -DWITH_CUDA=OFF -DWITH_MKL=OFF -DWITH_OPENBLAS=OFF \ + -DWITH_DNNL=ON -DOPENMP_RUNTIME=COMP \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_BUILD_TYPE=Release \ + .. 
&& \ + VERBOSE=1 make -j$(nproc) install + +ENV LANG=en_US.UTF-8 +COPY README.md . + +RUN cd python && \ + python3 -m pip --no-cache-dir install -r install_requirements.txt && \ + python3 setup-ppc64le.py bdist_wheel --dist-dir $CTRANSLATE2_ROOT + + +ENV CTRANSLATE2_ROOT=/opt/ctranslate2 +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CTRANSLATE2_ROOT/lib + +#COPY --from=builder $CTRANSLATE2_ROOT $CTRANSLATE2_ROOT +RUN pip3 install --force-reinstall ninja + + +RUN python3 -m pip --no-cache-dir install $CTRANSLATE2_ROOT/*.whl && \ + rm $CTRANSLATE2_ROOT/*.whl + +ENTRYPOINT ["/opt/ctranslate2/bin/ct2-translator"] diff --git a/docs/ppc64le.md b/docs/ppc64le.md new file mode 100644 index 000000000..4828aad2b --- /dev/null +++ b/docs/ppc64le.md @@ -0,0 +1,56 @@ +# IBM Power10 -ppc64le + +CTranslate2 fully supports the IBM Power10 MMA and VSX extensions. Each Power10 core has 4 Matrix Math Accelerator units. For optimum performance use at least SMT4; in some cases SMT8 seems to perform better, but it is advisable to try out both. A simple way to test this is to use the --intra_threads parameter to control the number of threads CTranslate2 is executing. At maximum this should be 8*number of physical cores (SMT-8). + +Based on preliminary testing, a Power10 core offers 27-42% higher tokens/s compared to an Intel Gold core. + +It should be possible to build for Power9, but the missing MMA units will have a significant impact on performance. + +OneDNN is used for int8 matrix math that fully utilizes the MMA units; it should be possible to build with OpenBLAS for 16-bit MMA usage. + +## Build docker / podman container + +This is the easy way: +```git clone --recursive https://github.com/OpenNMT/CTranslate2/ +cd CTranslate2/docker +podman build -t elinar.ai/ct2-ppc64le -f Dockerfile.ppc64le ..
+ +``` + +Then run the CTranslate2 container (substitute the mount point, MODEL_LOCATION and SRC_FILE): +```podman run --security-opt=label=disable --ipc=host --ulimit=host -it --rm -v /tmp:/tmp elinar.ai/ct2-ppc64le --model MODEL_LOCATION --src SRC_FILE --intra_threads 16``` + +## Install from sources +This build has been tested on RHEL 9 / ppc64le and requires IBM Advance Toolchain 17.0 ( https://www.ibm.com/support/pages/advance-toolchain-linux-power ) +``` +#sleef: +git clone -b 3.6.1 https://github.com/shibatch/sleef + +cd sleef +mkdir build && cd build +cmake -DSLEEF_BUILD_INLINE_HEADERS=TRUE -DCMAKE_CXX_FLAGS='-mcpu=power10 -mtune=power10 -O3 -std=gnu++11 -maltivec -mabi=altivec -mstrict-align ' -DCMAKE_C_COMPILER=/opt/at17.0/bin/gcc -DCMAKE_CXX_COMPILER=/opt/at17.0/bin/g++ -DAT_PATH=/opt/at17.0/ -DBUILD_SHARED_LIBS=FALSE -DBUILD_TESTS=FALSE -DENFORCE_VSX3=TRUE -DSLEEF_SHOW_CONFIG=1 -DCMAKE_BUILD_TYPE=Release .. + +cmake --build build -j --clean-first +sudo cmake --install build --prefix=/usr/ + + +#OneDNN; +git clone -b v3.2 --recursive https://github.com/oneapi-src/oneDNN +cd oneDNN +mkdir build && cd build +cmake -DCMAKE_CXX_FLAGS='-mcpu=power10 -mtune=power10 -O3 -maltivec' -DOPENMP_RUNTIME=COMP .. +make -j16 +sudo make install + + +git clone --recursive https://github.com/Dagamies/CTranslate2 +cd CTranslate2 +mkdir build +cd build +cmake -DWITH_CUDA=OFF -DWITH_MKL=OFF -DWITH_OPENBLAS=OFF -DWITH_DNNL=ON -DCMAKE_CXX_FLAGS='-mcpu=power10 -mtune=power10 -O3 -ffp-contract=off' -DOPENMP_RUNTIME=COMP ..
+make -j16 +sudo make install +sudo ldconfig -v +export LD_LIBRARY_PATH=/usr/local/lib64/ + +``` \ No newline at end of file diff --git a/python/setup-ppc64le.py b/python/setup-ppc64le.py new file mode 100644 index 000000000..51a21fd43 --- /dev/null +++ b/python/setup-ppc64le.py @@ -0,0 +1,126 @@ +import glob +import os +import sys + +import pybind11 + +from pybind11.setup_helpers import ParallelCompile +from setuptools import Extension, find_packages, setup + +base_dir = os.path.dirname(os.path.abspath(__file__)) +include_dirs = [pybind11.get_include()] +library_dirs = [] + + +def _get_long_description(): + readme_path = os.path.join(base_dir, "README.md") + if not os.path.exists(readme_path): + return "" + with open(readme_path, encoding="utf-8") as readme_file: + return readme_file.read() + + +def _get_project_version(): + version_path = os.path.join(base_dir, "ctranslate2", "version.py") + version = {} + with open(version_path, encoding="utf-8") as fp: + exec(fp.read(), version) + return version["__version__"] + + +def _maybe_add_library_root(lib_name): + if "%s_ROOT" % lib_name in os.environ: + root = os.environ["%s_ROOT" % lib_name] + include_dirs.append("%s/include" % root) + for lib_dir in ("lib", "lib64"): + path = "%s/%s" % (root, lib_dir) + if os.path.exists(path): + library_dirs.append(path) + break + + +_maybe_add_library_root("CTRANSLATE2") + +cflags = ["-std=c++17", "-fvisibility=hidden"] +ldflags = [] +package_data = {} +if sys.platform == "darwin": + # std::visit requires macOS 10.14 + cflags.append("-mmacosx-version-min=10.14") + ldflags.append("-Wl,-rpath,/usr/local/lib") +elif sys.platform == "win32": + cflags = ["/std:c++17", "/d2FH4-"] + package_data["ctranslate2"] = ["*.dll"] + +ctranslate2_module = Extension( + "ctranslate2._ext", + sources=glob.glob(os.path.join("cpp", "*.cc")), + extra_compile_args=cflags, + extra_link_args=ldflags, + include_dirs=include_dirs, + library_dirs=library_dirs, + libraries=["ctranslate2"], +) + 
+ParallelCompile("CMAKE_BUILD_PARALLEL_LEVEL").install() + +setup( + name="ctranslate2", + version=_get_project_version(), + license="MIT", + description="Fast inference engine for Transformer models", + long_description=_get_long_description(), + long_description_content_type="text/markdown", + author="OpenNMT", + url="https://opennmt.net", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Environment :: GPU :: NVIDIA CUDA :: 11.0", + "Environment :: GPU :: NVIDIA CUDA :: 11.1", + "Environment :: GPU :: NVIDIA CUDA :: 11.2", + "Environment :: GPU :: NVIDIA CUDA :: 11.3", + "Environment :: GPU :: NVIDIA CUDA :: 11.4", + "Environment :: GPU :: NVIDIA CUDA :: 11.5", + "Environment :: GPU :: NVIDIA CUDA :: 11.6", + "Environment :: GPU :: NVIDIA CUDA :: 11.7", + "Environment :: GPU :: NVIDIA CUDA :: 11.8", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + ], + project_urls={ + "Documentation": "https://opennmt.net/CTranslate2", + "Forum": "https://forum.opennmt.net", + "Gitter": "https://gitter.im/OpenNMT/CTranslate2", + "Source": "https://github.com/OpenNMT/CTranslate2", + }, + keywords="opennmt nmt neural machine translation cuda mkl inference quantization", + packages=find_packages(exclude=["bin"]), + package_data=package_data, + ext_modules=[ctranslate2_module], + python_requires=">=3.8", + install_requires=[ + "setuptools", + "numpy==1.25.2", + "pyyaml>=5.3,<7", + ], + entry_points={ + "console_scripts": [ + "ct2-fairseq-converter=ctranslate2.converters.fairseq:main", + 
"ct2-marian-converter=ctranslate2.converters.marian:main", + "ct2-openai-gpt2-converter=ctranslate2.converters.openai_gpt2:main", + "ct2-opennmt-py-converter=ctranslate2.converters.opennmt_py:main", + "ct2-opennmt-tf-converter=ctranslate2.converters.opennmt_tf:main", + "ct2-opus-mt-converter=ctranslate2.converters.opus_mt:main", + "ct2-transformers-converter=ctranslate2.converters.transformers:main", + ], + }, +) diff --git a/src/cpu/cpu_info.cc b/src/cpu/cpu_info.cc index 9030ac7a4..c320dae71 100644 --- a/src/cpu/cpu_info.cc +++ b/src/cpu/cpu_info.cc @@ -58,4 +58,20 @@ namespace ctranslate2 { } } +#elif defined(CT2_PPC64LE_BUILD) + +namespace ctranslate2 { + namespace cpu { + + const char* cpu_vendor() { + return "POWER"; + } + + bool cpu_supports_power10() { + return true; + } + + } +} + #endif diff --git a/src/cpu/cpu_info.h b/src/cpu/cpu_info.h index c2951bcc0..0c696805a 100644 --- a/src/cpu/cpu_info.h +++ b/src/cpu/cpu_info.h @@ -14,6 +14,8 @@ namespace ctranslate2 { bool cpu_supports_avx512(); #elif defined(CT2_ARM64_BUILD) bool cpu_supports_neon(); +#elif defined(CT2_PPC64LE_BUILD) + bool cpu_supports_power10(); #endif } diff --git a/src/cpu/cpu_isa.cc b/src/cpu/cpu_isa.cc index c16aeda22..c84c2a669 100644 --- a/src/cpu/cpu_isa.cc +++ b/src/cpu/cpu_isa.cc @@ -35,7 +35,11 @@ namespace ctranslate2 { #elif defined(CT2_ARM64_BUILD) case CpuIsa::NEON: return "NEON"; +#elif defined(CT2_PPC64LE_BUILD) + case CpuIsa::POWER10: + return "POWER10"; #endif + default: return "GENERIC"; } @@ -54,6 +58,9 @@ namespace ctranslate2 { #elif defined(CT2_ARM64_BUILD) if (env_isa == "NEON") return try_isa(env_isa, CpuIsa::NEON, cpu_supports_neon()); +#elif defined(CT2_PPC64LE_BUILD) + if (env_isa == "POWER10") + return try_isa(env_isa, CpuIsa::POWER10, cpu_supports_power10()); #endif if (env_isa == "GENERIC") return CpuIsa::GENERIC; @@ -71,6 +78,9 @@ namespace ctranslate2 { # elif defined(CT2_ARM64_BUILD) if (cpu_supports_neon()) return CpuIsa::NEON; +# elif 
defined(CT2_PPC64LE_BUILD) + if (cpu_supports_power10()) + return CpuIsa::POWER10; # endif #endif diff --git a/src/cpu/cpu_isa.h b/src/cpu/cpu_isa.h index 4f42bdf26..b32379c7b 100644 --- a/src/cpu/cpu_isa.h +++ b/src/cpu/cpu_isa.h @@ -6,13 +6,15 @@ namespace ctranslate2 { namespace cpu { enum class CpuIsa { - GENERIC, + GENERIC,POWER10, #if defined(CT2_X86_BUILD) AVX, AVX2, AVX512, #elif defined(CT2_ARM64_BUILD) NEON, + /*#elif defined(CT2_PPC64LE_BUILD) + POWER10,*/ #endif }; @@ -54,6 +56,11 @@ namespace ctranslate2 { CPU_ISA_CASE(cpu::CpuIsa::NEON, SINGLE_ARG(STMTS)) \ CPU_ISA_DEFAULT(cpu::CpuIsa::GENERIC, SINGLE_ARG(STMTS)) \ } +#elif defined(CT2_PPC64LE_BUILD) +# define CPU_ISA_DISPATCH(STMTS) \ + switch (cpu::get_cpu_isa()) { \ + CPU_ISA_DEFAULT(cpu::CpuIsa::POWER10, SINGLE_ARG(STMTS)) \ + } #endif #elif defined(__AVX512F__) # define CPU_ISA_DISPATCH(STMTS) \ diff --git a/src/cpu/kernels.cc b/src/cpu/kernels.cc index c1f48553d..cf186a74d 100644 --- a/src/cpu/kernels.cc +++ b/src/cpu/kernels.cc @@ -1,10 +1,13 @@ #include "cpu/kernels.h" - +//#include "cpu/cpu_isa.h" #include #if defined(__AVX512F__) # define TARGET_ISA CpuIsa::AVX512 # include "cpu/vec_avx512.h" +#elif defined(CT2_PPC64LE_BUILD) +# define TARGET_ISA CpuIsa::POWER10 +# include "cpu/vec_power10.h" #elif defined(__AVX2__) # define TARGET_ISA CpuIsa::AVX2 # include "cpu/vec_avx.h" @@ -14,6 +17,9 @@ #elif (defined(__ARM_NEON) && !defined(CT2_WITH_CPU_DISPATCH)) || defined(USE_NEON) # define TARGET_ISA CpuIsa::NEON # include "cpu/vec_neon.h" +//#elif defined(CT2_PPC64LE_BUILD) +//# define TARGET_ISA CpuIsa::GENERIC +//# include "cpu/vec_power10.h" #else # define TARGET_ISA CpuIsa::GENERIC # include "cpu/vec.h" diff --git a/src/cpu/vec_power10.h b/src/cpu/vec_power10.h new file mode 100644 index 000000000..c9c71abe3 --- /dev/null +++ b/src/cpu/vec_power10.h @@ -0,0 +1,236 @@ +#pragma once + + +#include +#include +#include +#include +#include + +#include + +#include "vec.h" + +#if defined(__GNUC__) || 
defined(__clang__) +# define __ct2_align16__ __attribute__((aligned(16))) +#else +# define __ct2_align16__ +#endif + +namespace ctranslate2 { + namespace cpu { + + #define ALIGNMENT_VALUE 16u + + template<> + struct Vec { + + using value_type = __ct2_align16__ __vector float; + using mask_type = __ct2_align16__ __vector bool int; + static constexpr dim_t width = 4; + + static inline value_type unaligned_load(const float* ptr){ + return (value_type){*ptr,*(ptr+1),*(ptr+2),*(ptr+3)}; + } + + + static inline value_type load(float value) { + return (value_type){value,value,value,value}; + } + + static inline value_type load(const float* ptr) { + return (value_type){*ptr,*(ptr+1),*(ptr+2),*(ptr+3)}; + } + + static inline value_type load(const float* ptr, dim_t count, float default_value = float(0)) { + if (count == width) { + return load(ptr); + } else { + __ct2_align16__ float tmp_values[width]; + std::fill(tmp_values, tmp_values + width, default_value); + std::copy(ptr, ptr + count, tmp_values); + return load(tmp_values); + } + } + + static inline value_type load_and_convert(const int32_t* ptr) { + return vec_ctf((vector signed int){*ptr,*(ptr+1),*(ptr+2),*(ptr+3)},0); + } + + static inline value_type load_and_convert(const int32_t* ptr, + dim_t count, + int32_t default_value = 0) { + if (count == width) { + return load_and_convert(ptr); + } else { + __ct2_align16__ int32_t tmp_values[width]; + std::fill(tmp_values, tmp_values + width, default_value); + std::copy(ptr, ptr + count, tmp_values); + return load_and_convert(tmp_values); + } + } + static inline void unaligned_store(value_type value, float* ptr) { + vec_xst(value,0,ptr); + } + + static inline void store(value_type value, float* ptr) { + if (((uintptr_t)ptr % ALIGNMENT_VALUE) != 0) + { + unaligned_store(value,ptr); + } else + vec_st(value,0,ptr); + } + + static inline void store(value_type value, float* ptr, dim_t count) { + if (count == width) { + store(value,ptr); + } else { + __ct2_align16__ float 
tmp_values[width]; + store(value,tmp_values); + std::copy(tmp_values, tmp_values + count, ptr); + } + } + + static inline value_type bit_and(value_type a, value_type b) { + return vec_and(a,b); + } + + static inline value_type bit_xor(value_type a, value_type b) { + return vec_xor(a,b); + } + + static inline mask_type lt(value_type a, value_type b) { + return vec_cmplt(a,b); + } + + static inline value_type select(mask_type mask, value_type a, value_type b) { + return vec_sel(a,b,mask); + } + + static inline value_type abs(value_type a) { + return vec_abs(a); + } + + static inline value_type neg(value_type a) { + return vec_neg(a); + } + + static inline value_type rcp(value_type a) { + return vec_re(a); + } + + static inline value_type exp(value_type a) { + return Sleef_expf4_u10vsx3(a); + } + + static inline value_type log(value_type a) { + return Sleef_logf4_u35vsx3(a); + + } + static inline value_type sin(value_type a) { + return Sleef_sinf4_u35vsx3(a); + } + + static inline value_type cos(value_type a) { + return Sleef_cosf4_u35vsx3(a); + + } + + static inline value_type tanh(value_type a) { + return Sleef_tanhf4_u35vsx3(a); + + } + + static inline value_type erf(value_type a) { + return Sleef_erff4_u10vsx3(a); + } + + static inline value_type max(value_type a, value_type b) { + return vec_max(a, b); + } + + static inline value_type min(value_type a, value_type b) { + return vec_min(a, b); + } + + static inline value_type add(value_type a, value_type b) { + return vec_add(a,b); + } + + static inline value_type sub(value_type a, value_type b) { + return vec_sub(a,b); + } + + static inline value_type mul(value_type a, value_type b) { + return vec_mul(a,b); + } + + static inline value_type div(value_type a, value_type b) { + return vec_div(a,b); + } + + static inline value_type mul_add(value_type a, value_type b, value_type c) { + + return vec_madd(a,b,c); + } + + static inline float reduce_add(value_type a) { + + + unsigned long __element_selector_10 = 1 & 0x03; 
+ unsigned long __element_selector_32 = (1 >> 2) & 0x03; + unsigned long __element_selector_54 = (1 >> 4) & 0x03; + unsigned long __element_selector_76 = (1 >> 6) & 0x03; + static const unsigned int __permute_selectors[4] = + { +#ifdef __LITTLE_ENDIAN__ + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C +#else + 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F +#endif + }; + __vector unsigned int __t; + __t[0] = __permute_selectors[__element_selector_10]; + __t[1] = __permute_selectors[__element_selector_32]; + __t[2] = __permute_selectors[__element_selector_54] + 0x10101010; + __t[3] = __permute_selectors[__element_selector_76] + 0x10101010; + + __vector unsigned long long v1 = vec_mergel((__vector unsigned long long)a,(__vector unsigned long long)a); + value_type v2 = (value_type)a + (value_type)v1; + value_type v3 = vec_perm (v2, v2,(__vector unsigned char) __t); + return v2[0]+v3[0]; + } + + static inline float reduce_max(value_type a) { + float t0 = a[0] > a[1] ? a[0] : a[1]; + float t1 = a[2] > a[3] ? a[2] : a[3]; + return t0 > t1 ? 
t0 : t1; + } + + static inline value_type round(value_type a) { + return vec_round(a); + } + + static inline void convert_and_store(value_type v, int8_t *a, dim_t count) { + auto i32 = vec_cts(v,0); + + int8_t tmp[4]; + tmp[0]=i32[0]; + tmp[1]=i32[1]; + tmp[2]=i32[2]; + tmp[3]=i32[3]; + std::copy(tmp, tmp + count, a); + } + + static inline void convert_and_store(value_type v, uint8_t *a, dim_t count) { + auto u32 = vec_ctu(v,0); + uint8_t tmp[4]; + tmp[0]=u32[0]; + tmp[1]=u32[1]; + tmp[2]=u32[2]; + tmp[3]=u32[3]; + std::copy(tmp, tmp + count, a); + } + }; + } +} diff --git a/src/utils.cc b/src/utils.cc index 4f8bde57c..571fbb07c 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -42,6 +42,10 @@ namespace ctranslate2 { spdlog::info("CPU: {} (NEON={})", cpu::cpu_vendor(), cpu::cpu_supports_neon()); +#elif defined(CT2_PPC64LE_BUILD) + spdlog::info("CPU: {} (NEON={})", + cpu::cpu_vendor(), + cpu::cpu_supports_power10()); #endif spdlog::info(" - Selected ISA: {}", cpu::isa_to_str(cpu::get_cpu_isa())); spdlog::info(" - Use Intel MKL: {}", cpu::mayiuse_mkl());