
Commit 2df04ca

using old patch for vec error
1 parent ee92a60 commit 2df04ca

6 files changed, 8 insertions(+), 16 deletions(-)


.github/workflows/_binary-build-linux.yml

Lines changed: 1 addition & 1 deletion

@@ -273,7 +273,7 @@ jobs:
           -w / \
           "${DOCKER_IMAGE}"
         )
-        #docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
+        docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
         if [[ ${BUILD_ENVIRONMENT} == *"aarch64"* ]]; then
           docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/aarch64_linux/aarch64_ci_build.sh"
         else

aten/src/ATen/cpu/vec/vec256/vsx/vec256_common_vsx.h

Lines changed: 2 additions & 2 deletions

@@ -14,9 +14,9 @@
 #include <ATen/cpu/vec/vec256/vsx/vec256_qint8_vsx.h>
 #include <ATen/cpu/vec/vec256/vsx/vec256_quint8_vsx.h>
 
-#include <ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h>
-#include <ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h>
 
+#include <ATen/cpu/vec/vec256/vsx/vec256_complex_float_vsx.h>
+#include <ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h>
 
 #include <ATen/cpu/vec/vec256/vsx/vec256_bfloat16_vsx.h>
 
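
The swap restores the float header ahead of the double header. As the vec256_complex_double_vsx.h hunk below shows, the double code refers to Vectorized<c10::complex<float>>, so the float specialization presumably has to be visible first. A toy, self-contained sketch of that kind of ordering dependency (the Vec template is purely illustrative and stands in for ATen's Vectorized; it is not part of the commit):

    // Toy illustration only: Vec is a hypothetical stand-in for at::vec::Vectorized.
    #include <cstddef>

    template <typename T>
    struct Vec;  // primary template; specialized per element type

    template <>
    struct Vec<float> {  // plays the role of the complex-float header
      static constexpr std::size_t size() { return 8; }
    };

    template <>
    struct Vec<double> {  // plays the role of the complex-double header
      // Uses Vec<float>, so the float specialization must already be declared,
      // mirroring why the float include is moved ahead of the double include.
      static constexpr std::size_t size() { return Vec<float>::size() / 2; }
    };

    static_assert(Vec<double>::size() == 4, "resolves only if Vec<float> is visible");

    int main() { return 0; }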

aten/src/ATen/cpu/vec/vec256/vsx/vec256_complex_double_vsx.h

Lines changed: 1 addition & 1 deletion

@@ -478,7 +478,7 @@ class Vectorized<ComplexDbl> {
     this->store(tmp1);
     b.store(tmp2);
 
-    for (const auto i : c10::irange(Vectorized<c10::complex<double>>::size())) {
+    for (const auto i : c10::irange(Vectorized<c10::complex<float>>::size())) {
       out[i] = tmp1[i] / tmp2[i];
     }
     return loadu(out);

aten/src/ATen/native/Blas.cpp

Lines changed: 1 addition & 1 deletion

@@ -296,7 +296,7 @@ _scaled_mm_out_cpu(const Tensor& mat1, const Tensor& mat2,
     std::optional<c10::ScalarType> out_dtype,
     bool use_fast_accum,
     Tensor& out) {
-#if AT_MKLDNN_ENABLED() && !defined(__powerpc__) && !defined(__powerpc64__) && !defined(__PPC__) && !defined(__ppc__)
+#if AT_MKLDNN_ENABLED() && !defined(__powerpc__)
   if (at::globalContext().userEnabledMkldnn()) {
     bool mixed_dtype = mat1.scalar_type() != mat2.scalar_type();
     if ((!mixed_dtype && cpuinfo_has_x86_amx_int8()) ||
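
The shortened guard leans on __powerpc__ alone to identify Power targets. A minimal sketch of how the check behaves, under the assumption (about GCC/Clang, not stated by the commit) that those compilers predefine __powerpc__ on both 32-bit and 64-bit Power, which would make the dropped __powerpc64__/__PPC__/__ppc__ tests redundant:

    // Minimal probe of the simplified guard. Assumption: the toolchain predefines
    // __powerpc__ on every Power target, 32-bit and 64-bit alike.
    #include <iostream>

    int main() {
    #if defined(__powerpc__)
      std::cout << "Power build: the MKLDNN block above is compiled out\n";
    #else
      std::cout << "non-Power build: the MKLDNN block is compiled in when "
                   "AT_MKLDNN_ENABLED() holds\n";
    #endif
      return 0;
    }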

aten/src/ATen/native/mkldnn/Conv.cpp

Lines changed: 2 additions & 7 deletions

@@ -161,13 +161,8 @@ static bool mkldnn_conv_enabled_fpmath_mode_bf16(){
 }
 
 static bool mkldnn_conv_enabled_fpmath_mode_tf32(){
-#if defined(__powerpc64__)
-  // TF32 is not supported or relevant on Power — skip
-  return false;
-#else
-  return at::globalContext().float32Precision("mkldnn", "conv") == "tf32" &&
-      cpuinfo_has_x86_amx_fp16(); // or whatever the original intent was
-#endif
+  return at::globalContext().float32Precision("mkldnn", "conv") == "tf32" &&
+      cpuinfo_has_x86_amx_fp16();
 }
 
 static inline at::MemoryFormat mkldnn_convolution_memory_format(int64_t dims, bool is_channels_last) {
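
With the Power-specific preprocessor branch reverted, the TF32 decision is purely a runtime one: the global float32 precision setting plus the AMX-FP16 query. A small sketch of that query, assuming (about cpuinfo, not something the commit states) that cpuinfo_has_x86_amx_fp16() is available on every architecture and simply reports false where AMX is absent, so Power still lands on the non-TF32 path without a compile-time guard:

    // Sketch of the runtime capability check that now gates the TF32 path.
    #include <cpuinfo.h>
    #include <iostream>

    int main() {
      cpuinfo_initialize();                              // must run before any query
      const bool amx_fp16 = cpuinfo_has_x86_amx_fp16();  // expected false off x86
      std::cout << "AMX-FP16 available: " << std::boolalpha << amx_fp16 << '\n';
      return 0;
    }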

cmake/Modules/FindMKLDNN.cmake

Lines changed: 1 addition & 4 deletions

@@ -85,12 +85,9 @@ IF(NOT MKLDNN_FOUND)
   ENDIF(NOT APPLE AND NOT WIN32 AND NOT BUILD_LITE_INTERPRETER)
 
   IF(EXISTS "${MKLDNN_ROOT}/include/oneapi/dnnl/dnnl_ukernel.hpp")
-    IF(CPU_POWER)
-      SET(DNNL_EXPERIMENTAL_UKERNEL OFF CACHE BOOL "" FORCE)
-    ELSE()
       MESSAGE("-- Will build oneDNN UKERNEL")
       SET(DNNL_EXPERIMENTAL_UKERNEL ON CACHE BOOL "" FORCE)
-    ENDIF()
+
   ENDIF(EXISTS "${MKLDNN_ROOT}/include/oneapi/dnnl/dnnl_ukernel.hpp")
 
   FIND_PACKAGE(BLAS)
