Skip to content

Commit 51d33c6

Browse files
guangyey and mengfei25 authored
[Reland] Install xpu codegen header to torch/include (#1743)
# Motivation This PR addresses a code generation issue related to XPU. Currently, there are two separate codegen paths for XPU: 1. **Stock PyTorch** – Generates code for oneDNN ops. 2. **torch-xpu-ops** – Generates code for SYCL kernel ops. The corresponding build directories are: 1. `build/aten/src/ATen` (for stock PyTorch) 2. `build/xpu/ATen` (for torch-xpu-ops) However, in the torch-xpu-ops codegen, we mistakenly omitted installing XPU op headers from `build/xpu/ATen/ops` to `build/aten/src/ATen/ops`. This PR fixes the issue and also removes some unnecessary code for better maintainability. # Solution We copy the codegen from torch-xpu-ops to stock PyTorch # Additional Context The original PR is #1405 Fix pytorch/pytorch#145902 --------- Co-authored-by: mengfei25 <[email protected]>
1 parent d9b81b8 commit 51d33c6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+389
-217
lines changed

cmake/Codegen.cmake

Lines changed: 98 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ endif()
44
set(Codegen_XPU_cmake_included true)
55

66
set(BUILD_TORCH_XPU_ATEN_GENERATED "${CMAKE_BINARY_DIR}/xpu/ATen")
7+
set(BUILD_TORCH_ATEN_GENERATED "${CMAKE_BINARY_DIR}/aten/src/ATen")
78
file(MAKE_DIRECTORY ${BUILD_TORCH_XPU_ATEN_GENERATED})
89

910
set(RegisterXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
@@ -14,81 +15,74 @@ set(XPUFallback_TEMPLATE ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.t
1415
set(XPU_AOTI_INSTALL_DIR ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/generated/extend)
1516
set(XPU_AOTI_SHIM_HEADER ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.h)
1617
set(XPU_AOTI_SHIM_SOURCE ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp)
18+
set(CODEGEN_XPU_YAML_DIR ${TORCH_XPU_OPS_ROOT}/yaml)
1719

20+
# Codegen prepare process
1821
if(WIN32)
19-
set(FILE_DISPLAY_CMD type)
22+
file(TO_NATIVE_PATH "${CODEGEN_XPU_YAML_DIR}/templates" DestPATH)
23+
file(TO_NATIVE_PATH "${CMAKE_SOURCE_DIR}/aten/src/ATen/templates" SrcPATH)
24+
# Copy pytorch templates
25+
execute_process(COMMAND cmd /c xcopy ${SrcPATH} ${DestPATH} /E /H /C /I /Y > nul)
2026
else()
21-
set(FILE_DISPLAY_CMD cat)
27+
# soft link to pytorch templates
28+
execute_process(COMMAND ln -sf ${CMAKE_SOURCE_DIR}/aten/src/ATen/templates ${CODEGEN_XPU_YAML_DIR})
2229
endif()
23-
file(TO_NATIVE_PATH "${RegisterXPU_GENERATED}" RegisterXPU_GENERATED_NATIVE)
24-
file(TO_NATIVE_PATH "${XPUFallback_TEMPLATE}" XPUFallback_TEMPLATE_NATIVE)
25-
set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_TEMPLATE_NATIVE} ">>" ${RegisterXPU_GENERATED_NATIVE})
26-
27-
function(GEN_XPU file_yaml)
28-
set(generated_files "")
29-
foreach(f ${ARGN})
30-
list(APPEND generated_files "${f}")
31-
endforeach()
32-
set(CODEGEN_XPU_YAML_DIR ${TORCH_XPU_OPS_ROOT}/yaml)
3330

34-
# Codegen prepare process
35-
if(WIN32)
36-
file(TO_NATIVE_PATH "${CODEGEN_XPU_YAML_DIR}/templates" DestPATH)
37-
file(TO_NATIVE_PATH "${CMAKE_SOURCE_DIR}/aten/src/ATen/templates" SrcPATH)
38-
execute_process(COMMAND cmd /c xcopy ${SrcPATH} ${DestPATH} /E /H /C /I /Y > nul)
39-
else()
40-
execute_process(COMMAND ln -sf ${CMAKE_SOURCE_DIR}/aten/src/ATen/templates ${CODEGEN_XPU_YAML_DIR}) # soft link to pytorch templates
41-
endif()
31+
set(XPU_CODEGEN_COMMAND
32+
"${Python_EXECUTABLE}" -m torchgen.gen
33+
--source-path ${CODEGEN_XPU_YAML_DIR}
34+
--install-dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
35+
--per-operator-headers
36+
--backend-whitelist XPU SparseXPU SparseCsrXPU NestedTensorXPU
37+
--xpu
38+
)
4239

43-
set(XPU_CODEGEN_COMMAND
44-
"${Python_EXECUTABLE}" -m torchgen.gen
45-
--source-path ${CODEGEN_XPU_YAML_DIR}
46-
--install-dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
47-
--per-operator-headers
48-
--backend-whitelist XPU SparseXPU SparseCsrXPU NestedTensorXPU
49-
--xpu
50-
)
40+
set(XPU_INSTALL_HEADER_COMMAND
41+
"${Python_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/install_xpu_headers.py
42+
--src-header-dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
43+
--dst-header-dir ${BUILD_TORCH_ATEN_GENERATED}
44+
)
5145

52-
add_custom_command(
53-
COMMENT "Generating XPU ATen Codegen..."
54-
OUTPUT ${generated_files}
55-
COMMAND
46+
# Generate ops_generated_headers.cmake for torch-xpu-ops
47+
execute_process(
48+
COMMAND
5649
${XPU_CODEGEN_COMMAND}
57-
--static-dispatch-backend
58-
# --update-aoti-c-shim: generate extend/c_shim_xpu.h
59-
--update-aoti-c-shim
60-
# --exten-aoti-c-shim: specifiy the extend/c_shim_xpu
61-
# is out-of-tree extention for in-tree c_shim_xpu
62-
--extend-aoti-c-shim
63-
# --aoti-install-dir: generates c_shim_xpu.h and c_shim_xpu.cpp at
64-
# torch/csrc/inductor/aoti_torch/generated/extend/
65-
--aoti-install-dir=${XPU_AOTI_INSTALL_DIR}
66-
COMMAND
67-
${REGISTER_FALLBACK_CMD}
68-
# Codegen post-process
69-
COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterXPU_GENERATED}
70-
COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseXPU_GENERATED}
71-
COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseCsrXPU_GENERATED}
72-
COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterNestedTensorXPU_GENERATED}
73-
WORKING_DIRECTORY ${TORCH_ROOT}
74-
DEPENDS
75-
${CODEGEN_XPU_YAML_DIR}/native/${file_yaml}
76-
${XPUFallback_TEMPLATE}
77-
)
50+
--generate headers
51+
--dry-run
52+
--output-dependencies ${BUILD_TORCH_XPU_ATEN_GENERATED}/generated_headers.cmake
53+
RESULT_VARIABLE RETURN_VALUE
54+
WORKING_DIRECTORY ${TORCH_ROOT}
55+
)
56+
57+
if(NOT RETURN_VALUE EQUAL 0)
58+
message(FATAL_ERROR "Failed to generate ops_generated_headers.cmake for torch-xpu-ops.")
59+
endif()
60+
61+
# Generate xpu_ops_generated_headers.cmake
62+
execute_process(
63+
COMMAND
64+
${XPU_INSTALL_HEADER_COMMAND}
65+
--dry-run
66+
RESULT_VARIABLE RETURN_VALUE
67+
WORKING_DIRECTORY ${TORCH_ROOT}
68+
)
69+
70+
if(NOT RETURN_VALUE EQUAL 0)
71+
message(FATAL_ERROR "Failed to generate xpu_ops_generated_headers.cmake.")
72+
endif()
73+
74+
include(${BUILD_TORCH_XPU_ATEN_GENERATED}/xpu_ops_generated_headers.cmake)
7875

79-
# Post codegen delete the copied templates folder only on Windows.
80-
if(WIN32)
81-
add_custom_target(DELETE_TEMPLATES ALL DEPENDS ${generated_files})
82-
add_custom_command(
83-
TARGET DELETE_TEMPLATES
84-
POST_BUILD
85-
COMMAND ${CMAKE_COMMAND} -E remove_directory "${DestPATH}"
86-
)
87-
endif()
88-
endfunction(GEN_XPU)
76+
if(WIN32)
77+
set(FILE_DISPLAY_CMD type)
78+
else()
79+
set(FILE_DISPLAY_CMD cat)
80+
endif()
81+
file(TO_NATIVE_PATH "${RegisterXPU_GENERATED}" RegisterXPU_GENERATED_NATIVE)
82+
file(TO_NATIVE_PATH "${XPUFallback_TEMPLATE}" XPUFallback_TEMPLATE_NATIVE)
83+
set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_TEMPLATE_NATIVE} ">>" ${RegisterXPU_GENERATED_NATIVE})
8984

90-
GEN_XPU(
91-
native_functions.yaml
85+
set(OUTPUT_LIST
9286
${BUILD_TORCH_XPU_ATEN_GENERATED}/XPUFunctions.h
9387
${BUILD_TORCH_XPU_ATEN_GENERATED}/XPUFunctions_inl.h
9488
${RegisterXPU_GENERATED}
@@ -99,17 +93,49 @@ GEN_XPU(
9993
${XPU_AOTI_SHIM_SOURCE}
10094
)
10195

102-
# The c_shim_xpu.cpp needs include files in ${CMAKE_BINARY_DIR}/xpu/ATen/ops/*.h)
103-
# The include path is auto generated as "#include <ATen/ops/*.h">
104-
# To follow the design of aoti codegen, here ${CMAKE_BINARY_DIR}/xpu is added to
105-
# $TORCH_XPU_OPS_INCLUDE_DIRS, so that "#include <ATen/ops/*.h>" works.
106-
list(APPEND TORCH_XPU_OPS_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/xpu)
96+
# Generate torch-xpu-ops codegen
97+
add_custom_command(
98+
COMMENT "Generating XPU ATen Codegen..."
99+
OUTPUT ${OUTPUT_LIST}
100+
COMMAND
101+
${XPU_CODEGEN_COMMAND}
102+
--static-dispatch-backend
103+
--update-aoti-c-shim
104+
--extend-aoti-c-shim
105+
--aoti-install-dir=${XPU_AOTI_INSTALL_DIR}
106+
COMMAND
107+
${REGISTER_FALLBACK_CMD}
108+
# Codegen post process
109+
COMMAND
110+
${XPU_INSTALL_HEADER_COMMAND}
111+
DEPENDS
112+
ATEN_CPU_FILES_GEN_TARGET
113+
ATEN_XPU_FILES_GEN_TARGET
114+
${XPUFallback_TEMPLATE}
115+
${TORCH_XPU_OPS_ROOT}/tools/codegen/install_xpu_headers.py
116+
${BUILD_TORCH_XPU_ATEN_GENERATED}/xpu_ops_generated_headers.cmake
117+
${CODEGEN_XPU_YAML_DIR}/native/native_functions.yaml
118+
${all_python} ${headers_templates}
119+
${TORCH_ROOT}/aten/src/ATen/native/native_functions.yaml
120+
${TORCH_ROOT}/aten/src/ATen/native/tags.yaml
121+
WORKING_DIRECTORY ${TORCH_ROOT}
122+
)
123+
124+
# Codegen post progress
125+
if(WIN32)
126+
add_custom_target(DELETE_TEMPLATES ALL DEPENDS ${OUTPUT_LIST})
127+
# Delete the copied templates folder only on Windows.
128+
add_custom_command(
129+
TARGET DELETE_TEMPLATES
130+
POST_BUILD
131+
COMMAND ${CMAKE_COMMAND} -E remove_directory "${DestPATH}"
132+
)
133+
endif()
107134

108-
list(APPEND xpu_generated_src
135+
set(ATen_XPU_GEN_SRCS
109136
${RegisterXPU_GENERATED}
110137
${RegisterSparseXPU_GENERATED}
111138
${RegisterSparseCsrXPU_GENERATED}
112139
${RegisterNestedTensorXPU_GENERATED}
113140
${XPU_AOTI_SHIM_SOURCE}
114141
)
115-
set(ATen_XPU_GEN_SRCS ${xpu_generated_src})

src/ATen/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,7 @@ set(ATen_XPU_SYCL_SRCS ${ATen_XPU_SYCL_SRCS} PARENT_SCOPE)
2121
foreach(HEADER ${xpu_h})
2222
install(FILES ${HEADER} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/ATen/xpu")
2323
endforeach()
24+
25+
foreach(HEADER ${xpu_ops_generated_headers})
26+
install(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/ops)
27+
endforeach()

src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#include <ATen/native/sparse/SparseStubs.h>
22
#include <ATen/native/sparse/xpu/sycl/SparseCsrTensorMathKernels.h>
3-
#include <xpu/ATen/ops/_convert_indices_from_coo_to_csr_native.h>
4-
#include <xpu/ATen/ops/_convert_indices_from_csr_to_coo_native.h>
3+
#include <ATen/ops/_convert_indices_from_coo_to_csr_native.h>
4+
#include <ATen/ops/_convert_indices_from_csr_to_coo_native.h>
55

66
namespace at::native {
77

src/ATen/native/xpu/Activation.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
#include <ATen/native/TensorIterator.h>
88

99
#include <ATen/ops/empty_like.h>
10-
#include <xpu/ATen/ops/empty.h>
11-
#include <xpu/ATen/ops/gelu_backward_native.h>
12-
#include <xpu/ATen/ops/gelu_native.h>
10+
#include <ATen/ops/empty.h>
11+
#include <ATen/ops/gelu_backward_native.h>
12+
#include <ATen/ops/gelu_native.h>
1313

1414
#include <ATen/native/xpu/sycl/ActivationEluKernels.h>
1515
#include <ATen/native/xpu/sycl/ActivationGeluKernel.h>

src/ATen/native/xpu/AdaptiveAveragePooling2d.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
#include <ATen/ops/mean.h>
99
#include <ATen/ops/zeros_like.h>
10-
#include <xpu/ATen/ops/_adaptive_avg_pool2d_backward_native.h>
11-
#include <xpu/ATen/ops/_adaptive_avg_pool2d_native.h>
10+
#include <ATen/ops/_adaptive_avg_pool2d_backward_native.h>
11+
#include <ATen/ops/_adaptive_avg_pool2d_native.h>
1212

1313
#include <ATen/native/xpu/sycl/AdaptiveAveragePooling2dKernels.h>
1414

src/ATen/native/xpu/AdaptiveAveragePooling3d.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
#include <ATen/ops/empty.h>
66
#include <ATen/ops/empty_like.h>
7-
#include <xpu/ATen/ops/adaptive_avg_pool3d_backward_native.h>
8-
#include <xpu/ATen/ops/adaptive_avg_pool3d_native.h>
7+
#include <ATen/ops/adaptive_avg_pool3d_backward_native.h>
8+
#include <ATen/ops/adaptive_avg_pool3d_native.h>
99

1010
namespace at::native {
1111

src/ATen/native/xpu/AdaptiveMaxPooling2d.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#include <ATen/native/xpu/sycl/AdaptiveMaxPooling2dKernels.h>
55
#include <comm/RegisterUtils.h>
66

7-
#include <xpu/ATen/ops/adaptive_max_pool2d_backward_native.h>
8-
#include <xpu/ATen/ops/adaptive_max_pool2d_native.h>
7+
#include <ATen/ops/adaptive_max_pool2d_backward_native.h>
8+
#include <ATen/ops/adaptive_max_pool2d_native.h>
99

1010
namespace at {
1111
namespace native {

src/ATen/native/xpu/AdaptiveMaxPooling3d.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
#include <ATen/native/xpu/sycl/AdaptiveMaxPooling3dKernels.h>
55

66
#include <ATen/ops/empty.h>
7-
#include <xpu/ATen/ops/adaptive_max_pool3d_backward_native.h>
8-
#include <xpu/ATen/ops/adaptive_max_pool3d_native.h>
7+
#include <ATen/ops/adaptive_max_pool3d_backward_native.h>
8+
#include <ATen/ops/adaptive_max_pool3d_native.h>
99

1010
namespace at {
1111
namespace native {

src/ATen/native/xpu/AveragePool2d.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
#include <ATen/native/xpu/sycl/AveragePool2dKernels.h>
66
#include <comm/RegisterUtils.h>
77

8-
#include <xpu/ATen/ops/avg_pool2d_backward_native.h>
9-
#include <xpu/ATen/ops/avg_pool2d_native.h>
8+
#include <ATen/ops/avg_pool2d_backward_native.h>
9+
#include <ATen/ops/avg_pool2d_native.h>
1010

1111
namespace at {
1212
namespace native {

src/ATen/native/xpu/AveragePool3d.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#include <ATen/core/Tensor.h>
22
#include <ATen/native/xpu/sycl/AveragePool3dKernels.h>
33

4-
#include <xpu/ATen/ops/avg_pool3d_backward_native.h>
5-
#include <xpu/ATen/ops/avg_pool3d_native.h>
4+
#include <ATen/ops/avg_pool3d_backward_native.h>
5+
#include <ATen/ops/avg_pool3d_native.h>
66

77
namespace at {
88
namespace native {

0 commit comments

Comments (0)