1
- From 76e456a8fe536f6095a236ebb27c0463f32a9e4b Mon Sep 17 00:00:00 2001
1
+ From ed3e134049155029178bfb612c7d221345bd8f89 Mon Sep 17 00:00:00 2001
2
2
From: "tianyu.zhou" <[email protected]>
3
3
Date: Mon, 26 May 2025 14:21:47 +0800
4
- Subject: [PATCH] Adapt for Iluvatar 0723.
4
+ Subject: [PATCH] Adapt for Iluvatar 0811.
5
5
6
6
---
7
7
CMakeLists.txt | 2 +-
@@ -10,7 +10,7 @@ Subject: [PATCH] Adapt for Iluvatar 0723.
10
10
.../fluid/platform/device/gpu/nccl_helper.h | 2 +-
11
11
paddle/phi/backends/dynload/cudnn.cc | 4 +++
12
12
paddle/phi/backends/dynload/cudnn.h | 9 +++++++
13
- paddle/phi/backends/dynload/cusolver.h | 2 --
13
+ paddle/phi/backends/dynload/cusolver.h | 6 ------
14
14
.../backends/gpu/cuda/cuda_device_function.h | 4 +--
15
15
paddle/phi/backends/gpu/cuda/cuda_graph.cc | 4 +--
16
16
paddle/phi/backends/gpu/cuda/cuda_graph.h | 2 +-
@@ -22,6 +22,7 @@ Subject: [PATCH] Adapt for Iluvatar 0723.
22
22
paddle/phi/core/distributed/nccl_tools.cc | 2 +-
23
23
paddle/phi/core/enforce.h | 6 ++++-
24
24
paddle/phi/core/utils/data_type.h | 2 +-
25
+ paddle/phi/kernels/funcs/activation_functor.h | 2 ++
25
26
paddle/phi/kernels/funcs/affine_grid_utils.h | 2 ++
26
27
paddle/phi/kernels/funcs/segmented_array.h | 8 ++++++
27
28
paddle/phi/kernels/funcs/softmax_impl.h | 1 +
@@ -35,10 +36,10 @@ Subject: [PATCH] Adapt for Iluvatar 0723.
35
36
paddle/phi/kernels/squeeze_kernel.cc | 2 ++
36
37
paddle/phi/kernels/strided_slice_kernel.cc | 2 ++
37
38
paddle/phi/kernels/unsqueeze_kernel.cc | 2 ++
38
- 31 files changed, 114 insertions(+), 34 deletions(-)
39
+ 32 files changed, 116 insertions(+), 38 deletions(-)
39
40
40
41
diff --git a/CMakeLists.txt b/CMakeLists.txt
41
- index b2c4e6a650..c51f3df1f0 100755
42
+ index 9c2a59d879..9dac2ecbf9 100755
42
43
--- a/CMakeLists.txt
43
44
+++ b/CMakeLists.txt
44
45
@@ -63,7 +63,7 @@ option(WITH_IPU "Compile PaddlePaddle with Graphcore IPU" OFF)
@@ -125,25 +126,45 @@ index 7a5450c349..329fff1f73 100644
125
126
} // namespace phi
126
127
127
128
diff --git a/paddle/phi/backends/dynload/cusolver.h b/paddle/phi/backends/dynload/cusolver.h
128
- index d580751fe4..42a261bdfd 100644
129
+ index 86651fc8f1..42a261bdfd 100644
129
130
--- a/paddle/phi/backends/dynload/cusolver.h
130
131
+++ b/paddle/phi/backends/dynload/cusolver.h
131
- @@ -77,7 +77,6 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
132
+ @@ -46,10 +46,8 @@ extern void *cusolver_dso_handle;
133
+ __macro(cusolverDnSetStream); \
134
+ __macro(cusolverDnSpotrf_bufferSize); \
135
+ __macro(cusolverDnDpotrf_bufferSize); \
136
+ - __macro(cusolverDnXpotrf_bufferSize); \
137
+ __macro(cusolverDnSpotrf); \
138
+ __macro(cusolverDnDpotrf); \
139
+ - __macro(cusolverDnXpotrf); \
140
+ __macro(cusolverDnSpotrs); \
141
+ __macro(cusolverDnDpotrs); \
142
+ __macro(cusolverDnCpotrs); \
143
+ @@ -79,7 +77,6 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
132
144
__macro(cusolverDnDgeqrf_bufferSize); \
133
145
__macro(cusolverDnCgeqrf_bufferSize); \
134
146
__macro(cusolverDnZgeqrf_bufferSize); \
135
147
- __macro(cusolverDnXgeqrf_bufferSize); \
136
148
__macro(cusolverDnSorgqr_bufferSize); \
137
149
__macro(cusolverDnDorgqr_bufferSize); \
138
150
__macro(cusolverDnSormqr_bufferSize); \
139
- @@ -106,7 +105,6 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
151
+ @@ -108,7 +105,6 @@ CUSOLVER_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP);
140
152
__macro(cusolverDnDgeqrf); \
141
153
__macro(cusolverDnCgeqrf); \
142
154
__macro(cusolverDnZgeqrf); \
143
155
- __macro(cusolverDnXgeqrf); \
144
156
__macro(cusolverDnSorgqr); \
145
157
__macro(cusolverDnDorgqr); \
146
158
__macro(cusolverDnSormqr); \
159
+ @@ -122,8 +118,6 @@ CUSOLVER_ROUTINE_EACH_R1(DECLARE_DYNAMIC_LOAD_CUSOLVER_WRAP)
160
+ #if CUDA_VERSION >= 9020
161
+ #define CUSOLVER_ROUTINE_EACH_R2(__macro) \
162
+ __macro(cusolverDnCreateSyevjInfo); \
163
+ - __macro(cusolverDnCreateParams); \
164
+ - __macro(cusolverDnDestroyParams); \
165
+ __macro(cusolverDnSsyevj_bufferSize); \
166
+ __macro(cusolverDnDsyevj_bufferSize); \
167
+ __macro(cusolverDnCheevj_bufferSize); \
147
168
diff --git a/paddle/phi/backends/gpu/cuda/cuda_device_function.h b/paddle/phi/backends/gpu/cuda/cuda_device_function.h
148
169
index 4ff2e528a9..956bac0c64 100644
149
170
--- a/paddle/phi/backends/gpu/cuda/cuda_device_function.h
@@ -215,7 +236,7 @@ index 02753c0333..bcf435dfae 100644
215
236
return CUDA_R_16BF;
216
237
#endif
217
238
diff --git a/paddle/phi/backends/gpu/cuda/cudnn_helper.h b/paddle/phi/backends/gpu/cuda/cudnn_helper.h
218
- index 99b8fa120e..beba409fcc 100644
239
+ index 28c3d14d37..5dc5f79178 100644
219
240
--- a/paddle/phi/backends/gpu/cuda/cudnn_helper.h
220
241
+++ b/paddle/phi/backends/gpu/cuda/cudnn_helper.h
221
242
@@ -125,7 +125,7 @@ class CudnnDataType<phi::dtype::float8_e4m3fn> {
@@ -390,6 +411,25 @@ index 1d20fa3173..fab2b90ed2 100644
390
411
} else if (type == DataType::BFLOAT16) {
391
412
return ncclBfloat16;
392
413
#endif
414
+ diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h
415
+ index fa55cd725f..f0e3f5b49c 100644
416
+ --- a/paddle/phi/kernels/funcs/activation_functor.h
417
+ +++ b/paddle/phi/kernels/funcs/activation_functor.h
418
+ @@ -3659,12 +3659,14 @@ struct CudaReciprocalFunctor<ComplexType<T>>
419
+ return ::isnan(real) || ::isnan(imag);
420
+ };
421
+ if (either_nan(x.real, x.imag) || both_inf(x.real, x.imag)) {
422
+ + #ifndef PADDLE_WITH_COREX
423
+ // If either is Nan or both are infinite, return {nan, nan}
424
+ if constexpr (std::is_same<T, float>::value) {
425
+ return ComplexType<T>(nanf(""), nanf(""));
426
+ } else if constexpr (std::is_same<T, double>::value) {
427
+ return ComplexType<T>(nan(""), nan(""));
428
+ }
429
+ + #endif
430
+ } else if (either_inf(x.real, x.imag)) {
431
+ // If either is Inf, return {0, 0}
432
+ return ComplexType<T>(static_cast<T>(0), static_cast<T>(0));
393
433
diff --git a/paddle/phi/kernels/funcs/affine_grid_utils.h b/paddle/phi/kernels/funcs/affine_grid_utils.h
394
434
index b973d75a9b..daeb4778c6 100644
395
435
--- a/paddle/phi/kernels/funcs/affine_grid_utils.h
@@ -472,7 +512,7 @@ index 9d0d474d90..b1a12276d9 100644
472
512
GPU,
473
513
ALL_LAYOUT,
474
514
diff --git a/paddle/phi/kernels/gpu/layer_norm_kernel.cu b/paddle/phi/kernels/gpu/layer_norm_kernel.cu
475
- index 648bb6cee2..5b4cdd0505 100644
515
+ index f621d5ed5b..909d28b62f 100644
476
516
--- a/paddle/phi/kernels/gpu/layer_norm_kernel.cu
477
517
+++ b/paddle/phi/kernels/gpu/layer_norm_kernel.cu
478
518
@@ -681,7 +681,7 @@ PD_REGISTER_KERNEL(layer_norm,
0 commit comments