
Commit 5914f8f

Fix warning when building kernel libraries (pytorch#15405)

Getting warnings like the following:

```
/home/larryliu/executorch/kernels/portable/cpu/op_maximum.cpp: In lambda function:
/home/larryliu/executorch/kernels/portable/cpu/op_maximum.cpp:52:9: note: the ABI for passing parameters with 32-byte alignment has changed in GCC 4.6
   52 |         [](const auto val_a, const auto val_b) {
      |         ^
/home/larryliu/executorch/../executorch/runtime/core/exec_aten/util/scalar_type_util.h:919:7: note: in definition of macro 'ET_INTERNAL_SWITCH'
  919 |       __VA_ARGS__ \
      |       ^~~~~~~~~~~
/home/larryliu/executorch/../executorch/runtime/core/exec_aten/util/scalar_type_util.h:931:3: note: in expansion of macro 'ET_INTERNAL_SWITCH_CASE'
  931 |   ET_INTERNAL_SWITCH_CASE( \
      |   ^~~~~~~~~~~~~~~~~~~~~~~
/home/larryliu/executorch/../executorch/runtime/core/exec_aten/util/scalar_type_util.h:957:3: note: in expansion of macro 'ET_INTERNAL_SWITCH_CASE_INT_TYPES'
  957 |   ET_INTERNAL_SWITCH_CASE_INT_TYPES(CTYPE_ALIAS, __VA_ARGS__) \
      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/larryliu/executorch/../executorch/runtime/core/exec_aten/util/scalar_type_util.h:1008:3: note: in expansion of macro 'ET_INTERNAL_SWITCH_CASE_REAL_TYPES'
 1008 |   ET_INTERNAL_SWITCH_CASE_REAL_TYPES(CTYPE_ALIAS, __VA_ARGS__) \
      |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/larryliu/executorch/../executorch/runtime/core/exec_aten/util/scalar_type_util.h:1136:7: note: in expansion of macro 'ET_INTERNAL_SWITCH_CASE_REAL_TYPES_AND'
 1136 |       ET_INTERNAL_SWITCH_CASE_REAL_TYPES_AND( \
      |       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/larryliu/executorch/../executorch/runtime/core/exec_aten/util/scalar_type_util.h:1172:3: note: in expansion of macro 'ET_SWITCH_REAL_TYPES_AND'
 1172 |   ET_SWITCH_REAL_TYPES_AND(Bool, TYPE, CONTEXT, NAME, CTYPE_ALIAS, __VA_ARGS__)
      |   ^~~~~~~~~~~~~~~~~~~~~~~~
/home/larryliu/executorch/kernels/portable/cpu/op_maximum.cpp:47:3: note: in expansion of macro 'ET_SWITCH_REALB_TYPES'
   47 |   ET_SWITCH_REALB_TYPES(compute_type, ctx, op_name, CTYPE_COMPUTE, [&]() {
      |   ^~~~~~~~~~~~~~~~~~~~~
```

Fixing them in this PR.

## Overview

This branch addresses GCC compiler warnings about the 32-byte-alignment ABI change that fire when SIMD vector types are passed to lambda functions, in both the portable and optimized kernel implementations.

**Key Changes:**

* **CMakeLists.txt**: Added the `-Wno-psabi` compiler flag to suppress the ABI warnings for the GNU compiler: `$<$<CXX_COMPILER_ID:GNU>:-Wno-psabi>`
* **Removed the explicit template parameter `<CTYPE>` and changed lambda parameters from reference (`&`) to pass-by-value** in `at::vec::map()` and `at::vec::map3()` calls:

```cpp
// Before:
at::vec::map<CTYPE>([alpha_val, b_val](Vec& x) { return x + Vec(alpha_val * b_val); }, ...);

// After:
at::vec::map([alpha_val, b_val](Vec x) { return x + Vec(alpha_val * b_val); }, ...);
```

* **Changed lambda parameters from pass-by-value to reference (`&`)** for lambdas passed to `apply_bitensor_elementwise_fn()` and similar functions:

```cpp
// Before:
[val_alpha](const auto val_a, const auto val_b) { return val_a + val_alpha * val_b; }

// After:
[val_alpha](const auto& val_a, const auto& val_b) { return val_a + val_alpha * val_b; }
```

## Problem Solved

**Before:** GCC produced numerous warnings like:

`note: the ABI for passing parameters with 32-byte alignment has changed in GCC 4.6`

These warnings appeared whenever SIMD vector types (e.g., `__m256` from AVX) were passed to lambda functions by value, cluttering build output and making it difficult to spot real issues.

**After:** Clean builds with no ABI warnings, while maintaining identical functionality and performance characteristics.
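
For context, here is a minimal, self-contained sketch (not part of this commit) of the warning class involved: on GCC, passing a 32-byte-aligned type by value can emit the `-Wpsabi` notes quoted above, because the calling convention for such parameters changed in GCC 4.6, while passing by `const` reference keeps the alignment out of the call ABI entirely. The `Vec8f` type below is a hypothetical stand-in for a real SIMD type like `__m256`.

```cpp
// Hypothetical reproduction sketch, e.g. `g++ -Wall -c psabi_demo.cpp` on
// x86-64. Depending on target and flags, the by-value function can draw
// "the ABI for passing parameters with 32-byte alignment has changed in
// GCC 4.6"; the by-reference one does not.
struct alignas(32) Vec8f {
  float lanes[8];  // stand-in for an AVX-sized SIMD payload such as __m256
};

// By value: the 32-byte-aligned argument participates in the changed
// calling convention, so GCC may emit -Wpsabi notes here.
float sum_by_value(Vec8f v) {
  float acc = 0.0f;
  for (float lane : v.lanes) acc += lane;
  return acc;
}

// By const reference: only a pointer crosses the call boundary, so the
// alignment ABI never comes into play.
float sum_by_ref(const Vec8f& v) {
  float acc = 0.0f;
  for (float lane : v.lanes) acc += lane;
  return acc;
}

int main() {
  Vec8f v{{1, 2, 3, 4, 5, 6, 7, 8}};
  return static_cast<int>(sum_by_value(v) - sum_by_ref(v));  // 0
}
```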
1 parent 216f69d commit 5914f8f

18 files changed: +42 / -20 lines changed

kernels/optimized/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

```diff
@@ -24,6 +24,7 @@ endif()
 set(_common_compile_options
   $<$<CXX_COMPILER_ID:MSVC>:/wd4996>
   $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations>
+  $<$<CXX_COMPILER_ID:GNU>:-Wno-psabi>
 )

 # Note for apple platform we can rely on Accelerate framework Will come back to
```

kernels/optimized/cpu/op_elu.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -41,7 +41,7 @@ void elu(
       0,
       out.numel(),
       ::executorch::extension::internal::GRAIN_SIZE,
-      [&](const auto begin, const auto end) {
+      [&](const auto& begin, const auto& end) {
         using Vec = at::vec::Vectorized<CTYPE>;
         const auto vectorized_begin =
             begin + (Vec::size() - begin % Vec::size()) % Vec::size();
```

kernels/optimized/cpu/op_log_softmax.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -55,7 +55,7 @@ void log_softmax_kernel(const Tensor& input, int64_t dim, Tensor& out) {
       0,
       outer_size,
       ::executorch::extension::internal::GRAIN_SIZE,
-      [&](const auto begin, const auto end) {
+      [&](const auto& begin, const auto& end) {
         at::native::serial_vec_log_softmax_lastdim_range(
             input_data_base,
             output_data_base,
```

kernels/portable/cpu/op_add.cpp

Lines changed: 2 additions & 2 deletions

```diff
@@ -80,7 +80,7 @@ Tensor& add_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::REALHBBF16>(
-          [val_alpha](const auto val_a, const auto val_b) {
+          [val_alpha](const auto& val_a, const auto& val_b) {
             return val_a + val_alpha * val_b;
           },
           ctx,
@@ -136,7 +136,7 @@ Tensor& add_scalar_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-          [val_alpha_times_b](const auto val_a) {
+          [val_alpha_times_b](const auto& val_a) {
             // Cast here supports vectorization; either it does nothing
             // or it casts from CTYPE_COMPUTE to
             // Vectorized<CTYPE_COMPUTE>.
```
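
A side note on the `const auto&` pattern above: the generic lambda handed to `apply_bitensor_elementwise_fn()` can be instantiated both with scalar `CTYPE_COMPUTE` values and with `at::vec::Vectorized<CTYPE_COMPUTE>` on the vectorized path, so taking the parameters by `const` reference avoids passing the wide, over-aligned type by value in either case. A minimal sketch of that double instantiation, using a hypothetical `FakeVec` stand-in rather than the real `Vectorized` class:

```cpp
#include <cstdio>

// Stand-in for Vectorized<float>: 32-byte-aligned, supports operator+.
struct alignas(32) FakeVec {
  float lanes[8];
  friend FakeVec operator+(const FakeVec& a, const FakeVec& b) {
    FakeVec r{};
    for (int i = 0; i < 8; ++i) {
      r.lanes[i] = a.lanes[i] + b.lanes[i];
    }
    return r;
  }
};

int main() {
  // One generic lambda, two instantiations: scalar and SIMD-width. Taking
  // the arguments by const reference keeps the aligned type out of the
  // by-value calling convention.
  auto add = [](const auto& a, const auto& b) { return a + b; };

  float s = add(1.0f, 2.0f);  // scalar path
  FakeVec v = add(FakeVec{{1, 2, 3, 4, 5, 6, 7, 8}},
                  FakeVec{{8, 7, 6, 5, 4, 3, 2, 1}});  // vectorized path
  std::printf("%f %f\n", s, v.lanes[0]);
  return 0;
}
```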

kernels/portable/cpu/op_addmm.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -92,7 +92,7 @@ Tensor& addmm_out(
           CTYPE,
           op_name,
           utils::SupportedTensorDtypes::REALHBF16>(
-          [alpha_val, beta_val](const auto val_a, const auto val_b) {
+          [alpha_val, beta_val](const auto& val_a, const auto& val_b) {
             return val_a * alpha_val + val_b * beta_val;
           },
           ctx,
```

kernels/portable/cpu/op_atan2.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -59,7 +59,7 @@ Tensor& atan2_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::FLOATHBF16>(
-          [](const auto val_a, const auto val_b) {
+          [](const auto& val_a, const auto& val_b) {
             return executorch::math::atan2(val_a, val_b);
           },
           ctx,
```

kernels/portable/cpu/op_clamp.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -139,7 +139,7 @@ Tensor& clamp_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-          [has_min, min_opt, has_max, max_opt](const auto val_in) {
+          [has_min, min_opt, has_max, max_opt](const auto& val_in) {
             auto val_out = val_in;
             if (has_min) {
               val_out = utils::max_override(
```

kernels/portable/cpu/op_div.cpp

Lines changed: 2 additions & 2 deletions

```diff
@@ -62,7 +62,7 @@ Tensor& div_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::FLOATHBF16>(
-          [](const auto val_a, const auto val_b) { return val_a / val_b; },
+          [](const auto& val_a, const auto& val_b) { return val_a / val_b; },
           ctx,
           a,
           utils::SupportedTensorDtypes::REALHBBF16,
@@ -195,7 +195,7 @@ Tensor& div_scalar_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::SAME_AS_COMMON>(
-          [val_b](const auto val_a) { return val_a / val_b; },
+          [val_b](const auto& val_a) { return val_a / val_b; },
           ctx,
           a,
           utils::SupportedTensorDtypes::REALHBBF16,
```

kernels/portable/cpu/op_fmod.cpp

Lines changed: 1 addition & 1 deletion

```diff
@@ -138,7 +138,7 @@ Tensor& fmod_Scalar_out(
           CTYPE_COMPUTE,
           op_name,
           utils::SupportedTensorDtypes::REALHBF16>(
-          [val_b](const auto val_a) {
+          [val_b](const auto& val_a) {
             return executorch::math::fmod(val_a, (decltype(val_a))val_b);
           },
           ctx,
```

kernels/portable/cpu/op_isinf.cpp

Lines changed: 12 additions & 1 deletion

```diff
@@ -14,7 +14,18 @@ namespace torch {
 namespace executor {
 namespace native {

-DEFINE_UNARY_UFUNC_REALHBBF16_TO_BOOL(isinf_out, std::isinf)
+bool isinf_float(float x) {
+  return std::isinf(x);
+}
+
+bool isinf_double(double x) {
+  return std::isinf(x);
+}
+
+Tensor& isinf_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
+  return internal::unary_ufunc_realhbbf16_to_bool(
+      isinf_float, isinf_double, ctx, in, out);
+}

 } // namespace native
 } // namespace executor
```
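
A note on why explicit wrappers help here: `std::isinf` names an overload set (`float`, `double`, `long double`), so it has no single function type and cannot be passed directly to a helper whose callable parameters are deduced, the way `unary_ufunc_realhbbf16_to_bool` receives `isinf_float` and `isinf_double` above. A minimal sketch of that constraint, with a hypothetical `check()` helper standing in for the real utility (whose actual signature may differ):

```cpp
#include <cmath>
#include <cstdio>
#include <limits>

// Named wrappers pin each call down to a single overload, giving them a
// concrete function type that template deduction can latch onto.
bool isinf_float(float x) { return std::isinf(x); }
bool isinf_double(double x) { return std::isinf(x); }

// Hypothetical stand-in for a helper that takes one callable per dtype.
template <typename FloatFn, typename DoubleFn>
bool check(FloatFn f, DoubleFn d, float a, double b) {
  return f(a) || d(b);
}

int main() {
  // check(std::isinf, std::isinf, ...) would fail to compile: an overload
  // set has no single type for FloatFn/DoubleFn to deduce to.
  bool any = check(isinf_float, isinf_double,
                   std::numeric_limits<float>::infinity(), 0.0);
  std::printf("%d\n", any ? 1 : 0);  // prints 1
  return 0;
}
```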
