Skip to content

Commit 468e39d

Browse files
CNClareChenIskXCr
authored and committed
Merge pull request opencv#23929 from CNClareChen:4.x
* Optimize some functions with LASX. Optimize some functions with LASX. opencv#23929 This patch optimizes some LASX functions and reduces the runtime of opencv_test_core from 662,238 ms to 633,603 ms on the 3A5000 platform. ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [x] There is a reference to the original bug report and related work - [x] There is an accuracy test, performance test and test data in opencv_extra repository, if applicable. Patch to opencv_extra has the same branch name. - [x] The feature is well documented and sample code can be built with the project CMake
1 parent d01271e commit 468e39d

File tree

9 files changed

+2800
-414
lines changed

9 files changed

+2800
-414
lines changed

cmake/OpenCVCompilerOptimizations.cmake

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ list(APPEND CPU_ALL_OPTIMIZATIONS NEON VFPV3 FP16 NEON_DOTPROD)
5353
list(APPEND CPU_ALL_OPTIMIZATIONS MSA)
5454
list(APPEND CPU_ALL_OPTIMIZATIONS VSX VSX3)
5555
list(APPEND CPU_ALL_OPTIMIZATIONS RVV)
56+
list(APPEND CPU_ALL_OPTIMIZATIONS LSX)
5657
list(APPEND CPU_ALL_OPTIMIZATIONS LASX)
5758
list(REMOVE_DUPLICATES CPU_ALL_OPTIMIZATIONS)
5859

@@ -397,10 +398,16 @@ elseif(RISCV)
397398
set(CPU_BASELINE "DETECT" CACHE STRING "${HELP_CPU_BASELINE}")
398399

399400
elseif(LOONGARCH64)
401+
ocv_update(CPU_LSX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_lsx.cpp")
400402
ocv_update(CPU_LASX_TEST_FILE "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_lasx.cpp")
401-
ocv_update(CPU_KNOWN_OPTIMIZATIONS "LASX")
403+
ocv_update(CPU_KNOWN_OPTIMIZATIONS "LSX;LASX")
404+
ocv_update(CPU_LSX_FLAGS_ON "-mlsx")
402405
ocv_update(CPU_LASX_FLAGS_ON "-mlasx")
403-
set(CPU_BASELINE "LASX" CACHE STRING "${HELP_CPU_BASELINE}")
406+
# Default LoongArch64 baseline: LASX, unless LASX is listed in
# CPU_BASELINE_DISABLE, in which case LSX is used instead.
# CPU_BASELINE_DISABLE may name several features, so test for list membership
# (accepting either ',' or ';' as the separator) instead of comparing the
# whole string against "LASX".
# The CACHE set() has no FORCE, so it only supplies a default value; an
# already cached CPU_BASELINE is left untouched.
string(REPLACE "," ";" __ocv_baseline_disabled "${CPU_BASELINE_DISABLE}")
list(FIND __ocv_baseline_disabled "LASX" __ocv_lasx_disabled_idx)
if(__ocv_lasx_disabled_idx EQUAL -1)
  set(CPU_BASELINE "LASX" CACHE STRING "${HELP_CPU_BASELINE}")
else()
  set(CPU_BASELINE "LSX" CACHE STRING "${HELP_CPU_BASELINE}")
endif()
unset(__ocv_baseline_disabled)
unset(__ocv_lasx_disabled_idx)
404411

405412
endif()
406413

cmake/checks/cpu_lsx.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#include <stdio.h>
2+
#include <lsxintrin.h>
3+
4+
int test()
5+
{
6+
const float src[] = { 0.0f, 1.0f, 2.0f, 3.0f};
7+
v4f32 val = (v4f32)__lsx_vld((const float*)(src), 0);
8+
return __lsx_vpickve2gr_w(__lsx_vftint_w_s(val), 3);
9+
}
10+
11+
int main()
12+
{
13+
printf("%d\n", test());
14+
return 0;
15+
}

modules/core/include/opencv2/core/cv_cpu_dispatch.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@
172172
# define CV_MSA 1
173173
#endif
174174

175+
#ifdef CV_CPU_COMPILE_LSX
176+
# include <lsxintrin.h>
177+
# define CV_LSX 1
178+
#endif
179+
175180
#ifdef CV_CPU_COMPILE_LASX
176181
# include <lasxintrin.h>
177182
# define CV_LASX 1
@@ -376,6 +381,10 @@ struct VZeroUpperGuard {
376381
# define CV_RVV 0
377382
#endif
378383

384+
#ifndef CV_LSX
385+
# define CV_LSX 0
386+
#endif
387+
379388
#ifndef CV_LASX
380389
# define CV_LASX 0
381390
#endif

modules/core/include/opencv2/core/cv_cpu_helper.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,27 @@
525525
#endif
526526
#define __CV_CPU_DISPATCH_CHAIN_RVV(fn, args, mode, ...) CV_CPU_CALL_RVV(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
527527

528+
// LSX dispatch helper macros. All three branches additionally require that
// CV_DISABLE_OPTIMIZATION is not defined and CV_ENABLE_INTRINSICS is defined;
// otherwise the last (#else) branch is taken.
// - CV_CPU_COMPILE_LSX defined: LSX is forced (CV_CPU_FORCE_LSX 1) and the
//   call macros return unconditionally, without a runtime check.
// - only CV_CPU_DISPATCH_COMPILE_LSX defined: the call macros return from the
//   opt_LSX implementation only when cv::checkHardwareSupport(CV_CPU_LSX)
//   evaluates to true.
// - otherwise: all flags are 0 and the call macros expand to nothing.
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_LSX
529+
# define CV_TRY_LSX 1
530+
# define CV_CPU_FORCE_LSX 1
531+
# define CV_CPU_HAS_SUPPORT_LSX 1
532+
# define CV_CPU_CALL_LSX(fn, args) return (cpu_baseline::fn args)
533+
# define CV_CPU_CALL_LSX_(fn, args) return (opt_LSX::fn args)
534+
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_LSX
535+
# define CV_TRY_LSX 1
536+
# define CV_CPU_FORCE_LSX 0
537+
# define CV_CPU_HAS_SUPPORT_LSX (cv::checkHardwareSupport(CV_CPU_LSX))
538+
# define CV_CPU_CALL_LSX(fn, args) if (CV_CPU_HAS_SUPPORT_LSX) return (opt_LSX::fn args)
539+
# define CV_CPU_CALL_LSX_(fn, args) if (CV_CPU_HAS_SUPPORT_LSX) return (opt_LSX::fn args)
540+
#else
541+
# define CV_TRY_LSX 0
542+
# define CV_CPU_FORCE_LSX 0
543+
# define CV_CPU_HAS_SUPPORT_LSX 0
544+
# define CV_CPU_CALL_LSX(fn, args)
545+
# define CV_CPU_CALL_LSX_(fn, args)
546+
#endif
547+
// Dispatch-chain entry for LSX: emits the LSX call first, then expands the
// chain entry named by the next `mode` with the remaining arguments.
#define __CV_CPU_DISPATCH_CHAIN_LSX(fn, args, mode, ...) CV_CPU_CALL_LSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
548+
528549
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_LASX
529550
# define CV_TRY_LASX 1
530551
# define CV_CPU_FORCE_LASX 1

modules/core/include/opencv2/core/cvdef.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,8 @@ namespace cv {
281281

282282
#define CV_CPU_RVV 210
283283

284-
#define CV_CPU_LASX 230
284+
#define CV_CPU_LSX 230
285+
#define CV_CPU_LASX 231
285286

286287
// CPU features groups
287288
#define CV_CPU_AVX512_SKX 256
@@ -342,7 +343,8 @@ enum CpuFeatures {
342343

343344
CPU_RVV = 210,
344345

345-
CPU_LASX = 230,
346+
CPU_LSX = 230,
347+
CPU_LASX = 231,
346348

347349
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
348350
CPU_AVX512_COMMON = 257, //!< Common instructions AVX-512F/CD for all CPUs that support AVX-512

modules/core/include/opencv2/core/hal/intrin.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
206206
# undef CV_RVV
207207
#endif
208208

209-
#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071) && !defined(CV_FORCE_SIMD128_CPP)
209+
#if (CV_SSE2 || CV_NEON || CV_VSX || CV_MSA || CV_WASM_SIMD || CV_RVV071 || CV_LSX) && !defined(CV_FORCE_SIMD128_CPP)
210210
#define CV__SIMD_FORWARD 128
211211
#include "opencv2/core/hal/intrin_forward.hpp"
212212
#endif
@@ -242,6 +242,10 @@ using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
242242
#include "opencv2/core/hal/intrin_rvv.hpp"
243243
#endif
244244

245+
#elif CV_LSX && !defined(CV_FORCE_SIMD128_CPP)
246+
247+
#include "opencv2/core/hal/intrin_lsx.hpp"
248+
245249
#elif CV_LASX
246250
#if !defined(CV_FORCE_SIMD128_CPP)
247251
#define CV_FORCE_SIMD128_CPP 1

0 commit comments

Comments
 (0)