From 626cd48200010b5bbd68b94e76fa9bdecb2a096d Mon Sep 17 00:00:00 2001 From: Hans Johnson Date: Fri, 1 May 2026 13:18:35 -0500 Subject: [PATCH 1/6] ENH: Bump Eigen3 import tag to for/itk-20260501-879885e1 Phase 2 prep for Eigen 5.x update. Points UpdateFromUpstream.sh at the refreshed InsightSoftwareConsortium/eigen tag derived from gitlab libeigen/eigen master tip 879885e (2026-05-01), which adds the "Modernize internal utilities for C++14" patch (libeigen/eigen!2490) on top of the 5.0.1 release tip plus three small follow-on fixes (RealQZ pushDownZero counting, small-determinant LU fastpath, and IncompleteLUT static row matching). Adds new top-level support headers introduced since the previous import: AccelerateSupport, KLUSupport, ThreadPool, and Version. The Version header is required because Eigen 5 moved EIGEN_{MAJOR,MINOR,PATCH}_VERSION out of src/Core/util/Macros.h. --- Modules/ThirdParty/Eigen3/UpdateFromUpstream.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Modules/ThirdParty/Eigen3/UpdateFromUpstream.sh b/Modules/ThirdParty/Eigen3/UpdateFromUpstream.sh index 9a4d2cfaa1a..4ec0799d392 100755 --- a/Modules/ThirdParty/Eigen3/UpdateFromUpstream.sh +++ b/Modules/ThirdParty/Eigen3/UpdateFromUpstream.sh @@ -6,10 +6,16 @@ shopt -s dotglob readonly name="Eigen3" readonly ownership="Eigen Upstream " +# ITK has applied a small number of post-import patches under itkeigen/ +# (e.g., SelfadjointMatrixVector.h pzero init), so the tree no longer +# bytewise matches the previous import commit's tree. Use log-based +# matching instead of exact-tree matching for the previous-import probe. 
+exact_tree_match=false readonly subtree="Modules/ThirdParty/Eigen3/src/itkeigen" readonly repo="https://github.com/InsightSoftwareConsortium/eigen" -readonly tag="for/itk-20260305-4c99fca" +readonly tag="for/itk-20260501-879885e1" readonly paths=" +Eigen/AccelerateSupport Eigen/Cholesky Eigen/CholmodSupport Eigen/Core @@ -20,6 +26,7 @@ Eigen/Geometry Eigen/Householder Eigen/IterativeLinearSolvers Eigen/Jacobi +Eigen/KLUSupport Eigen/LU Eigen/MetisSupport Eigen/OrderingMethods @@ -38,7 +45,9 @@ Eigen/StdDeque Eigen/StdList Eigen/StdVector Eigen/SuperLUSupport +Eigen/ThreadPool Eigen/UmfPackSupport +Eigen/Version Eigen/src COPYING.BSD From bc87d16685adbadd975a619beef26a030a53b922 Mon Sep 17 00:00:00 2001 From: Eigen Upstream Date: Fri, 1 May 2026 13:12:39 -0500 Subject: [PATCH 2/6] Eigen3 2026-05-01 (505023a2) Code extracted from: https://github.com/InsightSoftwareConsortium/eigen at commit 505023a2a0fc8eeff5f92d07683ab5ff0c03ba0f (for/itk-20260501-879885e1). --- .gitattributes | 6 + CMakeLists.txt | 234 +- COPYING.MPL2 | 2 +- COPYING.README | 18 +- Eigen/AccelerateSupport | 52 + Eigen/Cholesky | 2 - Eigen/CholmodSupport | 2 +- Eigen/Core | 145 +- Eigen/Dense | 12 + Eigen/Eigen | 12 + Eigen/Eigenvalues | 10 +- Eigen/Geometry | 4 +- Eigen/Householder | 2 +- Eigen/KLUSupport | 43 + Eigen/LU | 7 +- Eigen/PaStiXSupport | 2 +- Eigen/QR | 5 +- Eigen/QtAlignedMalloc | 6 +- Eigen/SPQRSupport | 2 +- Eigen/SVD | 7 +- Eigen/SparseCore | 4 - Eigen/SparseQR | 2 +- Eigen/SuperLUSupport | 1 + Eigen/ThreadPool | 80 + Eigen/UmfPackSupport | 2 +- Eigen/Version | 21 + .../src/AccelerateSupport/AccelerateSupport.h | 2 +- Eigen/src/Cholesky/LDLT.h | 45 +- Eigen/src/Cholesky/LLT.h | 31 +- Eigen/src/CholmodSupport/CholmodSupport.h | 4 +- Eigen/src/Core/ArithmeticSequence.h | 10 +- Eigen/src/Core/Array.h | 23 +- Eigen/src/Core/ArrayBase.h | 11 +- Eigen/src/Core/ArrayWrapper.h | 13 +- Eigen/src/Core/Assign.h | 18 +- Eigen/src/Core/AssignEvaluator.h | 125 +- Eigen/src/Core/Assign_AOCL.h | 
301 + Eigen/src/Core/Assign_MKL.h | 8 +- Eigen/src/Core/Block.h | 45 +- Eigen/src/Core/CommaInitializer.h | 3 +- Eigen/src/Core/ConcatOp.h | 343 + Eigen/src/Core/ConditionEstimator.h | 63 +- Eigen/src/Core/CoreEvaluators.h | 573 +- Eigen/src/Core/CoreIterators.h | 2 +- Eigen/src/Core/CwiseBinaryOp.h | 20 +- Eigen/src/Core/CwiseNullaryOp.h | 28 +- Eigen/src/Core/CwiseTernaryOp.h | 12 +- Eigen/src/Core/CwiseUnaryOp.h | 16 +- Eigen/src/Core/CwiseUnaryView.h | 15 +- Eigen/src/Core/DenseBase.h | 131 +- Eigen/src/Core/DenseCoeffsBase.h | 61 +- Eigen/src/Core/DenseStorage.h | 290 +- Eigen/src/Core/DeviceWrapper.h | 4 +- Eigen/src/Core/Diagonal.h | 40 +- Eigen/src/Core/DiagonalMatrix.h | 85 +- Eigen/src/Core/Dot.h | 15 +- Eigen/src/Core/EigenBase.h | 18 +- Eigen/src/Core/Fill.h | 32 +- Eigen/src/Core/FindCoeff.h | 16 +- Eigen/src/Core/ForceAlignedAccess.h | 21 +- Eigen/src/Core/Fuzzy.h | 12 +- Eigen/src/Core/GeneralProduct.h | 29 +- Eigen/src/Core/GenericPacketMath.h | 181 +- Eigen/src/Core/GlobalFunctions.h | 8 +- Eigen/src/Core/IO.h | 2 +- Eigen/src/Core/IndexedView.h | 19 +- Eigen/src/Core/InnerProduct.h | 57 +- Eigen/src/Core/Inverse.h | 4 +- Eigen/src/Core/Map.h | 9 +- Eigen/src/Core/MapBase.h | 24 +- Eigen/src/Core/MathFunctions.h | 224 +- Eigen/src/Core/MathFunctionsImpl.h | 27 +- Eigen/src/Core/Matrix.h | 75 +- Eigen/src/Core/MatrixBase.h | 101 +- Eigen/src/Core/NestByValue.h | 16 +- Eigen/src/Core/NoAlias.h | 4 +- Eigen/src/Core/NumTraits.h | 18 +- Eigen/src/Core/PartialReduxEvaluator.h | 51 +- Eigen/src/Core/PermutationMatrix.h | 21 +- Eigen/src/Core/PlainObjectBase.h | 79 +- Eigen/src/Core/Product.h | 10 +- Eigen/src/Core/ProductEvaluators.h | 448 +- Eigen/src/Core/Random.h | 4 +- Eigen/src/Core/RandomImpl.h | 26 +- Eigen/src/Core/RealView.h | 292 + Eigen/src/Core/Redux.h | 20 +- Eigen/src/Core/Ref.h | 25 +- Eigen/src/Core/Replicate.h | 10 +- Eigen/src/Core/Reshaped.h | 87 +- Eigen/src/Core/ReturnByValue.h | 4 +- Eigen/src/Core/Reverse.h | 4 +- 
Eigen/src/Core/Select.h | 98 +- Eigen/src/Core/SelfAdjointView.h | 52 +- Eigen/src/Core/SelfCwiseBinaryOp.h | 28 +- Eigen/src/Core/SkewSymmetricMatrix3.h | 59 +- Eigen/src/Core/Solve.h | 4 +- Eigen/src/Core/SolveTriangular.h | 26 +- Eigen/src/Core/SolverBase.h | 6 +- Eigen/src/Core/StableNorm.h | 3 +- Eigen/src/Core/StlIterators.h | 15 +- Eigen/src/Core/Stride.h | 19 +- Eigen/src/Core/StructuredBindings.h | 155 + Eigen/src/Core/Swap.h | 11 +- Eigen/src/Core/Transpose.h | 29 +- Eigen/src/Core/TriangularMatrix.h | 85 +- Eigen/src/Core/VectorBlock.h | 4 +- Eigen/src/Core/VectorwiseOp.h | 36 +- Eigen/src/Core/Visitor.h | 14 +- Eigen/src/Core/arch/AVX/Complex.h | 52 +- Eigen/src/Core/arch/AVX/MathFunctions.h | 54 +- Eigen/src/Core/arch/AVX/PacketMath.h | 86 +- Eigen/src/Core/arch/AVX/TypeCasting.h | 44 +- Eigen/src/Core/arch/AVX512/Complex.h | 38 +- Eigen/src/Core/arch/AVX512/GemmKernel.h | 280 +- Eigen/src/Core/arch/AVX512/MathFunctions.h | 26 +- Eigen/src/Core/arch/AVX512/PacketMath.h | 148 +- Eigen/src/Core/arch/AVX512/PacketMathFP16.h | 16 +- Eigen/src/Core/arch/AVX512/Reductions.h | 2 +- Eigen/src/Core/arch/AVX512/TrsmKernel.h | 106 +- Eigen/src/Core/arch/AVX512/TrsmUnrolls.inc | 12 +- Eigen/src/Core/arch/AVX512/TypeCasting.h | 12 +- Eigen/src/Core/arch/AltiVec/Complex.h | 39 +- .../src/Core/arch/AltiVec/MatrixProductMMA.h | 5 +- .../Core/arch/AltiVec/MatrixVectorProduct.inc | 14 +- Eigen/src/Core/arch/AltiVec/PacketMath.h | 101 +- Eigen/src/Core/arch/AltiVec/TypeCasting.h | 2 +- Eigen/src/Core/arch/Default/BFloat16.h | 65 +- Eigen/src/Core/arch/Default/ConjHelper.h | 18 + .../arch/Default/GenericPacketMathComplex.h | 283 + .../Default/GenericPacketMathDoubleWord.h | 208 + .../Default/GenericPacketMathFrexpLdexp.h | 162 + .../arch/Default/GenericPacketMathFunctions.h | 2575 +---- .../Default/GenericPacketMathFunctionsFwd.h | 131 +- .../Default/GenericPacketMathPolynomials.h | 151 + .../Core/arch/Default/GenericPacketMathPow.h | 724 ++ 
.../Core/arch/Default/GenericPacketMathTrig.h | 1067 ++ Eigen/src/Core/arch/Default/Half.h | 180 +- Eigen/src/Core/arch/GPU/Complex.h | 58 +- Eigen/src/Core/arch/GPU/PacketMath.h | 455 +- Eigen/src/Core/arch/GPU/Tuple.h | 40 +- Eigen/src/Core/arch/GPU/TypeCasting.h | 3 +- Eigen/src/Core/arch/HVX/PacketMath.h | 47 +- Eigen/src/Core/arch/LSX/Complex.h | 28 +- Eigen/src/Core/arch/LSX/PacketMath.h | 102 +- Eigen/src/Core/arch/LSX/TypeCasting.h | 318 +- Eigen/src/Core/arch/MSA/Complex.h | 15 +- Eigen/src/Core/arch/MSA/PacketMath.h | 92 +- Eigen/src/Core/arch/NEON/Complex.h | 122 +- Eigen/src/Core/arch/NEON/MathFunctions.h | 7 +- Eigen/src/Core/arch/NEON/PacketMath.h | 472 +- Eigen/src/Core/arch/NEON/TypeCasting.h | 2 +- .../Core/arch/RVV10/GeneralBlockPanelKernel.h | 236 + Eigen/src/Core/arch/RVV10/MathFunctions.h | 30 + Eigen/src/Core/arch/RVV10/PacketMath.h | 2442 ++++ Eigen/src/Core/arch/RVV10/PacketMath2.h | 1527 +++ Eigen/src/Core/arch/RVV10/PacketMath4.h | 1462 +++ Eigen/src/Core/arch/RVV10/PacketMathBF16.h | 838 ++ Eigen/src/Core/arch/RVV10/PacketMathFP16.h | 998 ++ Eigen/src/Core/arch/RVV10/TypeCasting.h | 284 + Eigen/src/Core/arch/SSE/Complex.h | 41 +- Eigen/src/Core/arch/SSE/PacketMath.h | 326 +- Eigen/src/Core/arch/SSE/TypeCasting.h | 50 - Eigen/src/Core/arch/SVE/MathFunctions.h | 26 +- Eigen/src/Core/arch/SVE/PacketMath.h | 30 +- Eigen/src/Core/arch/SYCL/InteropHeaders.h | 41 +- Eigen/src/Core/arch/SYCL/MathFunctions.h | 314 +- Eigen/src/Core/arch/SYCL/PacketMath.h | 25 - Eigen/src/Core/arch/ZVector/Complex.h | 38 +- Eigen/src/Core/arch/ZVector/MathFunctions.h | 16 +- Eigen/src/Core/arch/ZVector/PacketMath.h | 244 +- Eigen/src/Core/arch/clang/Complex.h | 702 ++ Eigen/src/Core/arch/clang/MathFunctions.h | 47 + Eigen/src/Core/arch/clang/PacketMath.h | 1171 ++ Eigen/src/Core/arch/clang/Reductions.h | 355 + Eigen/src/Core/arch/clang/TypeCasting.h | 186 + Eigen/src/Core/functors/AssignmentFunctors.h | 8 +- Eigen/src/Core/functors/BinaryFunctors.h | 130 +- 
Eigen/src/Core/functors/NullaryFunctors.h | 34 +- Eigen/src/Core/functors/TernaryFunctors.h | 7 +- Eigen/src/Core/functors/UnaryFunctors.h | 215 +- .../Core/products/GeneralBlockPanelKernel.h | 1729 +-- Eigen/src/Core/products/GeneralMatrixMatrix.h | 7 +- .../products/GeneralMatrixMatrixTriangular.h | 4 +- .../GeneralMatrixMatrixTriangular_BLAS.h | 6 +- .../Core/products/GeneralMatrixMatrix_BLAS.h | 19 +- Eigen/src/Core/products/GeneralMatrixVector.h | 427 +- .../Core/products/GeneralMatrixVector_BLAS.h | 2 + Eigen/src/Core/products/Parallelizer.h | 6 +- .../Core/products/SelfadjointMatrixMatrix.h | 6 +- .../products/SelfadjointMatrixMatrix_BLAS.h | 4 + .../Core/products/SelfadjointMatrixVector.h | 326 +- .../products/SelfadjointMatrixVector_BLAS.h | 2 + Eigen/src/Core/products/SelfadjointProduct.h | 108 +- .../Core/products/SelfadjointRank2Update.h | 225 +- .../Core/products/TriangularMatrixMatrix.h | 24 - .../products/TriangularMatrixMatrix_BLAS.h | 4 + .../Core/products/TriangularMatrixVector.h | 154 +- .../products/TriangularMatrixVector_BLAS.h | 3 + .../Core/products/TriangularSolverMatrix.h | 82 +- .../products/TriangularSolverMatrix_BLAS.h | 2 + Eigen/src/Core/util/AOCL_Support.h | 174 + Eigen/src/Core/util/BlasUtil.h | 66 +- Eigen/src/Core/util/ConfigureVectorization.h | 94 +- Eigen/src/Core/util/Constants.h | 50 +- Eigen/src/Core/util/DisableStupidWarnings.h | 3 +- Eigen/src/Core/util/EmulateArray.h | 25 +- Eigen/src/Core/util/ForwardDeclarations.h | 11 +- Eigen/src/Core/util/GpuHipCudaDefines.inc | 4 +- Eigen/src/Core/util/IndexedViewHelper.h | 49 +- Eigen/src/Core/util/IntegralConstant.h | 10 +- Eigen/src/Core/util/Macros.h | 195 +- Eigen/src/Core/util/MaxSizeVector.h | 81 +- Eigen/src/Core/util/Memory.h | 148 +- Eigen/src/Core/util/Meta.h | 259 +- Eigen/src/Core/util/MoreMeta.h | 115 +- Eigen/src/Core/util/Serializer.h | 9 +- Eigen/src/Core/util/SymbolicIndex.h | 12 +- Eigen/src/Core/util/XprHelper.h | 296 +- Eigen/src/Eigenvalues/ComplexEigenSolver.h | 
7 +- Eigen/src/Eigenvalues/ComplexQZ.h | 651 ++ Eigen/src/Eigenvalues/ComplexSchur.h | 7 +- Eigen/src/Eigenvalues/EigenSolver.h | 2 +- .../src/Eigenvalues/HessenbergDecomposition.h | 2 +- Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h | 3 +- Eigen/src/Eigenvalues/RealQZ.h | 4 +- Eigen/src/Eigenvalues/RealSchur.h | 10 +- .../src/Eigenvalues/SelfAdjointEigenSolver.h | 152 +- Eigen/src/Eigenvalues/Tridiagonalization.h | 164 +- Eigen/src/Geometry/AngleAxis.h | 76 +- Eigen/src/Geometry/EulerAngles.h | 46 +- Eigen/src/Geometry/Homogeneous.h | 8 +- Eigen/src/Geometry/Hyperplane.h | 2 +- Eigen/src/Geometry/OrthoMethods.h | 6 +- Eigen/src/Geometry/Quaternion.h | 28 +- Eigen/src/Geometry/Rotation2D.h | 4 +- Eigen/src/Geometry/Scaling.h | 2 +- Eigen/src/Geometry/Transform.h | 13 +- Eigen/src/Geometry/Umeyama.h | 7 + Eigen/src/Householder/BlockHouseholder.h | 74 +- Eigen/src/Householder/Householder.h | 37 +- Eigen/src/Householder/HouseholderSequence.h | 50 +- .../BasicPreconditioners.h | 8 +- Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 5 +- .../ConjugateGradient.h | 1 + .../IncompleteCholesky.h | 2 +- .../IterativeLinearSolvers/IncompleteLUT.h | 265 +- .../LeastSquareConjugateGradient.h | 1 + Eigen/src/Jacobi/Jacobi.h | 65 +- Eigen/src/KLUSupport/KLUSupport.h | 86 +- Eigen/src/LU/Determinant.h | 5 +- Eigen/src/LU/FullPivLU.h | 180 +- Eigen/src/LU/InverseImpl.h | 7 +- Eigen/src/LU/PartialPivLU.h | 56 +- Eigen/src/MetisSupport/MetisSupport.h | 2 +- Eigen/src/OrderingMethods/Eigen_Colamd.h | 2 +- Eigen/src/OrderingMethods/Ordering.h | 10 +- Eigen/src/PardisoSupport/PardisoSupport.h | 7 +- Eigen/src/QR/ColPivHouseholderQR.h | 163 +- Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h | 4 +- .../src/QR/CompleteOrthogonalDecomposition.h | 8 +- Eigen/src/QR/FullPivHouseholderQR.h | 178 +- Eigen/src/QR/HouseholderQR.h | 24 +- Eigen/src/SPQRSupport/SuiteSparseQRSupport.h | 139 +- Eigen/src/SVD/BDCSVD.h | 1243 +-- Eigen/src/SVD/BDCSVDImpl.h | 821 ++ Eigen/src/SVD/JacobiSVD.h | 353 +- 
Eigen/src/SVD/SVDBase.h | 2 +- Eigen/src/SVD/UpperBidiagonalization.h | 33 +- Eigen/src/SparseCholesky/SimplicialCholesky.h | 25 +- .../SparseCholesky/SimplicialCholesky_impl.h | 7 +- Eigen/src/SparseCore/AmbiVector.h | 6 +- Eigen/src/SparseCore/CompressedStorage.h | 7 +- .../ConservativeSparseSparseProduct.h | 15 +- Eigen/src/SparseCore/SparseAssign.h | 2 +- Eigen/src/SparseCore/SparseBlock.h | 30 +- Eigen/src/SparseCore/SparseCompressedBase.h | 29 +- Eigen/src/SparseCore/SparseCwiseBinaryOp.h | 4 +- Eigen/src/SparseCore/SparseDenseProduct.h | 247 +- Eigen/src/SparseCore/SparseDiagonalProduct.h | 12 +- Eigen/src/SparseCore/SparseDot.h | 6 +- Eigen/src/SparseCore/SparseMap.h | 15 +- Eigen/src/SparseCore/SparseMatrix.h | 37 +- Eigen/src/SparseCore/SparseMatrixBase.h | 67 +- Eigen/src/SparseCore/SparsePermutation.h | 2 +- Eigen/src/SparseCore/SparseRef.h | 21 +- Eigen/src/SparseCore/SparseSelfAdjointView.h | 19 +- Eigen/src/SparseCore/SparseSolverBase.h | 13 +- .../SparseSparseProductWithPruning.h | 13 +- Eigen/src/SparseCore/SparseUtil.h | 2 +- Eigen/src/SparseCore/SparseVector.h | 42 +- Eigen/src/SparseCore/SparseView.h | 2 +- Eigen/src/SparseCore/TriangularSolver.h | 43 +- Eigen/src/SparseLU/SparseLU.h | 70 +- Eigen/src/SparseLU/SparseLU_Memory.h | 2 +- .../src/SparseLU/SparseLU_SupernodalMatrix.h | 6 +- Eigen/src/SparseLU/SparseLU_column_bmod.h | 2 +- .../src/SparseLU/SparseLU_heap_relax_snode.h | 2 +- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 6 +- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 5 +- Eigen/src/SparseQR/SparseQR.h | 41 +- Eigen/src/SuperLUSupport/SuperLUSupport.h | 39 +- Eigen/src/ThreadPool/Barrier.h | 2 +- Eigen/src/ThreadPool/InternalHeaderCheck.h | 3 +- Eigen/src/ThreadPool/NonBlockingThreadPool.h | 2 +- Eigen/src/ThreadPool/RunQueue.h | 10 +- Eigen/src/ThreadPool/ThreadLocal.h | 5 +- Eigen/src/UmfPackSupport/UmfPackSupport.h | 6 +- Eigen/src/misc/RankRevealingBase.h | 178 + Eigen/src/misc/RealSvd2x2.h | 53 - Eigen/src/misc/lapacke.h | 9912 
+---------------- Eigen/src/plugins/ArrayCwiseBinaryOps.inc | 48 +- Eigen/src/plugins/ArrayCwiseUnaryOps.inc | 125 +- Eigen/src/plugins/BlockMethods.inc | 199 +- Eigen/src/plugins/CommonCwiseBinaryOps.inc | 20 +- Eigen/src/plugins/CommonCwiseUnaryOps.inc | 22 +- Eigen/src/plugins/MatrixCwiseBinaryOps.inc | 68 +- Eigen/src/plugins/MatrixCwiseUnaryOps.inc | 31 +- Eigen/src/plugins/ReshapedMethods.inc | 18 +- README.md | 2 +- cmake/Eigen3Config.cmake.in | 6 +- 320 files changed, 25723 insertions(+), 21419 deletions(-) create mode 100644 Eigen/AccelerateSupport create mode 100644 Eigen/KLUSupport create mode 100644 Eigen/ThreadPool create mode 100644 Eigen/Version create mode 100644 Eigen/src/Core/Assign_AOCL.h create mode 100644 Eigen/src/Core/ConcatOp.h create mode 100644 Eigen/src/Core/RealView.h create mode 100644 Eigen/src/Core/StructuredBindings.h create mode 100644 Eigen/src/Core/arch/Default/GenericPacketMathComplex.h create mode 100644 Eigen/src/Core/arch/Default/GenericPacketMathDoubleWord.h create mode 100644 Eigen/src/Core/arch/Default/GenericPacketMathFrexpLdexp.h create mode 100644 Eigen/src/Core/arch/Default/GenericPacketMathPolynomials.h create mode 100644 Eigen/src/Core/arch/Default/GenericPacketMathPow.h create mode 100644 Eigen/src/Core/arch/Default/GenericPacketMathTrig.h create mode 100644 Eigen/src/Core/arch/RVV10/GeneralBlockPanelKernel.h create mode 100644 Eigen/src/Core/arch/RVV10/MathFunctions.h create mode 100644 Eigen/src/Core/arch/RVV10/PacketMath.h create mode 100644 Eigen/src/Core/arch/RVV10/PacketMath2.h create mode 100644 Eigen/src/Core/arch/RVV10/PacketMath4.h create mode 100644 Eigen/src/Core/arch/RVV10/PacketMathBF16.h create mode 100644 Eigen/src/Core/arch/RVV10/PacketMathFP16.h create mode 100644 Eigen/src/Core/arch/RVV10/TypeCasting.h create mode 100644 Eigen/src/Core/arch/clang/Complex.h create mode 100644 Eigen/src/Core/arch/clang/MathFunctions.h create mode 100644 Eigen/src/Core/arch/clang/PacketMath.h create mode 100644 
Eigen/src/Core/arch/clang/Reductions.h create mode 100644 Eigen/src/Core/arch/clang/TypeCasting.h create mode 100644 Eigen/src/Core/util/AOCL_Support.h create mode 100644 Eigen/src/Eigenvalues/ComplexQZ.h create mode 100644 Eigen/src/SVD/BDCSVDImpl.h create mode 100644 Eigen/src/misc/RankRevealingBase.h delete mode 100644 Eigen/src/misc/RealSvd2x2.h diff --git a/.gitattributes b/.gitattributes index 3d370f28b2a..830efe4baa4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,9 @@ +*.sh eol=lf +debug/msvc/*.dat eol=crlf +debug/msvc/*.natvis eol=crlf + +# ITK fork: relax content-checks for the largest Eigen header so +# UpdateFromUpstream.sh / KWStyle hooks do not reject the import. * -whitespace Eigen/src/misc/lapacke.h hooks-max-size=1500000 Eigen/src/misc/lapacke.h hooks.MaxObjectKiB=2048 diff --git a/CMakeLists.txt b/CMakeLists.txt index 4bd238866f4..80494f4445d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,8 @@ project(Eigen3) # ITK doesn't compile anything here, just generates targets for the INTERFACE library. if(FALSE) +cmake_minimum_required(VERSION 3.10.0) + #============================================================================== # CMake Policy issues. #============================================================================== @@ -40,10 +42,17 @@ if (POLICY CMP0177) cmake_policy(SET CMP0177 NEW) endif () +# Respect _ROOT variables. +if (POLICY CMP0074) + cmake_policy(SET CMP0074 NEW) +endif () + #============================================================================== # CMake Project. 
#============================================================================== +project(Eigen3) + # Remove this block after bumping CMake to v3.21.0 # PROJECT_IS_TOP_LEVEL is defined then by default if(CMAKE_VERSION VERSION_LESS 3.21.0) @@ -67,19 +76,12 @@ option(EIGEN_LEAVE_TEST_IN_ALL_TARGET "Leaves tests in the all target, needed by option(EIGEN_BUILD_BLAS "Toggles the building of the Eigen Blas library" ${PROJECT_IS_TOP_LEVEL}) option(EIGEN_BUILD_LAPACK "Toggles the building of the included Eigen LAPACK library" ${PROJECT_IS_TOP_LEVEL}) if (EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK) - # BLAS and LAPACK currently need a fortran compiler. - include(CMakeDetermineFortranCompiler) - if (NOT CMAKE_Fortran_COMPILER) - set(EIGEN_BUILD_BLAS OFF) - set(EIGEN_BUILD_LAPACK OFF) - else() - # Determine if we should build shared libraries for BLAS/LAPACK on this platform. + # Determine if we should build shared libraries for BLAS/LAPACK on this platform. + if (NOT EIGEN_BUILD_SHARED_LIBS) get_cmake_property(EIGEN_BUILD_SHARED_LIBS TARGET_SUPPORTS_SHARED_LIBS) endif() endif() -option(EIGEN_BUILD_BTL "Build benchmark suite" OFF) -option(EIGEN_BUILD_SPBENCH "Build sparse benchmark suite" OFF) # Avoid building docs if included from another project. # Building documentation requires creating and running executables on the host # platform. We shouldn't do this if cross-compiling. @@ -96,7 +98,7 @@ if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows) endif() option(EIGEN_BUILD_CMAKE_PACKAGE "Enables the creation of EigenConfig.cmake and related files" ${PROJECT_IS_TOP_LEVEL}) -if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILT_BTL OR EIGEN_BUILD_BTL OR EIGEN_BUILD_SPBENCH OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS) +if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS) set(EIGEN_IS_BUILDING_ ON) endif() @@ -104,15 +106,28 @@ endif() # Version Info. 
#============================================================================== -# Automatically parse the version number from header files. -file(READ "${PROJECT_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header) -string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}") -set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}") -string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}") -set(EIGEN_MAJOR_VERSION "${CMAKE_MATCH_1}") -string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_version_match "${_eigen_version_header}") -set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}") -set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}) +# If version information is not provided, automatically parse the version number +# from header files. +file(READ "${PROJECT_SOURCE_DIR}/Eigen/Version" _eigen_version_header) +if (NOT DEFINED EIGEN_WORLD_VERSION) + string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}") + set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}" CACHE STRING "") +endif() +if (NOT DEFINED EIGEN_MAJOR_VERSION) + string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}") + set(EIGEN_MAJOR_VERSION "${CMAKE_MATCH_1}" CACHE STRING "") +endif() +if (NOT DEFINED EIGEN_MINOR_VERSION) + string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_version_match "${_eigen_version_header}") + set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}" CACHE STRING "") +endif() +if (NOT DEFINED EIGEN_PATCH_VERSION) + string(REGEX MATCH "define[ \t]+EIGEN_PATCH_VERSION[ \t]+([0-9]+)" _eigen_patch_version_match "${_eigen_version_header}") + set(EIGEN_PATCH_VERSION "${CMAKE_MATCH_1}" CACHE STRING "") +endif() +if (NOT DEFINED EIGEN_PRERELEASE_VERSION) + 
set(EIGEN_PRERELEASE_VERSION "dev") +endif() # If we are in a git repo, extract a changeset. if(IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.git) @@ -123,16 +138,32 @@ endif() # extract the git rev number from the git output... if(EIGEN_GIT_OUTPUT) -string(REGEX MATCH "^([0-9;a-f]+).*" EIGEN_GIT_CHANGESET_MATCH "${EIGEN_GIT_OUTPUT}") -set(EIGEN_GIT_REVNUM "${CMAKE_MATCH_1}") + string(REGEX MATCH "^([0-9;a-f]+).*" EIGEN_GIT_CHANGESET_MATCH "${EIGEN_GIT_OUTPUT}") + set(EIGEN_GIT_REVNUM "${CMAKE_MATCH_1}") endif() -#...and show it next to the version number -if(EIGEN_GIT_REVNUM) - set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER} (git rev ${EIGEN_GIT_REVNUM})") + +if (NOT DEFINED EIGEN_BUILD_VERSION AND DEFINED EIGEN_GIT_REVNUM) + string(SUBSTRING "${EIGEN_GIT_REVNUM}" 0 8 EIGEN_BUILD_VERSION) else() - set(EIGEN_VERSION "${EIGEN_VERSION_NUMBER}") + set(EIGEN_BUILD_VERSION "" CACHE STRING "") +endif() + +# The EIGEN_VERSION_NUMBER must be of the form MAJOR.MINOR.PATCH. +# The EIGEN_VERSION_STRING can contain the prerelease/build strings. +set(EIGEN_VERSION_NUMBER "${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}.${EIGEN_PATCH_VERSION}" CACHE STRING "") +set(EIGEN_VERSION_STRING "${EIGEN_VERSION_NUMBER}" CACHE STRING "") +if (NOT "x${EIGEN_PRERELEASE_VERSION}" STREQUAL "x") + set(EIGEN_VERSION_STRING "${EIGEN_VERSION_STRING}-${EIGEN_PRERELEASE_VERSION}" CACHE STRING "") +endif() +if (NOT "x${EIGEN_BUILD_VERSION}" STREQUAL "x") + set(EIGEN_VERSION_STRING "${EIGEN_VERSION_STRING}+${EIGEN_BUILD_VERSION}" CACHE STRING "") endif() + +# Generate version file. +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/Version.in" + "${CMAKE_CURRENT_BINARY_DIR}/include/Eigen/Version") + #============================================================================== # Install Path Configuration. #============================================================================== @@ -180,11 +211,6 @@ endforeach() # Eigen Library. 
#============================================================================== -set ( EIGEN_VERSION_STRING ${EIGEN_VERSION_NUMBER} ) -set ( EIGEN_VERSION_MAJOR ${EIGEN_WORLD_VERSION} ) -set ( EIGEN_VERSION_MINOR ${EIGEN_MAJOR_VERSION} ) -set ( EIGEN_VERSION_PATCH ${EIGEN_MINOR_VERSION} ) - # Alias Eigen_*_DIR to Eigen3_*_DIR: set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR}) set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR}) @@ -197,6 +223,9 @@ target_include_directories (eigen INTERFACE $ ) +# Eigen requires at least C++14 +target_compile_features (eigen INTERFACE cxx_std_14) + # Export as title case Eigen set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen) @@ -216,6 +245,9 @@ if(EIGEN_BUILD_PKGCONFIG) endif() install(DIRECTORY Eigen DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel) +# Replace the "Version" header file with the generated one. +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/Eigen/Version + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/ COMPONENT Devel) install(TARGETS eigen EXPORT Eigen3Targets) @@ -229,25 +261,10 @@ if(EIGEN_BUILD_CMAKE_PACKAGE) NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components ) - # NOTE Remove the first code path once the minimum required CMake version is - # bumped to 3.14 or above. - if (CMAKE_VERSION VERSION_LESS 3.14) - # Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does - # not depend on architecture specific settings or libraries. More - # specifically, an Eigen3Config.cmake generated from a 64 bit target can be - # used for 32 bit targets as well (and vice versa). 
- set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P}) - unset (CMAKE_SIZEOF_VOID_P) - write_basic_package_version_file (Eigen3ConfigVersion.cmake - VERSION ${EIGEN_VERSION_NUMBER} - COMPATIBILITY SameMajorVersion) - set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P}) - else (CMAKE_VERSION VERSION_LESS 3.14) - write_basic_package_version_file (Eigen3ConfigVersion.cmake - VERSION ${EIGEN_VERSION_NUMBER} - COMPATIBILITY SameMajorVersion - ARCH_INDEPENDENT) - endif (CMAKE_VERSION VERSION_LESS 3.14) + set(CVF_VERSION "${EIGEN_VERSION_NUMBER}") + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigVersion.cmake.in" + "Eigen3ConfigVersion.cmake" + @ONLY) # The Eigen target will be located in the Eigen3 namespace. Other CMake # targets can refer to it using Eigen3::Eigen. @@ -299,17 +316,29 @@ if (EIGEN_IS_BUILDING_) set(CMAKE_INCLUDE_CURRENT_DIR OFF) find_package(StandardMathLibrary) + find_package(AOCL QUIET) set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "") + if(AOCL_FOUND) + list(APPEND EIGEN_STANDARD_LIBRARIES_TO_LINK_TO ${AOCL_LIBRARIES}) + if(AOCL_INCLUDE_DIRS) + include_directories(${AOCL_INCLUDE_DIRS}) + endif() + endif() + if(NOT STANDARD_MATH_LIBRARY_FOUND) - message(FATAL_ERROR - "Can't link to the standard math library. Please report to the Eigen developers, telling them about your platform.") + message(FATAL_ERROR + "Can't link to the standard math library. 
Please report to the Eigen developers, telling them about your platform.") else() - if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) - set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}") - else() - set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}") - endif() + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}") + else() + set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}") endif() + # Clean up any leading/trailing whitespace in the variable to avoid CMP0004 errors + string(STRIP "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}" EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + endif() + + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) message(STATUS "Standard libraries to link to explicitly: ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}") else() @@ -386,6 +415,8 @@ if (EIGEN_BUILD_TESTING) ei_add_cxx_compiler_flag("-Wshorten-64-to-32") ei_add_cxx_compiler_flag("-Wlogical-op") ei_add_cxx_compiler_flag("-Wenum-conversion") + ei_add_cxx_compiler_flag("-Werror=deprecated-anon-enum-enum-conversion") + ei_add_cxx_compiler_flag("-Werror=deprecated-enum-enum-conversion") ei_add_cxx_compiler_flag("-Wc++11-extensions") ei_add_cxx_compiler_flag("-Wdouble-promotion") # ei_add_cxx_compiler_flag("-Wconversion") @@ -393,6 +424,7 @@ if (EIGEN_BUILD_TESTING) ei_add_cxx_compiler_flag("-Wno-psabi") ei_add_cxx_compiler_flag("-Wno-variadic-macros") ei_add_cxx_compiler_flag("-Wno-long-long") + ei_add_cxx_compiler_flag("-Wno-pass-failed") # disable clang's warning for unrolling when the loop count is dynamic. 
ei_add_cxx_compiler_flag("-fno-common") ei_add_cxx_compiler_flag("-fstrict-aliasing") ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark @@ -403,6 +435,17 @@ if (EIGEN_BUILD_TESTING) ei_add_cxx_compiler_flag("-fno-check-new") endif() + # GCC 12+ emits false-positive -Warray-bounds, -Wmaybe-uninitialized, + # -Wstringop-overread, and -Wnonnull warnings at -O2/-O3 in heavily + # templated code with mixed static/dynamic sizes. These are well-known + # compiler bugs (see GCC PR 109394, 106247, 105329, 98610, among others). + if (CMAKE_COMPILER_IS_GNUCXX) + ei_add_cxx_compiler_flag("-Wno-array-bounds") + ei_add_cxx_compiler_flag("-Wno-maybe-uninitialized") + ei_add_cxx_compiler_flag("-Wno-stringop-overread") + ei_add_cxx_compiler_flag("-Wno-nonnull") + endif() + if(ANDROID_NDK) ei_add_cxx_compiler_flag("-pie") @@ -639,7 +682,7 @@ if (EIGEN_BUILD_TESTING) endif() set(EIGEN_CUDA_CXX_FLAGS "" CACHE STRING "Additional flags to pass to the cuda compiler.") - set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture(s) to target when compiling CUDA code") + set(EIGEN_CUDA_COMPUTE_ARCH 70 CACHE STRING "The CUDA compute architecture(s) to target when compiling CUDA code") option(EIGEN_TEST_SYCL "Add Sycl support." OFF) if(EIGEN_TEST_SYCL) @@ -729,15 +772,6 @@ if(EIGEN_BUILD_DOC) add_subdirectory(doc EXCLUDE_FROM_ALL) endif() -# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"? 
-if(EIGEN_BUILD_BTL) - add_subdirectory(bench/btl EXCLUDE_FROM_ALL) -endif() - -if(NOT WIN32 AND EIGEN_BUILD_SPBENCH) - add_subdirectory(bench/spbench EXCLUDE_FROM_ALL) -endif() - if (EIGEN_BUILD_DEMOS) add_subdirectory(demos EXCLUDE_FROM_ALL) endif() @@ -791,8 +825,9 @@ if(PROJECT_IS_TOP_LEVEL) endif() message(STATUS "") -message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}") +message(STATUS "Configured Eigen ${EIGEN_VERSION_STRING}") message(STATUS "") + endif() # Regular CMakeLists of Eigen ends here ############################################################################### @@ -814,15 +849,33 @@ include(GNUInstallDirs) set(INCLUDE_INSTALL_DIR "${CMAKE_INSTALL_INCLUDEDIR}") set(CMAKEPACKAGE_INSTALL_DIR "${CMAKE_INSTALL_DATADIR}") -# automatically parse the version number -file(READ "${PROJECT_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header) +# Automatically parse the version number. +# Eigen 5+ moved the version macros from Eigen/src/Core/util/Macros.h to +# Eigen/Version, and switched to semantic versioning. EIGEN_WORLD_VERSION +# is now permanently "3" (legacy "Eigen3" name); EIGEN_MAJOR_VERSION / +# EIGEN_MINOR_VERSION / EIGEN_PATCH_VERSION carry the real semver triple. +# Build EIGEN_VERSION_NUMBER as MAJOR.MINOR.PATCH so that +# find_package(Eigen3 X.Y.Z) and Eigen3_VERSION reflect the real version. 
+if(EXISTS "${PROJECT_SOURCE_DIR}/Eigen/Version") + file(READ "${PROJECT_SOURCE_DIR}/Eigen/Version" _eigen_version_header) +else() + file(READ "${PROJECT_SOURCE_DIR}/Eigen/src/Core/util/Macros.h" _eigen_version_header) +endif() string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}") set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}") string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}") set(EIGEN_MAJOR_VERSION "${CMAKE_MATCH_1}") string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_version_match "${_eigen_version_header}") set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}") -set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}) +string(REGEX MATCH "define[ \t]+EIGEN_PATCH_VERSION[ \t]+([0-9]+)" _eigen_patch_version_match "${_eigen_version_header}") +set(EIGEN_PATCH_VERSION "${CMAKE_MATCH_1}") +if(EIGEN_WORLD_VERSION STREQUAL "3" AND NOT "${_eigen_patch_version_match}" STREQUAL "") + # Eigen >= 5.0 (PATCH macro found; its value may be 0, so test presence, not truthiness): version is MAJOR.MINOR.PATCH. + set(EIGEN_VERSION_NUMBER ${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}.${EIGEN_PATCH_VERSION}) +else() + # Eigen <= 3.4.x: legacy WORLD.MAJOR.MINOR layout. 
+ set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}) +endif() include (CMakePackageConfigHelpers) @@ -882,7 +935,7 @@ install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake # Install files (used for both eigen_external and eigen_internal) install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/Eigen/" - DESTINATION "${ITK3P_INSTALL_INCLUDE_DIR}/itkeigen/Eigen" + DESTINATION "${INCLUDE_INSTALL_DIR}/itkeigen/Eigen" PATTERN "*.txt" EXCLUDE) ######################### eigen_internal ##################################### @@ -896,6 +949,8 @@ add_library (ITKInternalEigen3::Eigen ALIAS eigen_internal) # This would wrongly enforce EIGEN_MPL2_ONLY to other libraries using Eigen. # We wrap this definition in ITK_USE_EIGEN_MPL2_ONLY, and only enabling it internally in the dashboards and CI, # to avoid introducing GPL code from Eigen3 internally in ITK. +option(ITK_USE_EIGEN_MPL2_ONLY "Set compile definition EIGEN_MPL2_ONLY for ITKInternalEigen3." OFF) +mark_as_advanced(ITK_USE_EIGEN_MPL2_ONLY) if(ITK_USE_EIGEN_MPL2_ONLY) target_compile_definitions (eigen_internal INTERFACE "EIGEN_MPL2_ONLY") @@ -905,10 +960,41 @@ endif() # #include # INSTALL: headers require pre-prend itkeigen/Eigen/X. 
target_include_directories (eigen_internal SYSTEM INTERFACE - $ - "$/${ITK3P_INSTALL_INCLUDE_DIR}/itkeigen>;" + $ + # $ + $ ) # Export as title case Eigen -install (TARGETS eigen_internal EXPORT ${ITK3P_INSTALL_EXPORT_NAME}) +set_target_properties (eigen_internal PROPERTIES EXPORT_NAME Eigen) +install (TARGETS eigen_internal EXPORT ITKInternalEigen3Targets) +set(EIGEN3_TARGETS_FILE ITKInternalEigen3Targets.cmake) +configure_package_config_file ( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/ITKInternalEigen3Config.cmake + INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} + NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components + ) +# Remove CMAKE_SIZEOF_VOID_P from Eigen3ConfigVersion.cmake since Eigen does +# not depend on architecture specific settings or libraries. More +# specifically, an Eigen3Config.cmake generated from a 64 bit target can be +# used for 32 bit targets as well (and vice versa). +set (_Eigen3_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P}) +unset (CMAKE_SIZEOF_VOID_P) +write_basic_package_version_file (ITKInternalEigen3ConfigVersion.cmake + VERSION ${EIGEN_VERSION_NUMBER} + COMPATIBILITY SameMajorVersion) +set (CMAKE_SIZEOF_VOID_P ${_Eigen3_CMAKE_SIZEOF_VOID_P}) +# The Eigen target will be located in the Eigen3 namespace. Other CMake +# targets can refer to it using Eigen3::Eigen. 
+export (TARGETS eigen_internal NAMESPACE ITKInternalEigen3:: FILE ITKInternalEigen3Targets.cmake) +install (EXPORT ITKInternalEigen3Targets NAMESPACE ITKInternalEigen3:: DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}) +# Files already installed in eigen_external +# install( +# DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/Eigen/" +# DESTINATION "${INCLUDE_INSTALL_DIR}/itkeigen/Eigen" +# PATTERN "*.txt" EXCLUDE) +install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/ITKInternalEigen3Config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/ITKInternalEigen3ConfigVersion.cmake + DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} ) diff --git a/COPYING.MPL2 b/COPYING.MPL2 index ee6256cdb62..d0a1fa1482e 100644 --- a/COPYING.MPL2 +++ b/COPYING.MPL2 @@ -35,7 +35,7 @@ Mozilla Public License Version 2.0 means any form of the work other than Source Code Form. 1.7. "Larger Work" - means a work that combines Covered Software with other material, in + means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" diff --git a/COPYING.README b/COPYING.README index 11af93ca790..93ec692667c 100644 --- a/COPYING.README +++ b/COPYING.README @@ -1,6 +1,14 @@ -Eigen is primarily MPL2 licensed. See COPYING.MPL2 and these links: - http://www.mozilla.org/MPL/2.0/ - http://www.mozilla.org/MPL/2.0/FAQ.html +Eigen is primarily licensed under the Mozilla Public License 2.0. +See LICENSE, COPYING.MPL2, and these links: + https://www.mozilla.org/MPL/2.0/ + https://www.mozilla.org/MPL/2.0/FAQ.html -Some files contain third-party code under BSD or other MPL2-compatible licenses, -whence the other COPYING.* files here. \ No newline at end of file +Some files contain third-party code under permissive or otherwise +MPL2-compatible licenses, hence the other COPYING.* files here. These +include Apache-2.0, BSD-style notices, the MINPACK license, and the MORSE +CMake module BSD-style notice in cmake/MORSE-Copyright.txt. + +Note that some optional external dependencies (e.g. 
FFTW, MPFR C++) +are distributed under different licenses, including the GPL. Refer to +the individual source files and their respective COPYING files for +details. diff --git a/Eigen/AccelerateSupport b/Eigen/AccelerateSupport new file mode 100644 index 00000000000..533be688ed2 --- /dev/null +++ b/Eigen/AccelerateSupport @@ -0,0 +1,52 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ACCELERATESUPPORT_MODULE_H +#define EIGEN_ACCELERATESUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \ingroup Support_modules + * \defgroup AccelerateSupport_Module AccelerateSupport module + * + * This module provides an interface to the Apple Accelerate library. + * It provides the seven following main factorization classes: + * - class AccelerateLLT: a Cholesky (LL^T) factorization. + * - class AccelerateLDLT: the default LDL^T factorization. + * - class AccelerateLDLTUnpivoted: a Cholesky-like LDL^T factorization with only 1x1 pivots and no pivoting + * - class AccelerateLDLTSBK: an LDL^T factorization with Supernode Bunch-Kaufman and static pivoting + * - class AccelerateLDLTTPP: an LDL^T factorization with full threshold partial pivoting + * - class AccelerateQR: a QR factorization + * - class AccelerateCholeskyAtA: a QR factorization without storing Q (equivalent to A^TA = R^T R) + * + * \code + * #include + * \endcode + * + * In order to use this module, the Accelerate headers must be accessible from + * the include paths, and your binary must be linked to the Accelerate framework. + * The Accelerate library is only available on Apple hardware. + * + * Note that many of the algorithms can be influenced by the UpLo template + * argument. 
All matrices are assumed to be symmetric. For example, the following + * creates an LDLT factorization where your matrix is symmetric (implicit) and + * uses the lower triangle: + * + * \code + * AccelerateLDLT, Lower> ldlt; + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/AccelerateSupport/AccelerateSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ACCELERATESUPPORT_MODULE_H diff --git a/Eigen/Cholesky b/Eigen/Cholesky index b05ed8278c6..e75357a657b 100644 --- a/Eigen/Cholesky +++ b/Eigen/Cholesky @@ -14,8 +14,6 @@ #include "src/Core/util/DisableStupidWarnings.h" /** \defgroup Cholesky_Module Cholesky module - * - * * * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices. * Those decompositions are also accessible via the following methods: diff --git a/Eigen/CholmodSupport b/Eigen/CholmodSupport index adc5f8d63e7..31725138be8 100644 --- a/Eigen/CholmodSupport +++ b/Eigen/CholmodSupport @@ -26,7 +26,7 @@ * For the sake of completeness, this module also propose the two following classes: * - class CholmodSimplicialLLT * - class CholmodSimplicialLDLT - * Note that these classes does not bring any particular advantage compared to the built-in + * Note that these classes do not bring any particular advantage compared to the built-in * SimplicialLLT and SimplicialLDLT factorization classes. * * \code diff --git a/Eigen/Core b/Eigen/Core index cf2b164b711..060c92bcd7f 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -11,6 +11,9 @@ #ifndef EIGEN_CORE_MODULE_H #define EIGEN_CORE_MODULE_H +// Eigen version information. +#include "Version" + // first thing Eigen does: stop the compiler from reporting useless warnings. 
#include "src/Core/util/DisableStupidWarnings.h" @@ -33,12 +36,6 @@ #include #endif -// Disable the ipa-cp-clone optimization flag with MinGW 6.x or older (enabled by default with -O3) -// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. -#if EIGEN_COMP_MINGW && EIGEN_GNUC_STRICT_LESS_THAN(6, 0, 0) -#pragma GCC optimize("-fno-ipa-cp-clone") -#endif - // Prevent ICC from specializing std::complex operators that silently fail // on device. This allows us to use our own device-compatible specializations // instead. @@ -50,10 +47,12 @@ // this include file manages BLAS and MKL related macros // and inclusion of their respective header files #include "src/Core/util/MKL_support.h" +#include "src/Core/util/AOCL_Support.h" -#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16) -#define EIGEN_HAS_GPU_FP16 -#endif + +// EIGEN_HAS_GPU_FP16 is now always true when compiling with CUDA or HIP. +// Use EIGEN_GPUCC (compile-time) or EIGEN_GPU_COMPILE_PHASE (device phase) instead. +// TODO: Remove EIGEN_HAS_GPU_BF16 similarly once HIP bf16 guards are cleaned up. #if defined(EIGEN_HAS_CUDA_BF16) || defined(EIGEN_HAS_HIP_BF16) #define EIGEN_HAS_GPU_BF16 @@ -68,8 +67,7 @@ #include #endif -// MSVC for windows mobile does not have the errno.h file -#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM +#if !EIGEN_COMP_ARM #define EIGEN_HAS_ERRNO #endif @@ -92,16 +90,30 @@ #include #include +#include #include // for std::is_nothrow_move_assignable #include +// for std::move, std::forward, std::declval +#include + // for std::this_thread::yield(). 
#if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL)) #include #endif +// for __cpp_lib feature test macros +#if defined(__has_include) && __has_include() +#include +#endif + +// for std::bit_cast() +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +#include +#endif + // for outputting debug info #ifdef EIGEN_DEBUG_ASSIGN #include @@ -109,10 +121,18 @@ // required for __cpuid, needs to be included after cmath // also required for _BitScanReverse on Windows on ARM -#if EIGEN_COMP_MSVC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM64) && !EIGEN_OS_WINCE +#if EIGEN_COMP_MSVC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM64) #include #endif +// Required for querying cache sizes on Linux and macOS. +#if EIGEN_OS_LINUX +#include +#elif EIGEN_OS_MAC +#include +#include +#endif + #if defined(EIGEN_USE_SYCL) #undef min #undef max @@ -121,9 +141,7 @@ #undef isfinite #include #include -#include #include -#include #ifndef EIGEN_SYCL_LOCAL_THREAD_DIM0 #define EIGEN_SYCL_LOCAL_THREAD_DIM0 16 #endif @@ -132,19 +150,9 @@ #endif #endif -#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || \ - defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || \ - defined EIGEN2_SUPPORT -// This will generate an error message: -#error Eigen2-support is only available up to version 3.2. 
Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information -#endif - namespace Eigen { -// we use size_t frequently and we'll never remember to prepend it with std:: every time just to -// ensure QNX/QCC support using std::size_t; -// gcc 4.6.0 wants std:: for ptrdiff_t using std::ptrdiff_t; } // namespace Eigen @@ -162,6 +170,8 @@ using std::ptrdiff_t; #ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_MKL #include "mkl_lapacke.h" +#elif defined(EIGEN_LAPACKE_SYSTEM) +#include #else #include "src/misc/lapacke.h" #endif @@ -192,36 +202,58 @@ using std::ptrdiff_t; #include "src/Core/arch/Default/BFloat16.h" #include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h" -#if defined EIGEN_VECTORIZE_SSE +#if defined(EIGEN_VECTORIZE_GENERIC) && !defined(EIGEN_DONT_VECTORIZE) +#include "src/Core/arch/clang/PacketMath.h" +#include "src/Core/arch/clang/TypeCasting.h" +#include "src/Core/arch/clang/Complex.h" +#include "src/Core/arch/clang/Reductions.h" +#include "src/Core/arch/clang/MathFunctions.h" +#else +#if defined EIGEN_VECTORIZE_AVX512 #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/Reductions.h" -#include "src/Core/arch/SSE/Complex.h" -#include "src/Core/arch/SSE/TypeCasting.h" -#include "src/Core/arch/SSE/MathFunctions.h" -#endif - -#if defined EIGEN_VECTORIZE_AVX #include "src/Core/arch/AVX/PacketMath.h" #include "src/Core/arch/AVX/Reductions.h" -#include "src/Core/arch/AVX/Complex.h" -#include "src/Core/arch/AVX/TypeCasting.h" -#include "src/Core/arch/AVX/MathFunctions.h" -#endif - -#if defined EIGEN_VECTORIZE_AVX512 #include "src/Core/arch/AVX512/PacketMath.h" #include "src/Core/arch/AVX512/Reductions.h" -#include "src/Core/arch/AVX512/Complex.h" -#include "src/Core/arch/AVX512/TypeCasting.h" -#include "src/Core/arch/AVX512/MathFunctions.h" -#include "src/Core/arch/AVX512/TrsmKernel.h" -#endif - #if defined EIGEN_VECTORIZE_AVX512FP16 #include "src/Core/arch/AVX512/PacketMathFP16.h" +#endif +#include 
"src/Core/arch/SSE/TypeCasting.h" +#include "src/Core/arch/AVX/TypeCasting.h" +#include "src/Core/arch/AVX512/TypeCasting.h" +#if defined EIGEN_VECTORIZE_AVX512FP16 #include "src/Core/arch/AVX512/TypeCastingFP16.h" +#endif +#include "src/Core/arch/SSE/Complex.h" +#include "src/Core/arch/AVX/Complex.h" +#include "src/Core/arch/AVX512/Complex.h" +#include "src/Core/arch/SSE/MathFunctions.h" +#include "src/Core/arch/AVX/MathFunctions.h" +#include "src/Core/arch/AVX512/MathFunctions.h" +#if defined EIGEN_VECTORIZE_AVX512FP16 #include "src/Core/arch/AVX512/MathFunctionsFP16.h" #endif +#include "src/Core/arch/AVX512/TrsmKernel.h" +#elif defined EIGEN_VECTORIZE_AVX +// Use AVX for floats and doubles, SSE for integers +#include "src/Core/arch/SSE/PacketMath.h" +#include "src/Core/arch/SSE/Reductions.h" +#include "src/Core/arch/SSE/TypeCasting.h" +#include "src/Core/arch/SSE/Complex.h" +#include "src/Core/arch/AVX/PacketMath.h" +#include "src/Core/arch/AVX/Reductions.h" +#include "src/Core/arch/AVX/TypeCasting.h" +#include "src/Core/arch/AVX/Complex.h" +#include "src/Core/arch/SSE/MathFunctions.h" +#include "src/Core/arch/AVX/MathFunctions.h" +#elif defined EIGEN_VECTORIZE_SSE +#include "src/Core/arch/SSE/PacketMath.h" +#include "src/Core/arch/SSE/Reductions.h" +#include "src/Core/arch/SSE/TypeCasting.h" +#include "src/Core/arch/SSE/MathFunctions.h" +#include "src/Core/arch/SSE/Complex.h" +#endif #if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) #include "src/Core/arch/AltiVec/PacketMath.h" @@ -242,6 +274,18 @@ using std::ptrdiff_t; #include "src/Core/arch/SVE/PacketMath.h" #include "src/Core/arch/SVE/TypeCasting.h" #include "src/Core/arch/SVE/MathFunctions.h" +#elif defined EIGEN_VECTORIZE_RVV10 +#include "src/Core/arch/RVV10/PacketMath.h" +#include "src/Core/arch/RVV10/PacketMath4.h" +#include "src/Core/arch/RVV10/PacketMath2.h" +#include "src/Core/arch/RVV10/TypeCasting.h" +#include "src/Core/arch/RVV10/MathFunctions.h" +#if defined 
EIGEN_VECTORIZE_RVV10FP16 +#include "src/Core/arch/RVV10/PacketMathFP16.h" +#endif +#if defined EIGEN_VECTORIZE_RVV10BF16 +#include "src/Core/arch/RVV10/PacketMathBF16.h" +#endif #elif defined EIGEN_VECTORIZE_ZVECTOR #include "src/Core/arch/ZVector/PacketMath.h" #include "src/Core/arch/ZVector/MathFunctions.h" @@ -269,6 +313,8 @@ using std::ptrdiff_t; #endif #endif +#endif // #ifndef EIGEN_VECTORIZE_GENERIC + #include "src/Core/arch/Default/Settings.h" // This file provides generic implementations valid for scalar as well #include "src/Core/arch/Default/GenericPacketMathFunctions.h" @@ -304,6 +350,7 @@ using std::ptrdiff_t; #include "src/Core/Product.h" #include "src/Core/CoreEvaluators.h" #include "src/Core/AssignEvaluator.h" +#include "src/Core/RealView.h" #include "src/Core/Assign.h" #include "src/Core/ArrayBase.h" @@ -311,13 +358,12 @@ using std::ptrdiff_t; #include "src/Core/DenseStorage.h" #include "src/Core/NestByValue.h" -// #include "src/Core/ForceAlignedAccess.h" - #include "src/Core/ReturnByValue.h" #include "src/Core/NoAlias.h" #include "src/Core/PlainObjectBase.h" #include "src/Core/Matrix.h" #include "src/Core/Array.h" +#include "src/Core/StructuredBindings.h" #include "src/Core/Fill.h" #include "src/Core/CwiseTernaryOp.h" #include "src/Core/CwiseBinaryOp.h" @@ -378,23 +424,28 @@ using std::ptrdiff_t; #include "src/Core/CoreIterators.h" #include "src/Core/ConditionEstimator.h" +#if !defined(EIGEN_VECTORIZE_GENERIC) #if defined(EIGEN_VECTORIZE_VSX) #include "src/Core/arch/AltiVec/MatrixProduct.h" #elif defined EIGEN_VECTORIZE_NEON #include "src/Core/arch/NEON/GeneralBlockPanelKernel.h" #elif defined EIGEN_VECTORIZE_LSX #include "src/Core/arch/LSX/GeneralBlockPanelKernel.h" +#elif defined EIGEN_VECTORIZE_RVV10 +#include "src/Core/arch/RVV10/GeneralBlockPanelKernel.h" #endif #if defined(EIGEN_VECTORIZE_AVX512) #include "src/Core/arch/AVX512/GemmKernel.h" #endif +#endif #include "src/Core/Select.h" #include "src/Core/VectorwiseOp.h" #include 
"src/Core/PartialReduxEvaluator.h" #include "src/Core/Random.h" #include "src/Core/Replicate.h" +#include "src/Core/ConcatOp.h" #include "src/Core/Reverse.h" #include "src/Core/ArrayWrapper.h" #include "src/Core/StlIterators.h" @@ -414,6 +465,10 @@ using std::ptrdiff_t; #include "src/Core/Assign_MKL.h" #endif +#ifdef EIGEN_USE_AOCL_VML +#include "src/Core/Assign_AOCL.h" +#endif + #include "src/Core/GlobalFunctions.h" // IWYU pragma: end_exports diff --git a/Eigen/Dense b/Eigen/Dense index 5768910bd88..c90db7657a7 100644 --- a/Eigen/Dense +++ b/Eigen/Dense @@ -1,3 +1,13 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSE_MODULE_H +#define EIGEN_DENSE_MODULE_H + #include "Core" #include "LU" #include "Cholesky" @@ -5,3 +15,5 @@ #include "SVD" #include "Geometry" #include "Eigenvalues" + +#endif // EIGEN_DENSE_MODULE_H diff --git a/Eigen/Eigen b/Eigen/Eigen index 654c8dc6380..bb8f02f04b0 100644 --- a/Eigen/Eigen +++ b/Eigen/Eigen @@ -1,2 +1,14 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EIGEN_MODULE_H +#define EIGEN_EIGEN_MODULE_H + #include "Dense" #include "Sparse" + +#endif // EIGEN_EIGEN_MODULE_H diff --git a/Eigen/Eigenvalues b/Eigen/Eigenvalues index 3b0bdee1715..f68eb85421b 100644 --- a/Eigen/Eigenvalues +++ b/Eigen/Eigenvalues @@ -11,16 +11,13 @@ #include "Core" #include "Cholesky" -#include "Jacobi" -#include "Householder" #include "LU" #include "Geometry" +#include "Sparse" // Needed by ComplexQZ. 
#include "src/Core/util/DisableStupidWarnings.h" /** \defgroup Eigenvalues_Module Eigenvalues module - * - * * * This module mainly provides various eigenvalue solvers. * This module also provides some MatrixBase methods, including: @@ -32,8 +29,6 @@ * \endcode */ -#include "src/misc/RealSvd2x2.h" - // IWYU pragma: begin_exports #include "src/Eigenvalues/Tridiagonalization.h" #include "src/Eigenvalues/RealSchur.h" @@ -44,11 +39,14 @@ #include "src/Eigenvalues/ComplexSchur.h" #include "src/Eigenvalues/ComplexEigenSolver.h" #include "src/Eigenvalues/RealQZ.h" +#include "src/Eigenvalues/ComplexQZ.h" #include "src/Eigenvalues/GeneralizedEigenSolver.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h" #ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_MKL #include "mkl_lapacke.h" +#elif defined(EIGEN_LAPACKE_SYSTEM) +#include #else #include "src/misc/lapacke.h" #endif diff --git a/Eigen/Geometry b/Eigen/Geometry index efe3e1fa339..c3ddb3d8a33 100644 --- a/Eigen/Geometry +++ b/Eigen/Geometry @@ -12,7 +12,6 @@ #include "SVD" #include "LU" -#include #include "src/Core/util/DisableStupidWarnings.h" @@ -48,10 +47,13 @@ #include "src/Geometry/AlignedBox.h" #include "src/Geometry/Umeyama.h" +#ifndef EIGEN_VECTORIZE_GENERIC +// TODO(rmlarsen): Make these work with generic vectorization if possible. // Use the SSE optimized version whenever possible. 
#if (defined EIGEN_VECTORIZE_SSE) || (defined EIGEN_VECTORIZE_NEON) #include "src/Geometry/arch/Geometry_SIMD.h" #endif +#endif // IWYU pragma: end_exports #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/Householder b/Eigen/Householder index 5070e070e67..719edaffedb 100644 --- a/Eigen/Householder +++ b/Eigen/Householder @@ -22,8 +22,8 @@ // IWYU pragma: begin_exports #include "src/Householder/Householder.h" -#include "src/Householder/HouseholderSequence.h" #include "src/Householder/BlockHouseholder.h" +#include "src/Householder/HouseholderSequence.h" // IWYU pragma: end_exports #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/KLUSupport b/Eigen/KLUSupport new file mode 100644 index 00000000000..6a5c59710c2 --- /dev/null +++ b/Eigen/KLUSupport @@ -0,0 +1,43 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_KLUSUPPORT_MODULE_H +#define EIGEN_KLUSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +#include +} + +/** \ingroup Support_modules + * \defgroup KLUSupport_Module KLUSupport module + * + * This module provides an interface to the KLU library which is part of the suitesparse package. It provides the following factorization class: + * - class KLU: a sparse LU factorization, well-suited for circuit simulation. + * + * \code + * #include + * \endcode + * + * In order to use this module, the klu and btf headers must be accessible from the include paths, and your binary must + * be linked to the klu library and its dependencies. The dependencies depend on how KLU has been compiled. For a + * cmake based project, you can use our FindKLU.cmake module to help you in this task. 
+ * + */ + +// IWYU pragma: begin_exports +#include "src/KLUSupport/KLUSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_KLUSUPPORT_MODULE_H diff --git a/Eigen/LU b/Eigen/LU index d80448039ef..ea54e353ec6 100644 --- a/Eigen/LU +++ b/Eigen/LU @@ -23,10 +23,10 @@ * \endcode */ +// IWYU pragma: begin_exports #include "src/misc/Kernel.h" #include "src/misc/Image.h" - -// IWYU pragma: begin_exports +#include "src/misc/RankRevealingBase.h" #include "src/LU/FullPivLU.h" #include "src/LU/PartialPivLU.h" #ifdef EIGEN_USE_LAPACKE @@ -36,9 +36,12 @@ #include "src/LU/Determinant.h" #include "src/LU/InverseImpl.h" +#ifndef EIGEN_VECTORIZE_GENERIC +// TODO(rmlarsen): Make these work with generic vectorization if possible. #if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_NEON #include "src/LU/arch/InverseSize4.h" #endif +#endif // IWYU pragma: end_exports #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/PaStiXSupport b/Eigen/PaStiXSupport index dd1cfcb12de..59442316eff 100644 --- a/Eigen/PaStiXSupport +++ b/Eigen/PaStiXSupport @@ -36,7 +36,7 @@ extern "C" { * \endcode * * In order to use this module, the PaSTiX headers must be accessible from the include paths, and your binary must be - * linked to the PaSTiX library and its dependencies. This wrapper resuires PaStiX version 5.x compiled without MPI + * linked to the PaSTiX library and its dependencies. This wrapper requires PaStiX version 5.x compiled without MPI * support. The dependencies depend on how PaSTiX has been compiled. For a cmake based project, you can use our * FindPaSTiX.cmake module to help you in this task. 
* diff --git a/Eigen/QR b/Eigen/QR index c38b453b076..b29abce9ba0 100644 --- a/Eigen/QR +++ b/Eigen/QR @@ -11,14 +11,11 @@ #include "Core" #include "Cholesky" -#include "Jacobi" #include "Householder" #include "src/Core/util/DisableStupidWarnings.h" /** \defgroup QR_Module QR module - * - * * * This module provides various QR decompositions * This module also provides some MatrixBase methods, including: @@ -31,6 +28,8 @@ * \endcode */ +#include "src/misc/RankRevealingBase.h" + // IWYU pragma: begin_exports #include "src/QR/HouseholderQR.h" #include "src/QR/FullPivHouseholderQR.h" diff --git a/Eigen/QtAlignedMalloc b/Eigen/QtAlignedMalloc index 585f8e81ceb..6e15b26e67c 100644 --- a/Eigen/QtAlignedMalloc +++ b/Eigen/QtAlignedMalloc @@ -14,11 +14,11 @@ #include "src/Core/util/DisableStupidWarnings.h" -void *qMalloc(std::size_t size) { return Eigen::internal::aligned_malloc(size); } +inline void *qMalloc(std::size_t size) { return Eigen::internal::aligned_malloc(size); } -void qFree(void *ptr) { Eigen::internal::aligned_free(ptr); } +inline void qFree(void *ptr) { Eigen::internal::aligned_free(ptr); } -void *qRealloc(void *ptr, std::size_t size) { +inline void *qRealloc(void *ptr, std::size_t size) { void *newPtr = Eigen::internal::aligned_malloc(size); std::memcpy(newPtr, ptr, size); Eigen::internal::aligned_free(ptr); diff --git a/Eigen/SPQRSupport b/Eigen/SPQRSupport index c01dbe0093f..bfc2e7bfa70 100644 --- a/Eigen/SPQRSupport +++ b/Eigen/SPQRSupport @@ -38,4 +38,4 @@ #include "src/Core/util/ReenableStupidWarnings.h" -#endif +#endif // EIGEN_SPQRSUPPORT_MODULE_H diff --git a/Eigen/SVD b/Eigen/SVD index 2a013f825d7..ef5e36e825f 100644 --- a/Eigen/SVD +++ b/Eigen/SVD @@ -9,14 +9,10 @@ #define EIGEN_SVD_MODULE_H #include "QR" -#include "Householder" -#include "Jacobi" #include "src/Core/util/DisableStupidWarnings.h" /** \defgroup SVD_Module SVD module - * - * * * This module provides SVD decomposition for matrices (both real and complex). 
* Two decomposition algorithms are provided: @@ -33,7 +29,6 @@ */ // IWYU pragma: begin_exports -#include "src/misc/RealSvd2x2.h" #include "src/SVD/UpperBidiagonalization.h" #include "src/SVD/SVDBase.h" #include "src/SVD/JacobiSVD.h" @@ -41,6 +36,8 @@ #ifdef EIGEN_USE_LAPACKE #ifdef EIGEN_USE_MKL #include "mkl_lapacke.h" +#elif defined(EIGEN_LAPACKE_SYSTEM) +#include #else #include "src/misc/lapacke.h" #endif diff --git a/Eigen/SparseCore b/Eigen/SparseCore index 56a9401af34..6020e42855b 100644 --- a/Eigen/SparseCore +++ b/Eigen/SparseCore @@ -12,11 +12,7 @@ #include "src/Core/util/DisableStupidWarnings.h" -#include #include -#include -#include -#include #include /** diff --git a/Eigen/SparseQR b/Eigen/SparseQR index b4f1cad6bbb..1ad51923c87 100644 --- a/Eigen/SparseQR +++ b/Eigen/SparseQR @@ -35,4 +35,4 @@ #include "src/Core/util/ReenableStupidWarnings.h" -#endif +#endif // EIGEN_SPARSEQR_MODULE_H diff --git a/Eigen/SuperLUSupport b/Eigen/SuperLUSupport index 79e2222f40d..27e14d29eb1 100644 --- a/Eigen/SuperLUSupport +++ b/Eigen/SuperLUSupport @@ -16,6 +16,7 @@ #define EIGEN_EMPTY_WAS_ALREADY_DEFINED #endif +// Required by SuperLU headers, which expect int_t to be defined as a global typedef. typedef int int_t; #include #include diff --git a/Eigen/ThreadPool b/Eigen/ThreadPool new file mode 100644 index 00000000000..ac08bef0388 --- /dev/null +++ b/Eigen/ThreadPool @@ -0,0 +1,80 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_THREADPOOL_MODULE_H +#define EIGEN_THREADPOOL_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup ThreadPool_Module ThreadPool Module + * + * This module provides 2 threadpool implementations + * - a simple reference implementation + * - a faster non blocking implementation + * + * \code + * #include + * \endcode + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// There are non-parenthesized calls to "max" in the header, +// which trigger a check in test/main.h causing compilation to fail. +// We work around the check here by removing the check for max in +// the case where we have to emulate thread_local. +#ifdef max +#undef max +#endif +#include + +#include "src/Core/util/Meta.h" +#include "src/Core/util/MaxSizeVector.h" + +#ifndef EIGEN_MUTEX +#define EIGEN_MUTEX std::mutex +#endif +#ifndef EIGEN_MUTEX_LOCK +#define EIGEN_MUTEX_LOCK std::unique_lock +#endif +#ifndef EIGEN_CONDVAR +#define EIGEN_CONDVAR std::condition_variable +#endif + +// IWYU pragma: begin_exports +#include "src/ThreadPool/ThreadLocal.h" +#include "src/ThreadPool/ThreadYield.h" +#include "src/ThreadPool/ThreadCancel.h" +#include "src/ThreadPool/EventCount.h" +#include "src/ThreadPool/RunQueue.h" +#include "src/ThreadPool/ThreadPoolInterface.h" +#include "src/ThreadPool/ThreadEnvironment.h" +#include "src/ThreadPool/Barrier.h" +#include "src/ThreadPool/NonBlockingThreadPool.h" +#include "src/ThreadPool/CoreThreadPoolDevice.h" +#include "src/ThreadPool/ForkJoin.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_THREADPOOL_MODULE_H diff --git a/Eigen/UmfPackSupport b/Eigen/UmfPackSupport index 126344cba3f..28e386fad85 100644 --- a/Eigen/UmfPackSupport +++ b/Eigen/UmfPackSupport @@ -35,7 +35,7 @@ extern "C" { // IWYU pragma: begin_exports #include "src/UmfPackSupport/UmfPackSupport.h" -// IWYU pragma: 
endexports +// IWYU pragma: end_exports #include "src/Core/util/ReenableStupidWarnings.h" diff --git a/Eigen/Version b/Eigen/Version new file mode 100644 index 00000000000..c1083631856 --- /dev/null +++ b/Eigen/Version @@ -0,0 +1,21 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_VERSION_H +#define EIGEN_VERSION_H + +// The "WORLD" version will forever remain "3" for the "Eigen3" library. +#define EIGEN_WORLD_VERSION 3 +// As of Eigen3 5.0.0, we have moved to Semantic Versioning (semver.org). +#define EIGEN_MAJOR_VERSION 5 +#define EIGEN_MINOR_VERSION 0 +#define EIGEN_PATCH_VERSION 1 +#define EIGEN_PRERELEASE_VERSION "dev" +#define EIGEN_BUILD_VERSION "master" +#define EIGEN_VERSION_STRING "5.0.1-dev+master" + +#endif // EIGEN_VERSION_H diff --git a/Eigen/src/AccelerateSupport/AccelerateSupport.h b/Eigen/src/AccelerateSupport/AccelerateSupport.h index 13a26dfbb18..c944aeabd03 100644 --- a/Eigen/src/AccelerateSupport/AccelerateSupport.h +++ b/Eigen/src/AccelerateSupport/AccelerateSupport.h @@ -110,7 +110,7 @@ using AccelerateCholeskyAtA = AccelerateImpl struct AccelFactorizationDeleter { - void operator()(T* sym) { + void operator()(T* sym) const { if (sym) { SparseCleanup(*sym); delete sym; diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h index b1d801d34df..63aa5bd756c 100644 --- a/Eigen/src/Cholesky/LDLT.h +++ b/Eigen/src/Cholesky/LDLT.h @@ -84,7 +84,13 @@ class LDLT : public SolverBase > { * The default constructor is useful in cases in which the user intends to * perform decompositions via LDLT::compute(const MatrixType&). 
*/ - LDLT() : m_matrix(), m_transpositions(), m_sign(internal::ZeroSign), m_isInitialized(false) {} + LDLT() + : m_matrix(), + m_l1_norm(0), + m_transpositions(), + m_sign(internal::ZeroSign), + m_isInitialized(false), + m_info(InvalidInput) {} /** \brief Default Constructor with memory preallocation * @@ -94,10 +100,12 @@ class LDLT : public SolverBase > { */ explicit LDLT(Index size) : m_matrix(size, size), + m_l1_norm(0), m_transpositions(size), m_temporary(size), m_sign(internal::ZeroSign), - m_isInitialized(false) {} + m_isInitialized(false), + m_info(InvalidInput) {} /** \brief Constructor with decomposition * @@ -108,10 +116,12 @@ class LDLT : public SolverBase > { template explicit LDLT(const EigenBase& matrix) : m_matrix(matrix.rows(), matrix.cols()), + m_l1_norm(0), m_transpositions(matrix.rows()), m_temporary(matrix.rows()), m_sign(internal::ZeroSign), - m_isInitialized(false) { + m_isInitialized(false), + m_info(InvalidInput) { compute(matrix.derived()); } @@ -125,10 +135,12 @@ class LDLT : public SolverBase > { template explicit LDLT(EigenBase& matrix) : m_matrix(matrix.derived()), + m_l1_norm(0), m_transpositions(matrix.rows()), m_temporary(matrix.rows()), m_sign(internal::ZeroSign), - m_isInitialized(false) { + m_isInitialized(false), + m_info(InvalidInput) { compute(matrix.derived()); } @@ -191,7 +203,7 @@ class LDLT : public SolverBase > { * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt() */ template - inline const Solve solve(const MatrixBase& b) const; + inline Solve solve(const MatrixBase& b) const; #endif template @@ -213,7 +225,7 @@ class LDLT : public SolverBase > { /** \returns the internal LDLT decomposition matrix * - * TODO: document the storage layout + * TODO: document the storage layout. 
*/ inline const MatrixType& matrixLDLT() const { eigen_assert(m_isInitialized && "LDLT is not initialized."); @@ -477,19 +489,8 @@ LDLT& LDLT::compute(const EigenBase() + m_matrix.row(col).head(col).template lpNorm<1>(); - else - abs_col_sum = - m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); - if (abs_col_sum > m_l1_norm) m_l1_norm = abs_col_sum; - } + // Compute matrix L1 norm = max abs column sum over the implicit self-adjoint matrix. + m_l1_norm = m_matrix.template selfadjointView().l1Norm(); m_transpositions.resize(size); m_isInitialized = false; @@ -630,8 +631,8 @@ MatrixType LDLT::reconstructedMatrix() const { * \sa MatrixBase::ldlt() */ template -inline const LDLT::PlainObject, UpLo> -SelfAdjointView::ldlt() const { +inline LDLT::PlainObject, UpLo> SelfAdjointView::ldlt() + const { return LDLT(m_matrix); } @@ -640,7 +641,7 @@ SelfAdjointView::ldlt() const { * \sa SelfAdjointView::ldlt() */ template -inline const LDLT::PlainObject> MatrixBase::ldlt() const { +inline LDLT::PlainObject> MatrixBase::ldlt() const { return LDLT(derived()); } diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h index 7fa4fa2a0f6..9bffeae5b5c 100644 --- a/Eigen/src/Cholesky/LLT.h +++ b/Eigen/src/Cholesky/LLT.h @@ -86,7 +86,7 @@ class LLT : public SolverBase > { * The default constructor is useful in cases in which the user intends to * perform decompositions via LLT::compute(const MatrixType&). */ - LLT() : m_matrix(), m_isInitialized(false) {} + LLT() : m_matrix(), m_l1_norm(0), m_isInitialized(false), m_info(InvalidInput) {} /** \brief Default Constructor with memory preallocation * @@ -94,10 +94,11 @@ class LLT : public SolverBase > { * according to the specified problem \a size. 
* \sa LLT() */ - explicit LLT(Index size) : m_matrix(size, size), m_isInitialized(false) {} + explicit LLT(Index size) : m_matrix(size, size), m_l1_norm(0), m_isInitialized(false), m_info(InvalidInput) {} template - explicit LLT(const EigenBase& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_isInitialized(false) { + explicit LLT(const EigenBase& matrix) + : m_matrix(matrix.rows(), matrix.cols()), m_l1_norm(0), m_isInitialized(false), m_info(InvalidInput) { compute(matrix.derived()); } @@ -109,7 +110,8 @@ class LLT : public SolverBase > { * \sa LLT(const EigenBase&) */ template - explicit LLT(EigenBase& matrix) : m_matrix(matrix.derived()), m_isInitialized(false) { + explicit LLT(EigenBase& matrix) + : m_matrix(matrix.derived()), m_l1_norm(0), m_isInitialized(false), m_info(InvalidInput) { compute(matrix.derived()); } @@ -137,7 +139,7 @@ class LLT : public SolverBase > { * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt() */ template - inline const Solve solve(const MatrixBase& b) const; + inline Solve solve(const MatrixBase& b) const; #endif template @@ -402,19 +404,8 @@ LLT& LLT::compute(const EigenBase() + m_matrix.row(col).head(col).template lpNorm<1>(); - else - abs_col_sum = - m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); - if (abs_col_sum > m_l1_norm) m_l1_norm = abs_col_sum; - } + // Compute matrix L1 norm = max abs column sum over the implicit self-adjoint matrix. 
+ m_l1_norm = m_matrix.template selfadjointView().l1Norm(); m_isInitialized = true; bool ok = Traits::inplace_decomposition(m_matrix); @@ -495,7 +486,7 @@ MatrixType LLT::reconstructedMatrix() const { * \sa SelfAdjointView::llt() */ template -inline const LLT::PlainObject> MatrixBase::llt() const { +inline LLT::PlainObject> MatrixBase::llt() const { return LLT(derived()); } @@ -504,7 +495,7 @@ inline const LLT::PlainObject> MatrixBase: * \sa SelfAdjointView::llt() */ template -inline const LLT::PlainObject, UpLo> SelfAdjointView::llt() +inline LLT::PlainObject, UpLo> SelfAdjointView::llt() const { return LLT(m_matrix); } diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index 7e3c881aec9..dc3d6a3471d 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ -360,7 +360,7 @@ class CholmodBase : public SparseSolverBase { this->m_info = NumericalIssue; return; } - // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + // TODO: optimize this copy by swapping when possible (be careful with alignment, etc.) // NOTE Actually, the copy can be avoided by calling cholmod_solve2 instead of cholmod_solve dest = Matrix::Map(reinterpret_cast(x_cd->x), b.rows(), b.cols()); @@ -386,7 +386,7 @@ class CholmodBase : public SparseSolverBase { this->m_info = NumericalIssue; return; } - // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + // TODO: optimize this copy by swapping when possible (be careful with alignment, etc.) 
// NOTE cholmod_spsolve in fact just calls the dense solver for blocks of 4 columns at a time (similar to Eigen's // sparse solver) dest.derived() = viewAsEigen(*x_cs); diff --git a/Eigen/src/Core/ArithmeticSequence.h b/Eigen/src/Core/ArithmeticSequence.h index ae6373dda2d..65e7961d66f 100644 --- a/Eigen/src/Core/ArithmeticSequence.h +++ b/Eigen/src/Core/ArithmeticSequence.h @@ -178,11 +178,10 @@ auto seq(FirstType f, LastType l, IncrType incr) namespace placeholders { -/** \cpp11 - * \returns a symbolic ArithmeticSequence representing the last \a size elements with increment \a incr. +/** \returns a symbolic ArithmeticSequence representing the last \a size elements with increment \a incr. * * It is a shortcut for: \code seqN(last-(size-fix<1>)*incr, size, incr) \endcode - * + * \anchor Eigen_placeholders_lastN * \sa lastN(SizeType), seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */ template auto lastN(SizeType size, IncrType incr) @@ -190,8 +189,7 @@ auto lastN(SizeType size, IncrType incr) return seqN(Eigen::placeholders::last - (size - fix<1>()) * incr, size, incr); } -/** \cpp11 - * \returns a symbolic ArithmeticSequence representing the last \a size elements with a unit increment. +/** \returns a symbolic ArithmeticSequence representing the last \a size elements with a unit increment. 
* * It is a shortcut for: \code seq(last+fix<1>-size, last) \endcode * @@ -220,7 +218,7 @@ auto lastN(SizeType size) -> decltype(seqN(Eigen::placeholders::last + fix<1>() using Eigen::seqN; using Eigen::placeholders::all; using Eigen::placeholders::last; - using Eigen::placeholders::lastN; // c++11 only + using Eigen::placeholders::lastN; using Eigen::placeholders::lastp1; \endcode */ diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h index 57f3186b09b..28ff760e796 100644 --- a/Eigen/src/Core/Array.h +++ b/Eigen/src/Core/Array.h @@ -123,12 +123,12 @@ class Array : public PlainObjectBase::value) { Base::operator=(std::move(other)); return *this; @@ -141,7 +141,7 @@ class Array : public PlainObjectBase>& list) - : Base(list) {} + EIGEN_DEVICE_FUNC constexpr Array(const std::initializer_list>& list) : Base(list) {} #ifndef EIGEN_PARSED_BY_DOXYGEN template @@ -239,7 +236,7 @@ class Array : public PlainObjectBase&) */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array( + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Array( const EigenBase& other, std::enable_if_t::value, PrivateType> = PrivateType()) @@ -282,7 +279,7 @@ class Array : public PlainObjectBase` where `Rows` and `Cols` can be \c 2,\c 3,\c 4, or \c X for fixed or dynamic size. 
@@ -324,21 +321,17 @@ EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cd) #define EIGEN_MAKE_ARRAY_TYPEDEFS(Size, SizeSuffix) \ /** \ingroup arraytypedefs */ \ - /** \brief \cpp11 */ \ template \ using Array##SizeSuffix##SizeSuffix = Array; \ /** \ingroup arraytypedefs */ \ - /** \brief \cpp11 */ \ template \ using Array##SizeSuffix = Array; #define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Size) \ /** \ingroup arraytypedefs */ \ - /** \brief \cpp11 */ \ template \ using Array##Size##X = Array; \ /** \ingroup arraytypedefs */ \ - /** \brief \cpp11 */ \ template \ using Array##X##Size = Array; diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h index 8465f54feda..dacc2393334 100644 --- a/Eigen/src/Core/ArrayBase.h +++ b/Eigen/src/Core/ArrayBase.h @@ -168,19 +168,16 @@ class ArrayBase : public DenseBase { } public: - EIGEN_DEVICE_FUNC ArrayBase& array() { return *this; } - EIGEN_DEVICE_FUNC const ArrayBase& array() const { return *this; } + EIGEN_DEVICE_FUNC constexpr ArrayBase& array() { return *this; } + EIGEN_DEVICE_FUNC constexpr const ArrayBase& array() const { return *this; } /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array * \sa MatrixBase::array() */ - EIGEN_DEVICE_FUNC MatrixWrapper matrix() { return MatrixWrapper(derived()); } - EIGEN_DEVICE_FUNC const MatrixWrapper matrix() const { + EIGEN_DEVICE_FUNC constexpr MatrixWrapper matrix() { return MatrixWrapper(derived()); } + EIGEN_DEVICE_FUNC constexpr const MatrixWrapper matrix() const { return MatrixWrapper(derived()); } - // template - // inline void evalTo(Dest& dst) const { dst = matrix(); } - protected: EIGEN_DEFAULT_COPY_CONSTRUCTOR(ArrayBase) EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(ArrayBase) diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h index c9a194e991f..fb05ab55d88 100644 --- a/Eigen/src/Core/ArrayWrapper.h +++ b/Eigen/src/Core/ArrayWrapper.h @@ -21,7 +21,7 @@ namespace Eigen { * \brief Expression of a mathematical vector 
or matrix as an array object * * This class is the return type of MatrixBase::array(), and most of the time - * this is the only way it is use. + * this is the only way it is used. * * \sa MatrixBase::array(), class MatrixWrapper */ @@ -54,7 +54,8 @@ class ArrayWrapper : public ArrayBase > { using Base::coeffRef; - EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC constexpr explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) + : m_expression(matrix) {} EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } @@ -75,7 +76,7 @@ class ArrayWrapper : public ArrayBase > { dst = m_expression; } - EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { + EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t& nestedExpression() const { return m_expression; } @@ -96,7 +97,7 @@ class ArrayWrapper : public ArrayBase > { * \brief Expression of an array as a mathematical vector or matrix * * This class is the return type of ArrayBase::matrix(), and most of the time - * this is the only way it is use. + * this is the only way it is used. 
* * \sa MatrixBase::matrix(), class ArrayWrapper */ @@ -129,7 +130,7 @@ class MatrixWrapper : public MatrixBase > { using Base::coeffRef; - EIGEN_DEVICE_FUNC explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC constexpr explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } @@ -145,7 +146,7 @@ class MatrixWrapper : public MatrixBase > { EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return m_expression.coeffRef(index); } - EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { + EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t& nestedExpression() const { return m_expression; } diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 4b30f7bb626..3d30d868c23 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -19,7 +19,8 @@ namespace Eigen { template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::lazyAssign(const DenseBase& other) { +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase::lazyAssign( + const DenseBase& other) { enum { SameType = internal::is_same::value }; EIGEN_STATIC_ASSERT_LVALUE(Derived) @@ -36,40 +37,43 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::lazyAssign(co template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase::operator=( + const DenseBase& other) { internal::call_assignment(derived(), other.derived()); return derived(); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { 
internal::call_assignment(derived(), other.derived()); return derived(); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) { +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const MatrixBase& other) { internal::call_assignment(derived(), other.derived()); return derived(); } template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) { +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& MatrixBase::operator=( + const DenseBase& other) { internal::call_assignment(derived(), other.derived()); return derived(); } template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) { +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& MatrixBase::operator=( + const EigenBase& other) { internal::call_assignment(derived(), other.derived()); return derived(); } template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=( +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& MatrixBase::operator=( const ReturnByValue& other) { other.derived().evalTo(derived()); return derived(); diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 36f0a9d74de..3c30d2227a2 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -63,7 +63,7 @@ struct copy_using_evaluator_traits { static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize); static constexpr int OuterStride = outer_stride_at_compile_time::ret; - // TODO distinguish between linear traversal and inner-traversals + // TODO: distinguish between linear traversal and inner-traversal packet types. 
using LinearPacketType = typename find_best_packet::type; using InnerPacketType = typename find_best_packet::type; @@ -83,20 +83,36 @@ struct copy_using_evaluator_traits { (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) && (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment); static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit); + static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost); + static constexpr bool SmallAssignmentScalarPathIsCheap = + (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= EIGEN_UNROLLING_LIMIT); + /* Packet traversal has enough setup/tail overhead that it is not worth it + for very small fixed-size assignments when the scalar path can be fully + unrolled. More expensive RHS expressions can still amortize packet setup. */ + static constexpr int SmallAssignmentPacketThreshold = 3; + static constexpr int LinearPacketThreshold = SmallAssignmentScalarPathIsCheap ? SmallAssignmentPacketThreshold : 1; + static constexpr int LinearSizeThreshold = LinearPacketThreshold * LinearPacketSize; static constexpr bool MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) && - (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearPacketSize); - /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, - so it's only good for large enough sizes. */ - static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize; + (MaxSizeAtCompileTime == Dynamic || MaxSizeAtCompileTime >= LinearSizeThreshold); + /* Slice vectorization can be slow, so use MaxInnerSize rather than InnerSize: + a dynamic block in a fixed-size matrix can still have large slices. 
With + EIGEN_UNALIGNED_VECTORIZE and unrolling, one packet is still worthwhile for + non-vector slices. Cheap fixed-size vector blocks can otherwise fall back to + slice vectorization after the linear path is rejected, so use the same + conservative cutoff there. */ + static constexpr bool UseConservativeVectorInnerThreshold = IsVectorAtCompileTime && SmallAssignmentScalarPathIsCheap; + static constexpr int VectorInnerPacketThreshold = + (UseConservativeVectorInnerThreshold || !EIGEN_UNALIGNED_VECTORIZE) ? SmallAssignmentPacketThreshold : 1; + static constexpr int VectorInnerSizeThreshold = VectorInnerPacketThreshold * InnerPacketSize; + static constexpr int NonVectorInnerSizeThreshold = + (EIGEN_UNALIGNED_VECTORIZE ? 1 : SmallAssignmentPacketThreshold) * InnerPacketSize; + static constexpr int InnerSizeThreshold = + IsVectorAtCompileTime ? VectorInnerSizeThreshold : NonVectorInnerSizeThreshold; static constexpr bool MaySliceVectorize = MightVectorize && DstHasDirectAccess && (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold); - /* slice vectorization can be slow, so we only want it if the slices are big, which is - indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block - in a fixed-size matrix - However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ public: static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal @@ -115,7 +131,6 @@ struct copy_using_evaluator_traits { private: static constexpr int ActualPacketSize = Vectorized ? 
unpacket_traits::size : 1; static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize; - static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost); static constexpr bool MayUnrollCompletely = (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit); static constexpr bool MayUnrollInner = @@ -474,8 +489,8 @@ struct dense_assignment_loop_impl::alignment; - static constexpr bool Alignable = - (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0); + static constexpr bool Alignable = (DstAlignment >= RequestedAlignment) || + (static_cast(RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0); static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment; static constexpr bool DstIsAligned = DstAlignment >= Alignment; static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; @@ -587,8 +602,8 @@ struct dense_assignment_loop_impl static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment; static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar)); static constexpr int RequestedAlignment = unpacket_traits::alignment; - static constexpr bool Alignable = - (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0); + static constexpr bool Alignable = (DstAlignment >= RequestedAlignment) || + (static_cast(RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0); static constexpr int Alignment = Alignable ? 
RequestedAlignment : DstAlignment; static constexpr bool DstIsAligned = DstAlignment >= Alignment; static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; @@ -654,15 +669,15 @@ struct dense_assignment_loop_impl class generic_dense_assignment_kernel { protected: - typedef typename DstEvaluatorTypeT::XprType DstXprType; - typedef typename SrcEvaluatorTypeT::XprType SrcXprType; + using DstXprType = typename DstEvaluatorTypeT::XprType; + using SrcXprType = typename SrcEvaluatorTypeT::XprType; public: - typedef DstEvaluatorTypeT DstEvaluatorType; - typedef SrcEvaluatorTypeT SrcEvaluatorType; - typedef typename DstEvaluatorType::Scalar Scalar; - typedef copy_using_evaluator_traits AssignmentTraits; - typedef typename AssignmentTraits::PacketType PacketType; + using DstEvaluatorType = DstEvaluatorTypeT; + using SrcEvaluatorType = SrcEvaluatorTypeT; + using Scalar = typename DstEvaluatorType::Scalar; + using AssignmentTraits = copy_using_evaluator_traits; + using PacketType = typename AssignmentTraits::PacketType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst, const SrcEvaluatorType& src, @@ -681,8 +696,8 @@ class generic_dense_assignment_kernel { EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_dstExpr.cols(); } EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_dstExpr.outerStride(); } - EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() noexcept { return m_dst; } - EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; } + EIGEN_DEVICE_FUNC constexpr DstEvaluatorType& dstEvaluator() noexcept { return m_dst; } + EIGEN_DEVICE_FUNC constexpr const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; } /// Assign src(row,col) to dst(row,col) through the assignment functor. 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) { @@ -690,7 +705,7 @@ class generic_dense_assignment_kernel { } /// \sa assignCoeff(Index,Index) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index index) { m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); } @@ -741,7 +756,7 @@ class generic_dense_assignment_kernel { } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) { - typedef typename DstEvaluatorType::ExpressionTraits Traits; + using Traits = typename DstEvaluatorType::ExpressionTraits; return int(Traits::RowsAtCompileTime) == 1 ? 0 : int(Traits::ColsAtCompileTime) == 1 ? inner : int(DstEvaluatorType::Flags) & RowMajorBit ? outer @@ -749,7 +764,7 @@ class generic_dense_assignment_kernel { } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) { - typedef typename DstEvaluatorType::ExpressionTraits Traits; + using Traits = typename DstEvaluatorType::ExpressionTraits; return int(Traits::ColsAtCompileTime) == 1 ? 0 : int(Traits::RowsAtCompileTime) == 1 ? inner : int(DstEvaluatorType::Flags) & RowMajorBit ? 
inner @@ -762,7 +777,7 @@ class generic_dense_assignment_kernel { DstEvaluatorType& m_dst; const SrcEvaluatorType& m_src; const Functor& m_functor; - // TODO find a way to avoid the needs of the original expression + // TODO: find a way to avoid the needs of the original expression DstXprType& m_dstExpr; }; @@ -774,13 +789,13 @@ template { protected: - typedef generic_dense_assignment_kernel Base; + using Base = generic_dense_assignment_kernel; public: - typedef typename Base::Scalar Scalar; - typedef typename Base::DstXprType DstXprType; - typedef copy_using_evaluator_traits AssignmentTraits; - typedef typename AssignmentTraits::PacketType PacketType; + using Scalar = typename Base::Scalar; + using DstXprType = typename Base::DstXprType; + using AssignmentTraits = copy_using_evaluator_traits; + using PacketType = typename AssignmentTraits::PacketType; EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT& dst, const SrcEvaluatorTypeT& src, const Functor& func, DstXprType& dstExpr) @@ -804,15 +819,27 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprTyp const internal::assign_op& /*func*/) { Index dstRows = src.rows(); Index dstCols = src.cols(); - if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols); - eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); + if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) { +#ifdef EIGEN_NO_AUTOMATIC_RESIZING + eigen_assert( + (dst.size() == 0 || (DstXprType::IsVectorAtCompileTime ? (dst.size() == src.size()) + : (dst.rows() == dstRows && dst.cols() == dstCols))) && + "Size mismatch. 
Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + if (dst.size() == 0) { + dst.resize(dstRows, dstCols); + } +#else + dst.resize(dstRows, dstCols); + eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); +#endif + } } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, const Functor& func) { - typedef evaluator DstEvaluatorType; - typedef evaluator SrcEvaluatorType; + using DstEvaluatorType = evaluator; + using SrcEvaluatorType = evaluator; SrcEvaluatorType srcEvaluator(src); @@ -822,14 +849,14 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop( DstEvaluatorType dstEvaluator(dst); - typedef generic_dense_assignment_kernel Kernel; + using Kernel = generic_dense_assignment_kernel; Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); dense_assignment_loop::run(kernel); } template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) { +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) { call_dense_assignment_loop(dst, src, internal::assign_op()); } @@ -849,11 +876,11 @@ struct EigenBase2EigenBase {}; template struct AssignmentKind { - typedef EigenBase2EigenBase Kind; + using Kind = EigenBase2EigenBase; }; template <> struct AssignmentKind { - typedef Dense2Dense Kind; + using Kind = Dense2Dense; }; // This is the main assignment class @@ -908,11 +935,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Ds int(Dst::SizeAtCompileTime) != 1 }; - typedef std::conditional_t, Dst> ActualDstTypeCleaned; - typedef std::conditional_t, Dst&> ActualDstType; + using ActualDstTypeCleaned = std::conditional_t, Dst>; + using ActualDstType = 
std::conditional_t, Dst&>; ActualDstType actualDst(dst); - // TODO check whether this is the right place to perform these checks: + // TODO: check whether this is the right place to perform these checks: EIGEN_STATIC_ASSERT_LVALUE(Dst) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src) EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar); @@ -923,9 +950,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Ds template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, const Func& func) { - typedef evaluator DstEvaluatorType; - typedef evaluator SrcEvaluatorType; - typedef restricted_packet_dense_assignment_kernel Kernel; + using DstEvaluatorType = evaluator; + using SrcEvaluatorType = evaluator; + using Kernel = restricted_packet_dense_assignment_kernel; EIGEN_STATIC_ASSERT_LVALUE(Dst) EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar); @@ -947,7 +974,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Ds template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func) { - // TODO check whether this is the right place to perform these checks: + // TODO: check whether this is the right place to perform these checks: EIGEN_STATIC_ASSERT_LVALUE(Dst) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst, Src) EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar); @@ -1007,7 +1034,7 @@ struct Assignment diff --git a/Eigen/src/Core/Assign_AOCL.h b/Eigen/src/Core/Assign_AOCL.h new file mode 100644 index 00000000000..da3ef7cea3a --- /dev/null +++ b/Eigen/src/Core/Assign_AOCL.h @@ -0,0 +1,301 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + * + * Assign_AOCL.h - AOCL Vectorized Math Dispatch Layer for Eigen + * + * Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. + * + * Description: + * ------------ + * This file implements a high-performance dispatch layer that automatically + * routes Eigen's element-wise mathematical operations to AMD Optimizing CPU + * Libraries (AOCL) Vector Math Library (VML) functions when beneficial for + * performance. + * + * The dispatch system uses C++ template specialization to intercept Eigen's + * assignment operations and redirect them to AOCL's VRDA functions, which + * provide optimized implementations for AMD Zen architectures. + * + * Key Features: + * ------------- + * 1. Automatic Dispatch: Seamlessly routes supported operations to AOCL without + * requiring code changes in user applications + * + * 2. Performance Optimization: Uses AOCL VRDA functions optimized for Zen + * family processors with automatic SIMD instruction selection (AVX2, AVX-512) + * + * 3. Threshold-Based Activation: Only activates for vectors larger than + * EIGEN_AOCL_VML_THRESHOLD (default: 128 elements) to avoid overhead on + * small vectors + * + * 4. Precision-Specific Handling: + * - Double precision: AOCL VRDA vectorized functions + * - Single precision: Scalar fallback (preserves correctness) + * + * 5. 
Memory Layout Compatibility: Ensures direct memory access and compatible + * storage orders between source and destination for optimal performance + * + * Supported Operations: + * --------------------- + * UNARY OPERATIONS (vector → vector): + * - Transcendental: exp(), sin(), cos(), sqrt(), log(), log10(), log2() + * + * BINARY OPERATIONS (vector op vector → vector): + * - Arithmetic: +, *, pow() + * + * Template Specialization Mechanism: + * ----------------------------------- + * The system works by specializing Eigen's Assignment template for: + * 1. CwiseUnaryOp with scalar_*_op functors (unary operations) + * 2. CwiseBinaryOp with scalar_*_op functors (binary operations) + * 3. Dense2Dense assignment context with AOCL-compatible traits + * + * Dispatch conditions (all must be true): + * - Source and destination have DirectAccessBit (contiguous memory) + * - Compatible storage orders (both row-major or both column-major) + * - Vector size ≥ EIGEN_AOCL_VML_THRESHOLD or Dynamic size + * - Supported data type (currently double precision for VRDA) + * + * Integration Example: + * -------------------- + * // Standard Eigen code - no changes required + * VectorXd x = VectorXd::Random(10000); + * VectorXd y = VectorXd::Random(10000); + * VectorXd result; + * + * // These operations are automatically dispatched to AOCL: + * result = x.array().exp(); // → amd_vrda_exp() + * result = x.array().sin(); // → amd_vrda_sin() + * result = x.array() + y.array(); // → amd_vrda_add() + * result = x.array().pow(y.array()); // → amd_vrda_pow() + * + * Configuration: + * -------------- + * Required preprocessor definitions: + * - EIGEN_USE_AOCL_ALL or EIGEN_USE_AOCL_MT: Enable AOCL integration + * - EIGEN_USE_AOCL_VML: Enable Vector Math Library dispatch + * + * Compilation Requirements: + * ------------------------- + * Include paths: + * - AOCL headers: -I${AOCL_ROOT}/include + * - Eigen headers: -I/path/to/eigen + * + * Link libraries: + * - AOCL MathLib: -lamdlibm + * - 
Standard math: -lm + * + * Compiler flags: + * - Optimization: -O3 (required for inlining) + * - Architecture: -march=znver5 or -march=native + * - Vectorization: -mfma -mavx512f (if supported) + * + * Platform Support: + * ------------------ + * - Primary: Linux x86_64 with AMD Zen family processors + * - Compilers: GCC 8+, Clang 10+, AOCC (recommended) + * - AOCL Version: 4.0+ (with VRDA support) + * + * Error Handling: + * --------------- + * - Graceful fallback to scalar operations for unsupported configurations + * - Compile-time detection of AOCL availability + * - Runtime size and alignment validation with eigen_assert() + * + * Developer: + * ---------- + * Name: Sharad Saurabh Bhaskar + * Email: shbhaska@amd.com + * Organization: Advanced Micro Devices, Inc. + */ + + +#ifndef EIGEN_ASSIGN_AOCL_H +#define EIGEN_ASSIGN_AOCL_H + +namespace Eigen { +namespace internal { + +// Traits for unary operations. +template class aocl_assign_traits { +private: + enum { + DstHasDirectAccess = !!(Dst::Flags & DirectAccessBit), + SrcHasDirectAccess = !!(Src::Flags & DirectAccessBit), + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + InnerSize = Dst::IsVectorAtCompileTime ? int(Dst::SizeAtCompileTime) + : (Dst::Flags & RowMajorBit) ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + LargeEnough = + (InnerSize == Dynamic) || (InnerSize >= EIGEN_AOCL_VML_THRESHOLD) + }; + +public: + enum { + EnableAoclVML = DstHasDirectAccess && SrcHasDirectAccess && + StorageOrdersAgree && LargeEnough, + Traversal = LinearTraversal + }; +}; + +// Traits for binary operations (e.g., add, pow). 
+template +class aocl_assign_binary_traits { +private: + enum { + DstHasDirectAccess = !!(Dst::Flags & DirectAccessBit), + LhsHasDirectAccess = !!(Lhs::Flags & DirectAccessBit), + RhsHasDirectAccess = !!(Rhs::Flags & DirectAccessBit), + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Lhs::IsRowMajor)) && + (int(Dst::IsRowMajor) == int(Rhs::IsRowMajor)), + InnerSize = Dst::IsVectorAtCompileTime ? int(Dst::SizeAtCompileTime) + : (Dst::Flags & RowMajorBit) ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + LargeEnough = + (InnerSize == Dynamic) || (InnerSize >= EIGEN_AOCL_VML_THRESHOLD) + }; + +public: + enum { + EnableAoclVML = DstHasDirectAccess && LhsHasDirectAccess && + RhsHasDirectAccess && StorageOrdersAgree && LargeEnough + }; +}; + +// Unary operation dispatch for float (scalar fallback). +#define EIGEN_AOCL_VML_UNARY_CALL_FLOAT(EIGENOP) \ + template \ + struct Assignment< \ + DstXprType, CwiseUnaryOp, SrcXprNested>, \ + assign_op, Dense2Dense, \ + std::enable_if_t< \ + aocl_assign_traits::EnableAoclVML>> { \ + typedef CwiseUnaryOp, SrcXprNested> \ + SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, \ + const assign_op &) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + Eigen::Index n = dst.size(); \ + if (n <= 0) \ + return; \ + const float *input = \ + reinterpret_cast(src.nestedExpression().data()); \ + float *output = reinterpret_cast(dst.data()); \ + for (Eigen::Index i = 0; i < n; ++i) { \ + output[i] = std::EIGENOP(input[i]); \ + } \ + } \ + }; + +// Unary operation dispatch for double (AOCL vectorized). 
+#define EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(EIGENOP, AOCLOP) \ + template \ + struct Assignment< \ + DstXprType, CwiseUnaryOp, SrcXprNested>, \ + assign_op, Dense2Dense, \ + std::enable_if_t< \ + aocl_assign_traits::EnableAoclVML>> { \ + typedef CwiseUnaryOp, SrcXprNested> \ + SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, \ + const assign_op &) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + Eigen::Index n = dst.size(); \ + eigen_assert(n <= INT_MAX && "AOCL does not support arrays larger than INT_MAX"); \ + if (n <= 0) \ + return; \ + const double *input = \ + reinterpret_cast(src.nestedExpression().data()); \ + double *output = reinterpret_cast(dst.data()); \ + int aocl_n = internal::convert_index(n); \ + AOCLOP(aocl_n, const_cast(input), output); \ + } \ + }; + +// Instantiate unary calls for float (scalar). +// EIGEN_AOCL_VML_UNARY_CALL_FLOAT(exp) + +// Instantiate unary calls for double (AOCL vectorized). +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(exp2, amd_vrda_exp2) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(exp, amd_vrda_exp) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(sin, amd_vrda_sin) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(cos, amd_vrda_cos) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(sqrt, amd_vrda_sqrt) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(cbrt, amd_vrda_cbrt) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(abs, amd_vrda_fabs) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(log, amd_vrda_log) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(log10, amd_vrda_log10) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(log2, amd_vrda_log2) + +// Binary operation dispatch for float (scalar fallback). 
+#define EIGEN_AOCL_VML_BINARY_CALL_FLOAT(EIGENOP, STDFUNC) \ + template \ + struct Assignment< \ + DstXprType, \ + CwiseBinaryOp, LhsXprNested, \ + RhsXprNested>, \ + assign_op, Dense2Dense, \ + std::enable_if_t::EnableAoclVML>> { \ + typedef CwiseBinaryOp, LhsXprNested, \ + RhsXprNested> \ + SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, \ + const assign_op &) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + Eigen::Index n = dst.size(); \ + if (n <= 0) \ + return; \ + const float *lhs = reinterpret_cast(src.lhs().data()); \ + const float *rhs = reinterpret_cast(src.rhs().data()); \ + float *output = reinterpret_cast(dst.data()); \ + for (Eigen::Index i = 0; i < n; ++i) { \ + output[i] = STDFUNC(lhs[i], rhs[i]); \ + } \ + } \ + }; + +// Binary operation dispatch for double (AOCL vectorized). +#define EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(EIGENOP, AOCLOP) \ + template \ + struct Assignment< \ + DstXprType, \ + CwiseBinaryOp, LhsXprNested, \ + RhsXprNested>, \ + assign_op, Dense2Dense, \ + std::enable_if_t::EnableAoclVML>> { \ + typedef CwiseBinaryOp, LhsXprNested, \ + RhsXprNested> \ + SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, \ + const assign_op &) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + Eigen::Index n = dst.size(); \ + eigen_assert(n <= INT_MAX && "AOCL does not support arrays larger than INT_MAX"); \ + if (n <= 0) \ + return; \ + const double *lhs = reinterpret_cast(src.lhs().data()); \ + const double *rhs = reinterpret_cast(src.rhs().data()); \ + double *output = reinterpret_cast(dst.data()); \ + int aocl_n = internal::convert_index(n); \ + AOCLOP(aocl_n, const_cast(lhs), const_cast(rhs), output); \ + } \ + }; + +// Instantiate binary calls for float (scalar). 
+// EIGEN_AOCL_VML_BINARY_CALL_FLOAT(sum, std::plus) // Using +// scalar_sum_op for addition EIGEN_AOCL_VML_BINARY_CALL_FLOAT(pow, std::pow) + +// Instantiate binary calls for double (AOCL vectorized). +EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(sum, amd_vrda_add) // Using scalar_sum_op for addition +EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(pow, amd_vrda_pow) +EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(max, amd_vrda_fmax) +EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(min, amd_vrda_fmin) + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_ASSIGN_AOCL_H diff --git a/Eigen/src/Core/Assign_MKL.h b/Eigen/src/Core/Assign_MKL.h index ad112200e0f..7636445cb05 100644 --- a/Eigen/src/Core/Assign_MKL.h +++ b/Eigen/src/Core/Assign_MKL.h @@ -56,11 +56,11 @@ class vml_assign_traits { : int(Dst::MaxRowsAtCompileTime), MaxSizeAtCompileTime = Dst::SizeAtCompileTime, - MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && + MightEnableVml = bool(StorageOrdersAgree) && bool(DstHasDirectAccess) && bool(SrcHasDirectAccess) && Src::InnerStrideAtCompileTime == 1 && Dst::InnerStrideAtCompileTime == 1, - MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), - VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize, - LargeEnough = VmlSize == Dynamic || VmlSize >= EIGEN_MKL_VML_THRESHOLD + MightLinearize = bool(MightEnableVml) && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), + VmlSize = bool(MightLinearize) ? 
MaxSizeAtCompileTime : InnerMaxSize, + LargeEnough = (VmlSize == Dynamic) || VmlSize >= EIGEN_MKL_VML_THRESHOLD }; public: diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 39abff71873..7dcf909305e 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -121,14 +121,14 @@ class Block /** Column or Row constructor */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block(XprType& xpr, Index i) : Impl(xpr, i) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Block(XprType& xpr, Index i) : Impl(xpr, i) { eigen_assert((i >= 0) && (((BlockRows == 1) && (BlockCols == XprType::ColsAtCompileTime) && i < xpr.rows()) || ((BlockRows == XprType::RowsAtCompileTime) && (BlockCols == 1) && i < xpr.cols()))); } /** Fixed-size constructor */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) { EIGEN_STATIC_ASSERT(RowsAtCompileTime != Dynamic && ColsAtCompileTime != Dynamic, THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE) @@ -138,8 +138,8 @@ class Block /** Dynamic-size constructor */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol, Index blockRows, - Index blockCols) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol, Index blockRows, + Index blockCols) : Impl(xpr, startRow, startCol, blockRows, blockCols) { eigen_assert((RowsAtCompileTime == Dynamic || RowsAtCompileTime == blockRows) && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == blockCols)); @@ -175,11 +175,11 @@ class BlockImpl public: typedef Impl Base; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index i) : Impl(xpr, i) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE 
BlockImpl(XprType& xpr, Index i) : Impl(xpr, i) {} + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol) : Impl(xpr, startRow, startCol) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, - Index blockCols) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol, + Index blockRows, Index blockCols) : Impl(xpr, startRow, startCol, blockRows, blockCols) {} }; @@ -196,11 +196,9 @@ class BlockImpl_dense : public internal::dense_xpr_base EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE XprType& nestedExpression() { return m_xpr; } /** \sa MapBase::innerStride() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index innerStride() const noexcept { + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return internal::traits::HasSameStorageOrderAsXprType ? m_xpr.innerStride() : m_xpr.outerStride(); } /** \sa MapBase::outerStride() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const noexcept { + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return internal::traits::HasSameStorageOrderAsXprType ? m_xpr.outerStride() : m_xpr.innerStride(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startRow() const noexcept { return m_startRow.value(); } + EIGEN_DEVICE_FUNC constexpr StorageIndex startRow() const noexcept { return m_startRow.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startCol() const noexcept { return m_startCol.value(); } + EIGEN_DEVICE_FUNC constexpr StorageIndex startCol() const noexcept { return m_startCol.value(); } #ifndef __SUNPRO_CC - // FIXME sunstudio is not friendly with the above friend... - // META-FIXME there is no 'friend' keyword around here. Is this obsolete? + // Historical workaround for SunStudio's handling of the access specifier here. 
protected: #endif diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index c4141179013..4541f47a2bb 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -31,7 +31,7 @@ template struct CommaInitializer { typedef typename XprType::Scalar Scalar; - EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const Scalar& s) + EIGEN_DEVICE_FUNC constexpr CommaInitializer(XprType& xpr, const Scalar& s) : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) { eigen_assert(m_xpr.rows() > 0 && m_xpr.cols() > 0 && "Cannot comma-initialize a 0x0 matrix (operator<<)"); m_xpr.coeffRef(0, 0) = s; @@ -48,7 +48,6 @@ struct CommaInitializer { /* Copy/Move constructor which transfers ownership. This is crucial in * absence of return value optimization to avoid assertions during destruction. */ - // FIXME in C++11 mode this could be replaced by a proper RValue constructor EIGEN_DEVICE_FUNC inline CommaInitializer(const CommaInitializer& o) : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) { // Mark original object as finished. In absence of R-value references we need to const_cast: diff --git a/Eigen/src/Core/ConcatOp.h b/Eigen/src/Core/ConcatOp.h new file mode 100644 index 00000000000..c01f984cf0a --- /dev/null +++ b/Eigen/src/Core/ConcatOp.h @@ -0,0 +1,343 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2026 Pavel Guzenfeld +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CONCAT_OP_H +#define EIGEN_CONCAT_OP_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct traits> : traits { + typedef typename LhsType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type LhsTypeNested; + typedef typename ref_selector::type RhsTypeNested; + typedef std::remove_reference_t LhsTypeNested_; + typedef std::remove_reference_t RhsTypeNested_; + enum { + // For vertical concat (stacking rows): rows add up, cols must match + // For horizontal concat (stacking cols): cols add up, rows must match + LhsRows = int(LhsType::RowsAtCompileTime), + RhsRows = int(RhsType::RowsAtCompileTime), + LhsCols = int(LhsType::ColsAtCompileTime), + RhsCols = int(RhsType::ColsAtCompileTime), + + RowsAtCompileTime = Direction == Vertical + ? (LhsRows == Dynamic || RhsRows == Dynamic ? int(Dynamic) : LhsRows + RhsRows) + : size_prefer_fixed(LhsRows, RhsRows), + ColsAtCompileTime = Direction == Horizontal + ? (LhsCols == Dynamic || RhsCols == Dynamic ? int(Dynamic) : LhsCols + RhsCols) + : size_prefer_fixed(LhsCols, RhsCols), + + LhsMaxRows = int(LhsType::MaxRowsAtCompileTime), + RhsMaxRows = int(RhsType::MaxRowsAtCompileTime), + LhsMaxCols = int(LhsType::MaxColsAtCompileTime), + RhsMaxCols = int(RhsType::MaxColsAtCompileTime), + + MaxRowsAtCompileTime = + Direction == Vertical + ? (LhsMaxRows == Dynamic || RhsMaxRows == Dynamic ? int(Dynamic) : LhsMaxRows + RhsMaxRows) + : max_size_prefer_dynamic(LhsMaxRows, RhsMaxRows), + MaxColsAtCompileTime = + Direction == Horizontal + ? (LhsMaxCols == Dynamic || RhsMaxCols == Dynamic ? int(Dynamic) : LhsMaxCols + RhsMaxCols) + : max_size_prefer_dynamic(LhsMaxCols, RhsMaxCols), + + IsRowMajor = MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1 ? 1 + : MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1 ? 
0 + : (int(LhsType::Flags) & RowMajorBit) ? 1 + : 0, + Flags = IsRowMajor ? RowMajorBit : 0 + }; +}; + +} // namespace internal + +/** + * \class Concat + * \ingroup Core_Module + * + * \brief Expression of the concatenation of two dense expressions + * + * \tparam Direction either \c Vertical or \c Horizontal + * \tparam LhsType the type of the left-hand side expression + * \tparam RhsType the type of the right-hand side expression + * + * This class represents an expression of the concatenation of two dense expressions, + * either vertically (stacking rows) or horizontally (stacking columns). + * + * It is the return type of hcat() and vcat() and typically this is the only way it is used. + * + * \sa hcat(), vcat() + */ +template +class Concat : public internal::dense_xpr_base>::type { + typedef typename internal::traits::LhsTypeNested LhsTypeNested; + typedef typename internal::traits::RhsTypeNested RhsTypeNested; + typedef typename internal::traits::LhsTypeNested_ LhsTypeNested_; + typedef typename internal::traits::RhsTypeNested_ RhsTypeNested_; + + public: + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Concat) + typedef internal::remove_all_t LhsNestedExpression; + typedef internal::remove_all_t RhsNestedExpression; + + template + EIGEN_DEVICE_FUNC constexpr inline Concat(const OriginalLhsType& lhs, const OriginalRhsType& rhs) + : m_lhs(lhs), m_rhs(rhs) { + EIGEN_STATIC_ASSERT((internal::is_same, OriginalLhsType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + EIGEN_STATIC_ASSERT((internal::is_same, OriginalRhsType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + EIGEN_STATIC_ASSERT( + (internal::is_same::value), + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + EIGEN_STATIC_ASSERT_SAME_XPR_KIND(LhsType, RhsType) + EIGEN_STATIC_ASSERT(Direction != Horizontal || 
int(LhsType::RowsAtCompileTime) == Dynamic || + int(RhsType::RowsAtCompileTime) == Dynamic || + int(LhsType::RowsAtCompileTime) == int(RhsType::RowsAtCompileTime), + YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES) + EIGEN_STATIC_ASSERT(Direction != Vertical || int(LhsType::ColsAtCompileTime) == Dynamic || + int(RhsType::ColsAtCompileTime) == Dynamic || + int(LhsType::ColsAtCompileTime) == int(RhsType::ColsAtCompileTime), + YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES) + if (Direction == Vertical) { + eigen_assert(lhs.cols() == rhs.cols() && "vcat: number of columns must match"); + } else { + eigen_assert(lhs.rows() == rhs.rows() && "hcat: number of rows must match"); + } + } + + EIGEN_DEVICE_FUNC constexpr Index rows() const { + return Direction == Vertical ? m_lhs.rows() + m_rhs.rows() : m_lhs.rows(); + } + EIGEN_DEVICE_FUNC constexpr Index cols() const { + return Direction == Horizontal ? m_lhs.cols() + m_rhs.cols() : m_lhs.cols(); + } + + EIGEN_DEVICE_FUNC constexpr const LhsTypeNested_& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC constexpr const RhsTypeNested_& rhs() const { return m_rhs; } + + protected: + LhsTypeNested m_lhs; + RhsTypeNested m_rhs; +}; + +// Evaluator for Concat +namespace internal { + +template +struct evaluator> : evaluator_base> { + typedef Concat XprType; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + typedef typename nested_eval::type LhsNested; + typedef typename nested_eval::type RhsNested; + typedef remove_all_t LhsNestedCleaned; + typedef remove_all_t RhsNestedCleaned; + + enum { + CoeffReadCost = plain_enum_max(evaluator::CoeffReadCost, + evaluator::CoeffReadCost) + + NumTraits::AddCost, // cost of the branch + LhsFlags = evaluator::Flags, + RhsFlags = evaluator::Flags, + BothHavePacketAccess = (int(LhsFlags) & PacketAccessBit) && (int(RhsFlags) & PacketAccessBit), + BothHaveLinearAccess = (int(LhsFlags) & LinearAccessBit) && (int(RhsFlags) & LinearAccessBit), + IsRowMajor = int(traits::Flags) & RowMajorBit, + 
IsVectorAtCompileTime = XprType::IsVectorAtCompileTime, + Flags = (traits::Flags & RowMajorBit) | (BothHavePacketAccess ? PacketAccessBit : 0) | + (IsVectorAtCompileTime && BothHaveLinearAccess ? LinearAccessBit : 0), + Alignment = 0 // conservative: no alignment guarantees across boundary + }; + + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), + m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), + m_rhsImpl(m_rhs), + m_lhsRows(xpr.lhs().rows()), + m_lhsCols(xpr.lhs().cols()) {} + + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + if (Direction == Vertical) { + if (row < m_lhsRows.value()) + return m_lhsImpl.coeff(row, col); + else + return m_rhsImpl.coeff(row - m_lhsRows.value(), col); + } else { + if (col < m_lhsCols.value()) + return m_lhsImpl.coeff(row, col); + else + return m_rhsImpl.coeff(row, col - m_lhsCols.value()); + } + } + + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + const Index boundary = Direction == Vertical ? m_lhsRows.value() : m_lhsCols.value(); + if (index < boundary) + return m_lhsImpl.coeff(index); + else + return m_rhsImpl.coeff(index - boundary); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + constexpr int packetSize = unpacket_traits::size; + if (Direction == Vertical) { + const Index boundary = m_lhsRows.value(); + if (row >= boundary) return m_rhsImpl.template packet(row - boundary, col); + // Column-major: inner=rows, packet extends along rows and may straddle the row boundary. + // Row-major: inner=cols, packet extends along cols — never crosses the row boundary. 
+ if (!IsRowMajor && row + packetSize > boundary) return packetBoundary(row, col); + return m_lhsImpl.template packet(row, col); + } else { + const Index boundary = m_lhsCols.value(); + if (col >= boundary) return m_rhsImpl.template packet(row, col - boundary); + // Row-major: inner=cols, packet extends along cols and may straddle the col boundary. + // Column-major: inner=rows, packet extends along rows — never crosses the col boundary. + if (IsRowMajor && col + packetSize > boundary) return packetBoundary(row, col); + return m_lhsImpl.template packet(row, col); + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + constexpr int packetSize = unpacket_traits::size; + const Index boundary = Direction == Vertical ? m_lhsRows.value() : m_lhsCols.value(); + if (index >= boundary) return m_rhsImpl.template packet(index - boundary); + if (index + packetSize > boundary) return packetBoundaryLinear(index); + return m_lhsImpl.template packet(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + if (Direction == Vertical) { + const Index boundary = m_lhsRows.value(); + if (row >= boundary) + return m_rhsImpl.template packetSegment(row - boundary, col, begin, count); + if (!IsRowMajor && row + begin + count > boundary) + return packetSegmentBoundary(row, col, begin, count); + return m_lhsImpl.template packetSegment(row, col, begin, count); + } else { + const Index boundary = m_lhsCols.value(); + if (col >= boundary) + return m_rhsImpl.template packetSegment(row, col - boundary, begin, count); + if (IsRowMajor && col + begin + count > boundary) + return packetSegmentBoundary(row, col, begin, count); + return m_lhsImpl.template packetSegment(row, col, begin, count); + } + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + const Index boundary = Direction == 
Vertical ? m_lhsRows.value() : m_lhsCols.value(); + if (index >= boundary) + return m_rhsImpl.template packetSegment(index - boundary, begin, count); + if (index + begin + count > boundary) return packetSegmentBoundaryLinear(index, begin, count); + return m_lhsImpl.template packetSegment(index, begin, count); + } + + protected: + typedef typename XprType::Scalar Scalar; + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetBoundary(Index row, Index col) const { + constexpr int packetSize = unpacket_traits::size; + EIGEN_ALIGN_MAX Scalar tmp[packetSize]; + for (int i = 0; i < packetSize; ++i) + tmp[i] = coeff(row + (Direction == Vertical ? i : 0), col + (Direction == Horizontal ? i : 0)); + return pload(tmp); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetBoundaryLinear(Index index) const { + constexpr int packetSize = unpacket_traits::size; + EIGEN_ALIGN_MAX Scalar tmp[packetSize]; + for (int i = 0; i < packetSize; ++i) tmp[i] = coeff(index + i); + return pload(tmp); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegmentBoundary(Index row, Index col, Index begin, + Index count) const { + constexpr int packetSize = unpacket_traits::size; + EIGEN_ALIGN_MAX Scalar tmp[packetSize]; + for (Index i = begin; i < begin + count; ++i) + tmp[i] = coeff(row + (Direction == Vertical ? i : 0), col + (Direction == Horizontal ? 
i : 0)); + return ploadSegment(tmp, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegmentBoundaryLinear(Index index, Index begin, + Index count) const { + constexpr int packetSize = unpacket_traits::size; + EIGEN_ALIGN_MAX Scalar tmp[packetSize]; + for (Index i = begin; i < begin + count; ++i) tmp[i] = coeff(index + i); + return ploadSegment(tmp, begin, count); + } + + LhsNested m_lhs; + RhsNested m_rhs; + evaluator m_lhsImpl; + evaluator m_rhsImpl; + const variable_if_dynamic m_lhsRows; + const variable_if_dynamic m_lhsCols; +}; + +} // namespace internal + +/** + * \relates Concat + * \returns an expression of \a lhs and \a rhs concatenated horizontally (side by side). + * + * Both arguments must have the same number of rows. + * To concatenate more than two expressions, chain calls: \c hcat(hcat(a, b), c). + * + * Example: \code + * Matrix2d A, B; + * auto C = hcat(A, B); // C is 2x4 + * \endcode + * + * \sa vcat(), Concat + */ +template +EIGEN_DEVICE_FUNC inline const Concat hcat(const DenseBase& lhs, const DenseBase& rhs) { + return Concat(lhs.derived(), rhs.derived()); +} + +/** + * \relates Concat + * \returns an expression of \a lhs and \a rhs concatenated vertically (stacked on top of each other). + * + * Both arguments must have the same number of columns. + * To concatenate more than two expressions, chain calls: \c vcat(vcat(a, b), c). 
+ * + * Example: \code + * Matrix2d A, B; + * auto C = vcat(A, B); // C is 4x2 + * \endcode + * + * \sa hcat(), Concat + */ +template +EIGEN_DEVICE_FUNC inline const Concat vcat(const DenseBase& lhs, const DenseBase& rhs) { + return Concat(lhs.derived(), rhs.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_CONCAT_OP_H diff --git a/Eigen/src/Core/ConditionEstimator.h b/Eigen/src/Core/ConditionEstimator.h index dd1770b1abc..df27be3bc6b 100644 --- a/Eigen/src/Core/ConditionEstimator.h +++ b/Eigen/src/Core/ConditionEstimator.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com) +// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@gmail.com) // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -40,18 +40,17 @@ struct rcond_compute_sign { * \a matrix that implements .solve() and .adjoint().solve() methods. * * This function implements Algorithms 4.1 and 5.1 from - * http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf - * which also forms the basis for the condition number estimators in - * LAPACK. Since at most 10 calls to the solve method of dec are - * performed, the total cost is O(dims^2), as opposed to O(dims^3) - * needed to compute the inverse matrix explicitly. + * Higham, "Experience with a Matrix Norm Estimator", + * SIAM J. Sci. Stat. Comput., 11(4):804-809, 1990. + * with Higham's alternating-sign safety-net estimate from + * Higham and Tisseur, "A Block Algorithm for Matrix 1-Norm Estimation, + * with an Application to 1-Norm Pseudospectra", SIAM J. Matrix Anal. Appl., + * 21(4):1185-1201, 2000. * - * The most common usage is in estimating the condition number - * ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be - * computed directly in O(n^2) operations. 
+ * The Hager/Higham gradient ascent uses at most 4 iterations of 2 solves + * each (at most 10 solves overall), giving a total cost of O(n^2). * - * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and - * LLT. + * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, LLT. * * \sa FullPivLU, PartialPivLU, LDLT, LLT. */ @@ -66,7 +65,7 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp eigen_assert(dec.rows() == dec.cols()); const Index n = dec.rows(); - if (n == 0) return 0; + if (n == 0) return RealScalar(0); // Disable Index to float conversion warning #ifdef __INTEL_COMPILER @@ -80,14 +79,12 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp // lower_bound is a lower bound on // ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1 - // and is the objective maximized by the ("super-") gradient ascent - // algorithm below. + // and is the objective maximized by the supergradient ascent algorithm below. RealScalar lower_bound = v.template lpNorm<1>(); if (n == 1) return lower_bound; - // Gradient ascent algorithm follows: We know that the optimum is achieved at - // one of the simplices v = e_i, so in each iteration we follow a - // super-gradient to move towards the optimal one. + // Gradient ascent: the optimum is achieved at a unit vector e_j. Each + // iteration follows the supergradient to find which unit vector to probe next. RealScalar old_lower_bound = lower_bound; Vector sign_vector(n); Vector old_sign_vector; @@ -96,21 +93,21 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp for (int k = 0; k < 4; ++k) { sign_vector = internal::rcond_compute_sign::run(v); if (k > 0 && !is_complex && sign_vector == old_sign_vector) { - // Break if the solution stagnated. + // Break if the sign vector stagnated. break; } - // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )| + // Supergradient: z = A^{-T} * sign(v), pick argmax |z_i|. 
v = dec.adjoint().solve(sign_vector); v.real().cwiseAbs().maxCoeff(&v_max_abs_index); if (v_max_abs_index == old_v_max_abs_index) { - // Break if the solution stagnated. + // Optimality: supergradient points to the same unit vector. break; } - // Move to the new simplex e_j, where j = v_max_abs_index. - v = dec.solve(Vector::Unit(n, v_max_abs_index)); // v = inv(matrix) * e_j. + // Probe the best unit vector: v = A^{-1} * e_j. + v = dec.solve(Vector::Unit(n, v_max_abs_index)); lower_bound = v.template lpNorm<1>(); if (lower_bound <= old_lower_bound) { - // Break if the gradient step did not increase the lower_bound. + // No improvement from the gradient step. break; } if (!is_complex) { @@ -119,25 +116,19 @@ typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomp old_v_max_abs_index = v_max_abs_index; old_lower_bound = lower_bound; } - // The following calculates an independent estimate of ||matrix||_1 by - // multiplying matrix by a vector with entries of slowly increasing - // magnitude and alternating sign: - // v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1. - // This improvement to Hager's algorithm above is due to Higham. It was - // added to make the algorithm more robust in certain corner cases where - // large elements in the matrix might otherwise escape detection due to - // exact cancellation (especially when op and op_adjoint correspond to a - // sequence of backsubstitutions and permutations), which could cause - // Hager's algorithm to vastly underestimate ||matrix||_1. + // Higham's alternating-sign estimate: an independent safety-net that catches + // cases where the gradient ascent converges to a local maximum due to exact + // cancellation patterns (especially with permutations and backsubstitutions). + // v_i = (-1)^i * (1 + i/(n-1)), then estimate = 2*||A^{-1}*v||_1 / (3*n). 
Scalar alternating_sign(RealScalar(1)); for (Index i = 0; i < n; ++i) { - // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates + // The static_cast is needed when Scalar is complex and RealScalar uses expression templates. v[i] = alternating_sign * static_cast(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1)))); alternating_sign = -alternating_sign; } v = dec.solve(v); - const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n)); - return numext::maxi(lower_bound, alternate_lower_bound); + const RealScalar alt_est = (RealScalar(2) * v.template lpNorm<1>()) / (RealScalar(3) * RealScalar(n)); + return numext::maxi(lower_bound, alt_est); } /** \brief Reciprocal condition number estimator. diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 63f1895d2ab..ef1642c54ed 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -23,7 +23,7 @@ namespace internal { // Default assumes index based accessors template struct storage_kind_to_evaluator_kind { - typedef IndexBased Kind; + using Kind = IndexBased; }; // This class returns the evaluator shape from the expression storage kind. 
@@ -33,19 +33,19 @@ struct storage_kind_to_shape; template <> struct storage_kind_to_shape { - typedef DenseShape Shape; + using Shape = DenseShape; }; template <> struct storage_kind_to_shape { - typedef SolverShape Shape; + using Shape = SolverShape; }; template <> struct storage_kind_to_shape { - typedef PermutationShape Shape; + using Shape = PermutationShape; }; template <> struct storage_kind_to_shape { - typedef TranspositionsShape Shape; + using Shape = TranspositionsShape; }; // Evaluators have to be specialized with respect to various criteria such as: @@ -86,8 +86,8 @@ struct unary_evaluator; template struct evaluator_traits_base { // by default, get evaluator kind and shape from storage - typedef typename storage_kind_to_evaluator_kind::StorageKind>::Kind Kind; - typedef typename storage_kind_to_shape::StorageKind>::Shape Shape; + using Kind = typename storage_kind_to_evaluator_kind::StorageKind>::Kind; + using Shape = typename storage_kind_to_shape::StorageKind>::Shape; }; // Default evaluator traits @@ -95,40 +95,36 @@ template struct evaluator_traits : public evaluator_traits_base {}; template ::Shape> -struct evaluator_assume_aliasing { - static const bool value = false; -}; +struct evaluator_assume_aliasing : std::false_type {}; // By default, we assume a unary expression: template struct evaluator : public unary_evaluator { - typedef unary_evaluator Base; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const T& xpr) : Base(xpr) {} + using Base = unary_evaluator; + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit evaluator(const T& xpr) : Base(xpr) {} }; // TODO: Think about const-correctness template struct evaluator : evaluator { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const T& xpr) : evaluator(xpr) {} + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit evaluator(const T& xpr) : evaluator(xpr) {} }; // ---------- base class for all evaluators ---------- template struct evaluator_base { - // TODO that's 
not very nice to have to propagate all these traits. They are currently only needed to handle + // TODO: find a way to avoid propagating all these traits. They are currently only needed to handle // outer,inner indices. - typedef traits ExpressionTraits; + using ExpressionTraits = traits; enum { Alignment = 0 }; - // noncopyable: - // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) - // and make complex evaluator much larger than then should do. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator_base() = default; + // Spell out deleted copy operations instead of inheriting from an empty helper: + // an extra base can kill EBO and make complex evaluators larger than they should be. + EIGEN_DEVICE_FUNC constexpr evaluator_base() = default; - private: - EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); - EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&); + evaluator_base(const evaluator_base&) = delete; + evaluator_base& operator=(const evaluator_base&) = delete; }; // -------------------- Matrix and Array -------------------- @@ -142,23 +138,22 @@ struct evaluator_base { template class plainobjectbase_evaluator_data { public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) - : data(ptr) { + EIGEN_DEVICE_FUNC constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) { #ifndef EIGEN_INTERNAL_DEBUGGING EIGEN_UNUSED_VARIABLE(outerStride); #endif eigen_internal_assert(outerStride == OuterStride); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const noexcept { return OuterStride; } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return OuterStride; } const Scalar* data; }; template class plainobjectbase_evaluator_data { public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) + 
EIGEN_DEVICE_FUNC constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const { return m_outerStride; } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const { return m_outerStride; } const Scalar* data; protected: @@ -167,9 +162,9 @@ class plainobjectbase_evaluator_data { template struct evaluator> : evaluator_base { - typedef PlainObjectBase PlainObjectType; - typedef typename PlainObjectType::Scalar Scalar; - typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + using PlainObjectType = PlainObjectBase; + using Scalar = typename PlainObjectType::Scalar; + using CoeffReturnType = typename PlainObjectType::CoeffReturnType; enum { IsRowMajor = PlainObjectType::IsRowMajor, @@ -188,11 +183,11 @@ struct evaluator> : evaluator_base { : RowsAtCompileTime }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() : m_d(0, OuterStrideAtCompileTime) { + EIGEN_DEVICE_FUNC constexpr evaluator() : m_d(0, OuterStrideAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const PlainObjectType& m) + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const PlainObjectType& m) : m_d(m.data(), IsVectorAtCompileTime ? 0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -257,7 +252,7 @@ struct evaluator> : evaluator_base { plainobjectbase_evaluator_data m_d; private: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index constexpr getIndex(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr Index getIndex(Index row, Index col) const { return IsRowMajor ? 
row * m_d.outerStride() + col : row + col * m_d.outerStride(); } }; @@ -265,30 +260,28 @@ struct evaluator> : evaluator_base { template struct evaluator> : evaluator>> { - typedef Matrix XprType; + using XprType = Matrix; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default; + EIGEN_DEVICE_FUNC constexpr evaluator() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m) - : evaluator>(m) {} + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const XprType& m) : evaluator>(m) {} }; template struct evaluator> : evaluator>> { - typedef Array XprType; + using XprType = Array; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default; + EIGEN_DEVICE_FUNC constexpr evaluator() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m) - : evaluator>(m) {} + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const XprType& m) : evaluator>(m) {} }; // -------------------- Transpose -------------------- template struct unary_evaluator, IndexBased> : evaluator_base> { - typedef Transpose XprType; + using XprType = Transpose; enum { CoeffReadCost = evaluator::CoeffReadCost, @@ -296,20 +289,25 @@ struct unary_evaluator, IndexBased> : evaluator_base::Alignment }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& t) + : m_argImpl(t.nestedExpression()) {} - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + using Scalar = typename XprType::Scalar; + using CoeffReturnType = typename XprType::CoeffReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(col, row); } - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_argImpl.coeff(index); + } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(col, row); } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + return m_argImpl.coeffRef(col, row); + } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename XprType::Scalar& coeffRef(Index index) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE typename XprType::Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } @@ -368,11 +366,12 @@ template ::value> struct nullary_wrapper { template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, + IndexType j) const { return op(i, j); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } @@ -389,7 +388,8 @@ struct nullary_wrapper { template struct nullary_wrapper { template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType = 0, IndexType = 0) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType = 0, + IndexType = 0) const { return op(); } template @@ -401,7 +401,8 @@ struct nullary_wrapper { template struct nullary_wrapper { template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j = 0) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, + IndexType j = 0) const { return op(i, j); } template @@ 
-416,7 +417,8 @@ struct nullary_wrapper { template struct nullary_wrapper { template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, + IndexType j) const { eigen_assert(i == 0 || j == 0); return op(i + j); } @@ -427,7 +429,7 @@ struct nullary_wrapper { } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); } template @@ -439,69 +441,11 @@ struct nullary_wrapper { template struct nullary_wrapper {}; -#if 0 && EIGEN_COMP_MSVC > 0 -// Disable this ugly workaround. This is now handled in traits::match, -// but this piece of code might still become handly if some other weird compilation -// errors pop up again. - -// MSVC exhibits a weird compilation error when -// compiling: -// Eigen::MatrixXf A = MatrixXf::Random(3,3); -// Ref R = 2.f*A; -// and that has_*ary_operator> have not been instantiated yet. -// The "problem" is that evaluator<2.f*A> is instantiated by traits::match<2.f*A> -// and at that time has_*ary_operator returns true regardless of T. -// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>. -// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(), -// and packet() are really instantiated as implemented below: - -// This is a simple wrapper around Index to enforce the re-instantiation of -// has_*ary_operator when needed. 
-template struct nullary_wrapper_workaround_msvc { - nullary_wrapper_workaround_msvc(const T&); - operator T()const; -}; - -template -struct nullary_wrapper -{ - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { - return nullary_wrapper >::value, - has_unary_operator >::value, - has_binary_operator >::value>().operator()(op,i,j); - } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { - return nullary_wrapper >::value, - has_unary_operator >::value, - has_binary_operator >::value>().operator()(op,i); - } - - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { - return nullary_wrapper >::value, - has_unary_operator >::value, - has_binary_operator >::value>().template packetOp(op,i,j); - } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { - return nullary_wrapper >::value, - has_unary_operator >::value, - has_binary_operator >::value>().template packetOp(op,i); - } -}; -#endif // MSVC workaround - template struct evaluator> : evaluator_base> { - typedef CwiseNullaryOp XprType; - typedef remove_all_t PlainObjectTypeCleaned; + using XprType = CwiseNullaryOp; + using PlainObjectTypeCleaned = remove_all_t; enum { CoeffReadCost = functor_traits::Cost, @@ -513,19 +457,19 @@ struct evaluator> Alignment = AlignedMax }; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) : m_functor(n.functor()), m_wrapper() { + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const XprType& n) : m_functor(n.functor()), m_wrapper() { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - typedef typename XprType::CoeffReturnType CoeffReturnType; + using CoeffReturnType = typename XprType::CoeffReturnType; template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(IndexType row, IndexType col) const { + EIGEN_DEVICE_FUNC constexpr 
EIGEN_STRONG_INLINE CoeffReturnType coeff(IndexType row, IndexType col) const { return m_wrapper(m_functor, row, col); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(IndexType index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(IndexType index) const { return m_wrapper(m_functor, index); } @@ -560,7 +504,7 @@ struct evaluator> template struct unary_evaluator, IndexBased> : evaluator_base> { - typedef CwiseUnaryOp XprType; + using XprType = CwiseUnaryOp; enum { CoeffReadCost = int(evaluator::CoeffReadCost) + int(functor_traits::Cost), @@ -570,18 +514,18 @@ struct unary_evaluator, IndexBased> : evaluator_b Alignment = evaluator::Alignment }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& op) : m_d(op) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& op) : m_d(op) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - typedef typename XprType::CoeffReturnType CoeffReturnType; + using CoeffReturnType = typename XprType::CoeffReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { return m_d.func()(m_d.argImpl.coeff(row, col)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_d.func()(m_d.argImpl.coeff(index)); } @@ -608,9 +552,9 @@ struct unary_evaluator, IndexBased> : evaluator_b protected: // this helper permits to completely eliminate the functor if it is empty struct Data { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Data(const XprType& xpr) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE const UnaryOp& func() const { return op; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const UnaryOp& func() const { return op; } UnaryOp op; evaluator argImpl; }; @@ -639,7 +583,7 @@ struct unary_evaluator, ArgType>, In Alignment = evaluator::Alignment }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_rows(xpr.rows()), m_cols(xpr.cols()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -671,15 +615,15 @@ struct unary_evaluator, ArgType>, In Index actualCol = IsRowMajor ? col + offset : col; return m_argImpl.coeff(actualRow, actualCol); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SrcType srcCoeff(Index index, Index offset) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE SrcType srcCoeff(Index index, Index offset) const { Index actualIndex = index + offset; return m_argImpl.coeff(actualIndex); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE DstType coeff(Index row, Index col) const { return cast(srcCoeff(row, col, 0)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE DstType coeff(Index index) const { return cast(srcCoeff(index, 0)); } @@ -707,7 +651,7 @@ struct unary_evaluator, ArgType>, In Index packetOffset = offset * PacketSize; Index actualRow = IsRowMajor ? row : row + packetOffset; Index actualCol = IsRowMajor ? 
col + packetOffset : col; - eigen_assert(check_array_bounds(actualRow, actualCol, 0, count) && "Array index out of bounds"); + eigen_assert(check_array_bounds(actualRow, actualCol, begin, count) && "Array index out of bounds"); return m_argImpl.template packetSegment(actualRow, actualCol, begin, count); } template @@ -715,8 +659,8 @@ struct unary_evaluator, ArgType>, In Index offset) const { constexpr int PacketSize = unpacket_traits::size; Index packetOffset = offset * PacketSize; - Index actualIndex = index + packetOffset + begin; - eigen_assert(check_array_bounds(actualIndex, 0, count) && "Array index out of bounds"); + Index actualIndex = index + packetOffset; + eigen_assert(check_array_bounds(actualIndex, begin, count) && "Array index out of bounds"); return m_argImpl.template packetSegment(actualIndex, begin, count); } @@ -958,16 +902,16 @@ struct unary_evaluator, ArgType>, In template struct evaluator> : public ternary_evaluator> { - typedef CwiseTernaryOp XprType; - typedef ternary_evaluator> Base; + using XprType = CwiseTernaryOp; + using Base = ternary_evaluator>; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const XprType& xpr) : Base(xpr) {} }; template struct ternary_evaluator, IndexBased, IndexBased> : evaluator_base> { - typedef CwiseTernaryOp XprType; + using XprType = CwiseTernaryOp; enum { CoeffReadCost = int(evaluator::CoeffReadCost) + int(evaluator::CoeffReadCost) + @@ -990,18 +934,18 @@ struct ternary_evaluator, IndexBased evaluator::Alignment) }; - EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) { + EIGEN_DEVICE_FUNC constexpr explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - typedef typename XprType::CoeffReturnType CoeffReturnType; + using CoeffReturnType = typename XprType::CoeffReturnType; - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index)); } @@ -1036,9 +980,9 @@ struct ternary_evaluator, IndexBased protected: // this helper permits to completely eliminate the functor if it is empty struct Data { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Data(const XprType& xpr) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Data(const XprType& xpr) : op(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TernaryOp& func() const { return op; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const TernaryOp& func() const { return op; } TernaryOp op; evaluator arg1Impl; evaluator arg2Impl; @@ -1048,24 +992,35 @@ struct ternary_evaluator, IndexBased Data m_d; }; -// specialization for expressions like (a < b).select(c, d) to enable full vectorization template -struct evaluator, Arg1, Arg2, - CwiseBinaryOp, CmpLhsType, CmpRhsType>>> - : public ternary_evaluator< - CwiseTernaryOp, Arg1, Arg2, - CwiseBinaryOp, CmpLhsType, CmpRhsType>>> { +struct scalar_boolean_select_spec { using DummyTernaryOp = scalar_boolean_select_op; using DummyArg3 = CwiseBinaryOp, CmpLhsType, CmpRhsType>; using DummyXprType = CwiseTernaryOp; - using TernaryOp = scalar_boolean_select_op; - using Arg3 = CwiseBinaryOp, CmpLhsType, CmpRhsType>; + // only use the typed comparison if it is vectorized + static constexpr bool UseTyped = functor_traits>::PacketAccess; + using CondScalar = std::conditional_t; + + using TernaryOp = 
scalar_boolean_select_op; + using Arg3 = CwiseBinaryOp, CmpLhsType, CmpRhsType>; using XprType = CwiseTernaryOp; using Base = ternary_evaluator; +}; - EIGEN_DEVICE_FUNC explicit evaluator(const DummyXprType& xpr) +// specialization for expressions like (a < b).select(c, d) to enable full vectorization +template +struct evaluator, Arg1, Arg2, + CwiseBinaryOp, CmpLhsType, CmpRhsType>>> + : public scalar_boolean_select_spec::Base { + using Helper = scalar_boolean_select_spec; + using Base = typename Helper::Base; + using DummyXprType = typename Helper::DummyXprType; + using Arg3 = typename Helper::Arg3; + using XprType = typename Helper::XprType; + + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const DummyXprType& xpr) : Base(XprType(xpr.arg1(), xpr.arg2(), Arg3(xpr.arg3().lhs(), xpr.arg3().rhs()))) {} }; @@ -1074,16 +1029,16 @@ struct evaluator, // this is a binary expression template struct evaluator> : public binary_evaluator> { - typedef CwiseBinaryOp XprType; - typedef binary_evaluator> Base; + using XprType = CwiseBinaryOp; + using Base = binary_evaluator>; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} }; template struct binary_evaluator, IndexBased, IndexBased> : evaluator_base> { - typedef CwiseBinaryOp XprType; + using XprType = CwiseBinaryOp; enum { CoeffReadCost = @@ -1102,18 +1057,18 @@ struct binary_evaluator, IndexBased, IndexBase Alignment = plain_enum_min(evaluator::Alignment, evaluator::Alignment) }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit binary_evaluator(const XprType& xpr) : m_d(xpr) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit binary_evaluator(const XprType& xpr) : m_d(xpr) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - typedef typename XprType::CoeffReturnType CoeffReturnType; + using CoeffReturnType = 
typename XprType::CoeffReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index)); } @@ -1144,9 +1099,9 @@ struct binary_evaluator, IndexBased, IndexBase protected: // this helper permits to completely eliminate the functor if it is empty struct Data { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Data(const XprType& xpr) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Data(const XprType& xpr) : op(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const BinaryOp& func() const { return op; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const BinaryOp& func() const { return op; } BinaryOp op; evaluator lhsImpl; evaluator rhsImpl; @@ -1160,46 +1115,46 @@ struct binary_evaluator, IndexBased, IndexBase template struct unary_evaluator, IndexBased> : evaluator_base> { - typedef CwiseUnaryView XprType; + using XprType = CwiseUnaryView; enum { CoeffReadCost = int(evaluator::CoeffReadCost) + int(functor_traits::Cost), Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), - Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... + Alignment = 0 // FIXME: clarify why alignment is lost for CwiseUnaryView. 
}; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op) { + EIGEN_DEVICE_FUNC constexpr explicit unary_evaluator(const XprType& op) : m_d(op) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + using Scalar = typename XprType::Scalar; + using CoeffReturnType = typename XprType::CoeffReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { return m_d.func()(m_d.argImpl.coeff(row, col)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_d.func()(m_d.argImpl.coeff(index)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { return m_d.func()(m_d.argImpl.coeffRef(row, col)); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_d.func()(m_d.argImpl.coeffRef(index)); } protected: // this helper permits to completely eliminate the functor if it is empty struct Data { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Data(const XprType& xpr) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Data(const XprType& xpr) : op(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryOp& func() const { return op; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const UnaryOp& func() const { return op; } UnaryOp op; evaluator argImpl; }; @@ -1209,25 +1164,25 @@ struct unary_evaluator, IndexBased> // -------------------- Map -------------------- -// FIXME 
perhaps the PlainObjectType could be provided by Derived::PlainObject ? +// FIXME: consider using Derived::PlainObject for PlainObjectType. // but that might complicate template specialization template struct mapbase_evaluator; template struct mapbase_evaluator : evaluator_base { - typedef Derived XprType; - typedef typename XprType::PointerType PointerType; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + using XprType = Derived; + using PointerType = typename XprType::PointerType; + using Scalar = typename XprType::Scalar; + using CoeffReturnType = typename XprType::CoeffReturnType; enum { - IsRowMajor = XprType::RowsAtCompileTime, + IsRowMajor = XprType::IsRowMajor, ColsAtCompileTime = XprType::ColsAtCompileTime, CoeffReadCost = NumTraits::ReadCost }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit mapbase_evaluator(const XprType& map) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit mapbase_evaluator(const XprType& map) : m_data(const_cast(map.data())), m_innerStride(map.innerStride()), m_outerStride(map.outerStride()) { @@ -1237,19 +1192,21 @@ struct mapbase_evaluator : evaluator_base { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { return m_data[col * colStride() + row * rowStride()]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_data[index * m_innerStride.value()]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { return m_data[col * colStride() + row * rowStride()]; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) 
{ return m_data[index * m_innerStride.value()]; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return m_data[index * m_innerStride.value()]; + } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { @@ -1298,10 +1255,10 @@ struct mapbase_evaluator : evaluator_base { } protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowStride() const noexcept { + EIGEN_DEVICE_FUNC constexpr Index rowStride() const noexcept { return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colStride() const noexcept { + EIGEN_DEVICE_FUNC constexpr Index colStride() const noexcept { return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); } @@ -1313,10 +1270,10 @@ struct mapbase_evaluator : evaluator_base { template struct evaluator> : public mapbase_evaluator, PlainObjectType> { - typedef Map XprType; - typedef typename XprType::Scalar Scalar; + using XprType = Map; + using Scalar = typename XprType::Scalar; // TODO: should check for smaller packet types once we can handle multi-sized packet types - typedef typename packet_traits::type PacketScalar; + using PacketScalar = typename packet_traits::type; enum { InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 @@ -1338,7 +1295,8 @@ struct evaluator> Alignment = int(MapOptions) & int(AlignedMask) }; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) : mapbase_evaluator(map) {} + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const XprType& map) + : mapbase_evaluator(map) {} }; // -------------------- Ref -------------------- @@ -1346,14 +1304,14 @@ struct evaluator> template struct evaluator> : public mapbase_evaluator, PlainObjectType> { - typedef Ref XprType; + using XprType = Ref; enum { Flags = evaluator>::Flags, Alignment = evaluator>::Alignment }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& 
ref) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit evaluator(const XprType& ref) : mapbase_evaluator(ref) {} }; @@ -1366,10 +1324,10 @@ struct block_evaluator; template struct evaluator> : block_evaluator { - typedef Block XprType; - typedef typename XprType::Scalar Scalar; + using XprType = Block; + using Scalar = typename XprType::Scalar; // TODO: should check for smaller packet types once we can handle multi-sized packet types - typedef typename packet_traits::type PacketScalar; + using PacketScalar = typename packet_traits::type; enum { CoeffReadCost = evaluator::CoeffReadCost, @@ -1406,8 +1364,9 @@ struct evaluator> : 0, Alignment = plain_enum_min(evaluator::Alignment, Alignment0) }; - typedef block_evaluator block_evaluator_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& block) : block_evaluator_type(block) { + using block_evaluator_type = block_evaluator; + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit evaluator(const XprType& block) + : block_evaluator_type(block) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } }; @@ -1416,18 +1375,18 @@ struct evaluator> template struct block_evaluator : unary_evaluator> { - typedef Block XprType; + using XprType = Block; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit block_evaluator(const XprType& block) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit block_evaluator(const XprType& block) : unary_evaluator(block) {} }; template struct unary_evaluator, IndexBased> : evaluator_base> { - typedef Block XprType; + using XprType = Block; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& block) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), m_startCol(block.startCol()), @@ -1437,8 +1396,8 @@ struct unary_evaluator, IndexBa : block.startCol() * block.nestedExpression().rows() + block.startRow()) : 0) {} - 
typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + using Scalar = typename XprType::Scalar; + using CoeffReturnType = typename XprType::CoeffReturnType; enum { RowsAtCompileTime = XprType::RowsAtCompileTime, @@ -1446,19 +1405,19 @@ struct unary_evaluator, IndexBa bool(evaluator::Flags & LinearAccessBit) }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return linear_coeff_impl(index, bool_constant()); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return linear_coeffRef_impl(index, bool_constant()); } @@ -1469,10 +1428,9 @@ struct unary_evaluator, IndexBa template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { - if (ForwardLinearAccess) - return m_argImpl.template packet(m_linear_offset.value() + index); - else - return packet(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + EIGEN_IF_CONSTEXPR(ForwardLinearAccess) + return m_argImpl.template packet(m_linear_offset.value() + index); + else return packet(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? 
index : 0); } template @@ -1482,11 +1440,10 @@ struct unary_evaluator, IndexBa template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - if (ForwardLinearAccess) - return m_argImpl.template writePacket(m_linear_offset.value() + index, x); - else - return writePacket(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0, - x); + EIGEN_IF_CONSTEXPR(ForwardLinearAccess) + return m_argImpl.template writePacket(m_linear_offset.value() + index, x); + else return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, x); } template @@ -1497,11 +1454,10 @@ struct unary_evaluator, IndexBa template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { - if (ForwardLinearAccess) - return m_argImpl.template packetSegment(m_linear_offset.value() + index, begin, count); - else - return packetSegment(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0, - begin, count); + EIGEN_IF_CONSTEXPR(ForwardLinearAccess) + return m_argImpl.template packetSegment(m_linear_offset.value() + index, begin, count); + else return packetSegment(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, begin, count); } template @@ -1514,29 +1470,28 @@ struct unary_evaluator, IndexBa template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, Index count) { - if (ForwardLinearAccess) - return m_argImpl.template writePacketSegment(m_linear_offset.value() + index, x, begin, - count); - else - return writePacketSegment(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, x, begin, count); + EIGEN_IF_CONSTEXPR(ForwardLinearAccess) + return m_argImpl.template writePacketSegment(m_linear_offset.value() + index, x, begin, + count); + else return writePacketSegment(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? 
index : 0, x, begin, count); } protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const { return m_argImpl.coeff(m_linear_offset.value() + index); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType linear_coeff_impl(Index index, internal::false_type /* not ForwardLinearAccess */) const { return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& linear_coeffRef_impl(Index index, - internal::true_type /* ForwardLinearAccess */) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& linear_coeffRef_impl( + Index index, internal::true_type /* ForwardLinearAccess */) { return m_argImpl.coeffRef(m_linear_offset.value() + index); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& linear_coeffRef_impl( + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& linear_coeffRef_impl( Index index, internal::false_type /* not ForwardLinearAccess */) { return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? 
index : 0); } @@ -1554,10 +1509,10 @@ template struct block_evaluator : mapbase_evaluator, typename Block::PlainObject> { - typedef Block XprType; - typedef typename XprType::Scalar Scalar; + using XprType = Block; + using Scalar = typename XprType::Scalar; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit block_evaluator(const XprType& block) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit block_evaluator(const XprType& block) : mapbase_evaluator(block) { eigen_internal_assert((internal::is_constant_evaluated() || (std::uintptr_t(block.data()) % plain_enum_max(1, evaluator::Alignment)) == 0) && @@ -1565,60 +1520,16 @@ struct block_evaluator -struct evaluator> - : evaluator_base> { - typedef Select XprType; - enum { - CoeffReadCost = evaluator::CoeffReadCost + - plain_enum_max(evaluator::CoeffReadCost, evaluator::CoeffReadCost), - - Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits, - - Alignment = plain_enum_min(evaluator::Alignment, evaluator::Alignment) - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& select) - : m_conditionImpl(select.conditionMatrix()), m_thenImpl(select.thenMatrix()), m_elseImpl(select.elseMatrix()) { - EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); - } - - typedef typename XprType::CoeffReturnType CoeffReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - if (m_conditionImpl.coeff(row, col)) - return m_thenImpl.coeff(row, col); - else - return m_elseImpl.coeff(row, col); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - if (m_conditionImpl.coeff(index)) - return m_thenImpl.coeff(index); - else - return m_elseImpl.coeff(index); - } - - protected: - evaluator m_conditionImpl; - evaluator m_thenImpl; - evaluator m_elseImpl; -}; - // -------------------- Replicate -------------------- template struct unary_evaluator> : evaluator_base> { - typedef Replicate XprType; - typedef typename 
XprType::CoeffReturnType CoeffReturnType; + using XprType = Replicate; + using CoeffReturnType = typename XprType::CoeffReturnType; enum { Factor = (RowFactor == Dynamic || ColFactor == Dynamic) ? Dynamic : RowFactor * ColFactor }; - typedef typename nested_eval::type ArgTypeNested; - typedef remove_all_t ArgTypeNestedCleaned; + using ArgTypeNested = typename nested_eval::type; + using ArgTypeNestedCleaned = remove_all_t; enum { CoeffReadCost = evaluator::CoeffReadCost, @@ -1629,13 +1540,13 @@ struct unary_evaluator> Alignment = evaluator::Alignment }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& replicate) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& replicate) : m_arg(replicate.nestedExpression()), m_argImpl(m_arg), m_rows(replicate.nestedExpression().rows()), m_cols(replicate.nestedExpression().cols()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_row = traits::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value(); const Index actual_col = traits::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? col : col % m_cols.value(); @@ -1643,7 +1554,7 @@ struct unary_evaluator> return m_argImpl.coeff(actual_row, actual_col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { // try to avoid using modulo; this is a pure optimization strategy const Index actual_index = traits::RowsAtCompileTime == 1 ? (ColFactor == 1 ? 
index : index % m_cols.value()) @@ -1687,7 +1598,7 @@ struct unary_evaluator> } protected: - const ArgTypeNested m_arg; + ArgTypeNested m_arg; evaluator m_argImpl; const variable_if_dynamic m_rows; const variable_if_dynamic m_cols; @@ -1700,7 +1611,7 @@ struct unary_evaluator> template struct evaluator_wrapper_base : evaluator_base { - typedef remove_all_t ArgType; + using ArgType = remove_all_t; enum { CoeffReadCost = evaluator::CoeffReadCost, Flags = evaluator::Flags, @@ -1709,18 +1620,22 @@ struct evaluator_wrapper_base : evaluator_base { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} - typedef typename ArgType::Scalar Scalar; - typedef typename ArgType::CoeffReturnType CoeffReturnType; + using Scalar = typename ArgType::Scalar; + using CoeffReturnType = typename ArgType::CoeffReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(row, col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_argImpl.coeff(index); + } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(row, col); } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + return m_argImpl.coeffRef(row, col); + } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { @@ -1770,17 +1685,17 @@ struct evaluator_wrapper_base : evaluator_base { template 
struct unary_evaluator> : evaluator_wrapper_base> { - typedef MatrixWrapper XprType; + using XprType = MatrixWrapper; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper) : evaluator_wrapper_base>(wrapper.nestedExpression()) {} }; template struct unary_evaluator> : evaluator_wrapper_base> { - typedef ArrayWrapper XprType; + using XprType = ArrayWrapper; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper) : evaluator_wrapper_base>(wrapper.nestedExpression()) {} }; @@ -1792,9 +1707,9 @@ struct reverse_packet_cond; template struct unary_evaluator> : evaluator_base> { - typedef Reverse XprType; - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + using XprType = Reverse; + using Scalar = typename XprType::Scalar; + using CoeffReturnType = typename XprType::CoeffReturnType; enum { IsRowMajor = XprType::IsRowMajor, @@ -1807,7 +1722,7 @@ struct unary_evaluator> : evaluator_base::CoeffReadCost, // let's enable LinearAccess only with vectorization because of the product overhead - // FIXME enable DirectAccess with negative strides? + // FIXME: consider enabling DirectAccess with negative strides. 
Flags0 = evaluator::Flags, LinearAccess = ((Direction == BothDirections) && (int(Flags0) & PacketAccessBit)) || @@ -1817,27 +1732,27 @@ struct unary_evaluator> : evaluator_base> : evaluator_base struct evaluator> : evaluator_base> { - typedef Diagonal XprType; + using XprType = Diagonal; enum { CoeffReadCost = evaluator::CoeffReadCost, - Flags = - (unsigned int)(evaluator::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit, + Flags = static_cast(evaluator::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | + LinearAccessBit, Alignment = 0 }; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& diagonal) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit evaluator(const XprType& diagonal) : m_argImpl(diagonal.nestedExpression()), m_index(diagonal.index()) {} - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; + using Scalar = typename XprType::Scalar; + using CoeffReturnType = typename XprType::CoeffReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index) const { return m_argImpl.coeff(row + rowOffset(), row + colOffset()); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index + rowOffset(), index + colOffset()); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index) { return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); } @@ 
-1987,12 +1902,8 @@ struct evaluator> : evaluator_base m_index; private: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowOffset() const { - return m_index.value() > 0 ? 0 : -m_index.value(); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colOffset() const { - return m_index.value() > 0 ? m_index.value() : 0; - } + EIGEN_DEVICE_FUNC constexpr Index rowOffset() const { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC constexpr Index colOffset() const { return m_index.value() > 0 ? m_index.value() : 0; } }; //---------------------------------------------------------------------- @@ -2012,7 +1923,7 @@ struct traits> : public traits {}; template class EvalToTemp : public dense_xpr_base>::type { public: - typedef typename dense_xpr_base::type Base; + using Base = typename dense_xpr_base::type; EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) explicit EvalToTemp(const ArgType& arg) : m_arg(arg) {} @@ -2029,16 +1940,18 @@ class EvalToTemp : public dense_xpr_base>::type { template struct evaluator> : public evaluator { - typedef EvalToTemp XprType; - typedef typename ArgType::PlainObject PlainObject; - typedef evaluator Base; + using XprType = EvalToTemp; + using PlainObject = typename ArgType::PlainObject; + using Base = evaluator; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_result(xpr.arg()) { + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const XprType& xpr) : m_result(xpr.arg()) { internal::construct_at(this, m_result); } // This constructor is used when nesting an EvalTo evaluator in another evaluator - EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) : m_result(arg) { internal::construct_at(this, m_result); } + EIGEN_DEVICE_FUNC constexpr evaluator(const ArgType& arg) : m_result(arg) { + internal::construct_at(this, m_result); + } protected: PlainObject m_result; diff --git a/Eigen/src/Core/CoreIterators.h b/Eigen/src/Core/CoreIterators.h index f62cf238e75..3143726867b 100644 --- 
a/Eigen/src/Core/CoreIterators.h +++ b/Eigen/src/Core/CoreIterators.h @@ -57,7 +57,7 @@ class InnerIterator { m_iter.operator+=(i); return *this; } - EIGEN_STRONG_INLINE InnerIterator operator+(Index i) { + EIGEN_STRONG_INLINE InnerIterator operator+(Index i) const { InnerIterator result(*this); result += i; return result; diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index e2b2da5a643..27fd4340f8a 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -98,33 +98,33 @@ class CwiseBinaryOp : public CwiseBinaryOpImpl RhsNested_; #if EIGEN_COMP_MSVC - // Required for Visual Studio or the Copy constructor will probably not get inlined! + // Required for Visual Studio, which may fail to inline the copy constructor otherwise. EIGEN_STRONG_INLINE CwiseBinaryOp(const CwiseBinaryOp&) = default; #endif - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, - const BinaryOp& func = BinaryOp()) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, + const BinaryOp& func = BinaryOp()) : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) { eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols()); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { // return the fixed size type if available to enable compile time optimizations return internal::traits>::RowsAtCompileTime == Dynamic ? m_rhs.rows() : m_lhs.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { // return the fixed size type if available to enable compile time optimizations return internal::traits>::ColsAtCompileTime == Dynamic ? 
m_rhs.cols() : m_lhs.cols(); } /** \returns the left hand side nested expression */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const LhsNested_& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const LhsNested_& lhs() const { return m_lhs; } /** \returns the right hand side nested expression */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const RhsNested_& rhs() const { return m_rhs; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const RhsNested_& rhs() const { return m_rhs; } /** \returns the functor representing the binary operation */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const BinaryOp& functor() const { return m_functor; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const BinaryOp& functor() const { return m_functor; } protected: LhsNested m_lhs; @@ -145,7 +145,7 @@ class CwiseBinaryOpImpl : public internal::generic_xpr_base template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator-=(const MatrixBase& other) { +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Derived& MatrixBase::operator-=(const MatrixBase& other) { call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } @@ -156,7 +156,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator-=(c */ template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator+=(const MatrixBase& other) { +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Derived& MatrixBase::operator+=(const MatrixBase& other) { call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 13a542a023f..bf6b6f30499 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -50,7 +50,7 @@ struct traits > : traits::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp) - EIGEN_DEVICE_FUNC CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp()) + EIGEN_DEVICE_FUNC constexpr 
CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp()) : m_rows(rows), m_cols(cols), m_functor(func) { eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); } - EIGEN_DEVICE_FUNC CwiseNullaryOp(Index size, const NullaryOp& func = NullaryOp()) + EIGEN_DEVICE_FUNC constexpr CwiseNullaryOp(Index size, const NullaryOp& func = NullaryOp()) : CwiseNullaryOp(RowsAtCompileTime == 1 ? 1 : size, RowsAtCompileTime == 1 ? size : 1, func) { EIGEN_STATIC_ASSERT(CwiseNullaryOp::IsVectorAtCompileTime, YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols.value(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols.value(); } /** \returns the functor representing the nullary operation */ - EIGEN_DEVICE_FUNC const NullaryOp& functor() const { return m_functor; } + EIGEN_DEVICE_FUNC constexpr const NullaryOp& functor() const { return m_functor; } protected: const internal::variable_if_dynamic m_rows; @@ -94,7 +94,7 @@ class CwiseNullaryOp : public internal::dense_xpr_base::Constant(Index rows, Index cols, const Scalar& value) { * \only_for_vectors * * This variant is meant to be used for dynamic-size vector types. For fixed-size types, - * it is redundant to pass \a size as argument, so Zero() should be used + * it is redundant to pass \a size as argument, so Constant(const Scalar&) should be used * instead. * * The template parameter \a CustomNullaryOp is the type of the functor. 
@@ -235,8 +235,7 @@ DenseBase::Constant(const Scalar& value) { * \sa LinSpaced(Index,const Scalar&, const Scalar&), setLinSpaced(Index,const Scalar&,const Scalar&) */ template -EIGEN_DEPRECATED EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase< - Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) return DenseBase::NullaryExpr(size, internal::linspaced_op(low, high, size)); @@ -247,8 +246,7 @@ DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const * \sa LinSpaced(const Scalar&, const Scalar&) */ template -EIGEN_DEPRECATED EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase< - Derived>::RandomAccessLinSpacedReturnType +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) diff --git a/Eigen/src/Core/CwiseTernaryOp.h b/Eigen/src/Core/CwiseTernaryOp.h index 9bb0d4075c8..87377917dca 100644 --- a/Eigen/src/Core/CwiseTernaryOp.h +++ b/Eigen/src/Core/CwiseTernaryOp.h @@ -118,7 +118,7 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl>::RowsAtCompileTime == Dynamic && @@ -130,7 +130,7 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl>::ColsAtCompileTime == Dynamic && @@ -144,13 +144,13 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl::type XprTypeNested; typedef internal::remove_all_t NestedExpression; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit CwiseUnaryOp(const XprType& xpr, + const UnaryOp& func = UnaryOp()) : m_xpr(xpr), m_functor(func) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
constexpr Index rows() const noexcept { return m_xpr.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_xpr.cols(); } /** \returns the functor representing the unary operation */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryOp& functor() const { return m_functor; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const UnaryOp& functor() const { return m_functor; } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const internal::remove_all_t& nestedExpression() const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const internal::remove_all_t& nestedExpression() + const { return m_xpr; } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::remove_all_t& nestedExpression() { return m_xpr; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE internal::remove_all_t& nestedExpression() { + return m_xpr; + } protected: XprTypeNested m_xpr; diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h index 7dd7623fc48..384c8b1542b 100644 --- a/Eigen/src/Core/CwiseUnaryView.h +++ b/Eigen/src/Core/CwiseUnaryView.h @@ -100,6 +100,7 @@ class CwiseUnaryViewImpl EIGEN_DENSE_PUBLIC_INTERFACE(Derived) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) + using Base::coeffRef; using Base::data; EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } @@ -140,22 +141,24 @@ class CwiseUnaryView : public internal::CwiseUnaryViewImpl::non_const_type MatrixTypeNested; typedef internal::remove_all_t NestedExpression; - explicit EIGEN_DEVICE_FUNC inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) + explicit EIGEN_DEVICE_FUNC constexpr inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) : m_matrix(mat), m_functor(func) {} 
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_matrix.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_matrix.cols(); } /** \returns the functor representing unary operation */ - EIGEN_DEVICE_FUNC const ViewOp& functor() const { return m_functor; } + EIGEN_DEVICE_FUNC constexpr const ViewOp& functor() const { return m_functor; } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { return m_matrix; } + EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t& nestedExpression() const { + return m_matrix; + } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC std::remove_reference_t& nestedExpression() { return m_matrix; } + EIGEN_DEVICE_FUNC constexpr std::remove_reference_t& nestedExpression() { return m_matrix; } protected: MatrixTypeNested m_matrix; diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 4f6894280e1..a24fc4766d2 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -260,21 +260,21 @@ class DenseBase /** Copies \a other into *this. \returns a reference to *this. 
*/ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); template - EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase& other); + EIGEN_DEVICE_FUNC constexpr Derived& operator=(const EigenBase& other); template - EIGEN_DEVICE_FUNC Derived& operator+=(const EigenBase& other); + EIGEN_DEVICE_FUNC constexpr Derived& operator+=(const EigenBase& other); template - EIGEN_DEVICE_FUNC Derived& operator-=(const EigenBase& other); + EIGEN_DEVICE_FUNC constexpr Derived& operator-=(const EigenBase& other); template EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& func); @@ -283,7 +283,7 @@ class DenseBase * Copies \a other into *this without evaluating other. \returns a reference to *this. 
*/ template /** \deprecated */ - EIGEN_DEPRECATED EIGEN_DEVICE_FUNC Derived& lazyAssign(const DenseBase& other); + EIGEN_DEPRECATED EIGEN_DEVICE_FUNC constexpr Derived& lazyAssign(const DenseBase& other); EIGEN_DEVICE_FUNC CommaInitializer operator<<(const Scalar& s); @@ -306,12 +306,12 @@ class DenseBase EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index size, const Scalar& value); EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(const Scalar& value); - EIGEN_DEPRECATED EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, Index size, - const Scalar& low, - const Scalar& high); - EIGEN_DEPRECATED EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, - const Scalar& low, - const Scalar& high); + EIGEN_DEPRECATED_WITH_REASON("The method may result in accuracy loss. Use .EqualSpaced() instead.") + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, Index size, const Scalar& low, + const Scalar& high); + EIGEN_DEPRECATED_WITH_REASON("The method may result in accuracy loss. 
Use .EqualSpaced() instead.") + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, const Scalar& low, + const Scalar& high); EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Index size, const Scalar& low, const Scalar& high); @@ -348,13 +348,13 @@ class DenseBase EIGEN_DEVICE_FUNC Derived& setRandom(); template - EIGEN_DEVICE_FUNC bool isApprox(const DenseBase& other, - const RealScalar& prec = NumTraits::dummy_precision()) const; - EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const RealScalar& other, - const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC constexpr bool isApprox(const DenseBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC constexpr bool isMuchSmallerThan( + const RealScalar& other, const RealScalar& prec = NumTraits::dummy_precision()) const; template - EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const DenseBase& other, - const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC constexpr bool isMuchSmallerThan( + const DenseBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const; EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits::dummy_precision()) const; @@ -366,8 +366,13 @@ class DenseBase EIGEN_DEVICE_FUNC inline bool hasNaN() const; EIGEN_DEVICE_FUNC inline bool allFinite() const; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other); - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other); + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other); + template ::value, typename = std::enable_if_t> + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator*=(const RealScalar& other); + + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other); + template ::value, typename = std::enable_if_t> + 
EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator/=(const RealScalar& other); typedef internal::add_const_on_value_type_t::type> EvalReturnType; /** \returns the matrix or vector obtained by evaluating this expression. @@ -404,7 +409,7 @@ class DenseBase call_assignment(derived(), other.derived(), internal::swap_assign_op()); } - EIGEN_DEVICE_FUNC inline const NestByValue nestByValue() const; + EIGEN_DEVICE_FUNC constexpr inline const NestByValue nestByValue() const; EIGEN_DEVICE_FUNC inline const ForceAlignedAccess forceAlignedAccess() const; EIGEN_DEVICE_FUNC inline ForceAlignedAccess forceAlignedAccess(); template @@ -419,49 +424,21 @@ class DenseBase EIGEN_DEVICE_FUNC Scalar prod() const; - template + // The default PropagateFast gives undefined behavior on NaN inputs but the fastest code. + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff() const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff() const; - // By default, the fastest version with undefined NaN propagation semantics is - // used. - // TODO(rmlarsen): Replace with default template argument when we move to - // c++11 or beyond. - EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar minCoeff() const { - return minCoeff(); - } - EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar maxCoeff() const { - return maxCoeff(); - } - - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* index) const; - template + template EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* index) const; - // TODO(rmlarsen): Replace these methods with a default template argument. 
- template - EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const { - return minCoeff(row, col); - } - template - EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const { - return maxCoeff(row, col); - } - template - EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar minCoeff(IndexType* index) const { - return minCoeff(index); - } - template - EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar maxCoeff(IndexType* index) const { - return maxCoeff(index); - } - template EIGEN_DEVICE_FUNC Scalar redux(const BinaryOp& func) const; @@ -519,25 +496,25 @@ class DenseBase static const RandomReturnType Random(); template - inline EIGEN_DEVICE_FUNC - CwiseTernaryOp::Scalar, - typename DenseBase::Scalar, Scalar>, - ThenDerived, ElseDerived, Derived> - select(const DenseBase& thenMatrix, const DenseBase& elseMatrix) const; + inline EIGEN_DEVICE_FUNC constexpr CwiseTernaryOp< + internal::scalar_boolean_select_op::Scalar, + typename DenseBase::Scalar, Scalar>, + ThenDerived, ElseDerived, Derived> + select(const DenseBase& thenMatrix, const DenseBase& elseMatrix) const; template - inline EIGEN_DEVICE_FUNC - CwiseTernaryOp::Scalar, - typename DenseBase::Scalar, Scalar>, - ThenDerived, typename DenseBase::ConstantReturnType, Derived> - select(const DenseBase& thenMatrix, const typename DenseBase::Scalar& elseScalar) const; + inline EIGEN_DEVICE_FUNC constexpr CwiseTernaryOp< + internal::scalar_boolean_select_op::Scalar, + typename DenseBase::Scalar, Scalar>, + ThenDerived, typename DenseBase::ConstantReturnType, Derived> + select(const DenseBase& thenMatrix, const typename DenseBase::Scalar& elseScalar) const; template - inline EIGEN_DEVICE_FUNC - CwiseTernaryOp::Scalar, - typename DenseBase::Scalar, Scalar>, - typename DenseBase::ConstantReturnType, ElseDerived, Derived> - select(const typename DenseBase::Scalar& thenScalar, const DenseBase& elseMatrix) const; + 
inline EIGEN_DEVICE_FUNC constexpr CwiseTernaryOp< + internal::scalar_boolean_select_op::Scalar, + typename DenseBase::Scalar, Scalar>, + typename DenseBase::ConstantReturnType, ElseDerived, Derived> + select(const typename DenseBase::Scalar& thenScalar, const DenseBase& elseMatrix) const; template RealScalar lpNorm() const; @@ -575,12 +552,12 @@ class DenseBase #else typedef std::conditional_t<(Flags & DirectAccessBit) == DirectAccessBit, internal::pointer_based_stl_iterator, - internal::generic_randaccess_stl_iterator > + internal::generic_randaccess_stl_iterator> iterator_type; typedef std::conditional_t<(Flags & DirectAccessBit) == DirectAccessBit, internal::pointer_based_stl_iterator, - internal::generic_randaccess_stl_iterator > + internal::generic_randaccess_stl_iterator> const_iterator_type; // Stl-style iterators are supported only for vectors. @@ -597,12 +574,20 @@ class DenseBase inline const_iterator end() const; inline const_iterator cend() const; + using RealViewReturnType = std::conditional_t::IsComplex, RealView, Derived&>; + using ConstRealViewReturnType = + std::conditional_t::IsComplex, RealView, const Derived&>; + + EIGEN_DEVICE_FUNC RealViewReturnType realView(); + EIGEN_DEVICE_FUNC ConstRealViewReturnType realView() const; + #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase #define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) #define EIGEN_DOC_UNARY_ADDONS(X, Y) #include "../plugins/CommonCwiseUnaryOps.inc" #include "../plugins/BlockMethods.inc" +// Defines operator()(const RowIndices&, const ColIndices&) and other indexed view methods. 
#include "../plugins/IndexedViewMethods.inc" #include "../plugins/ReshapedMethods.inc" #ifdef EIGEN_DENSEBASE_PLUGIN diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h index 377df574ffd..c5284169423 100644 --- a/Eigen/src/Core/DenseCoeffsBase.h +++ b/Eigen/src/Core/DenseCoeffsBase.h @@ -67,14 +67,14 @@ class DenseCoeffsBase : public EigenBase { using Base::rows; using Base::size; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const { return int(Derived::RowsAtCompileTime) == 1 ? 0 : int(Derived::ColsAtCompileTime) == 1 ? inner : int(Derived::Flags) & RowMajorBit ? outer : inner; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const { return int(Derived::ColsAtCompileTime) == 1 ? 0 : int(Derived::RowsAtCompileTime) == 1 ? inner : int(Derived::Flags) & RowMajorBit ? 
inner @@ -95,12 +95,12 @@ class DenseCoeffsBase : public EigenBase { * * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr CoeffReturnType coeff(Index row, Index col) const { eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return internal::evaluator(derived()).coeff(row, col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { + EIGEN_DEVICE_FUNC constexpr CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { return coeff(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner)); } @@ -108,11 +108,19 @@ class DenseCoeffsBase : public EigenBase { * * \sa operator()(Index,Index), operator[](Index) */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator()(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr CoeffReturnType operator()(Index row, Index col) const { eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); return coeff(row, col); } +#ifdef EIGEN_MULTIDIMENSIONAL_SUBSCRIPT + /** \returns the coefficient at given the given row and column. + * + * \sa operator[](Index,Index), operator[](Index) + */ + EIGEN_DEVICE_FUNC constexpr CoeffReturnType operator[](Index row, Index col) const { return operator()(row, col); } +#endif + /** Short version: don't use this function, use * \link operator[](Index) const \endlink instead. 
* @@ -128,7 +136,7 @@ class DenseCoeffsBase : public EigenBase { * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr CoeffReturnType coeff(Index index) const { EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); @@ -143,7 +151,7 @@ class DenseCoeffsBase : public EigenBase { * z() const, w() const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator[](Index index) const { + EIGEN_DEVICE_FUNC constexpr CoeffReturnType operator[](Index index) const { EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) eigen_assert(index >= 0 && index < size()); @@ -160,32 +168,32 @@ class DenseCoeffsBase : public EigenBase { * z() const, w() const */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator()(Index index) const { + EIGEN_DEVICE_FUNC constexpr CoeffReturnType operator()(Index index) const { eigen_assert(index >= 0 && index < size()); return coeff(index); } /** equivalent to operator[](0). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType x() const { return (*this)[0]; } + EIGEN_DEVICE_FUNC constexpr CoeffReturnType x() const { return (*this)[0]; } /** equivalent to operator[](1). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType y() const { + EIGEN_DEVICE_FUNC constexpr CoeffReturnType y() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); return (*this)[1]; } /** equivalent to operator[](2). 
*/ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType z() const { + EIGEN_DEVICE_FUNC constexpr CoeffReturnType z() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); return (*this)[2]; } /** equivalent to operator[](3). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType w() const { + EIGEN_DEVICE_FUNC constexpr CoeffReturnType w() const { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); return (*this)[3]; } @@ -303,12 +311,12 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); return internal::evaluator(derived()).coeffRef(row, col); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRefByOuterInner(Index outer, Index inner) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRefByOuterInner(Index outer, Index inner) { return coeffRef(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner)); } @@ -316,12 +324,19 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && row < rows() && col >= 0 && col < cols()); return coeffRef(row, col); } +#ifdef EIGEN_MULTIDIMENSIONAL_SUBSCRIPT + /** \returns a reference to the coefficient at given the given row and column. + * + * \sa operator[](Index) + */ + EIGEN_DEVICE_FUNC constexpr Scalar& operator[](Index row, Index col) { return operator()(row, col); } +#endif + /** Short version: don't use this function, use * \link operator[](Index) \endlink instead. 
* @@ -337,7 +352,7 @@ class DenseCoeffsBase : public DenseCoeffsBase::Flags & LinearAccessBit, THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) eigen_internal_assert(index >= 0 && index < size()); @@ -351,7 +366,7 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); @@ -367,32 +382,32 @@ class DenseCoeffsBase : public DenseCoeffsBase= 0 && index < size()); return coeffRef(index); } /** equivalent to operator[](0). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& x() { return (*this)[0]; } + EIGEN_DEVICE_FUNC constexpr Scalar& x() { return (*this)[0]; } /** equivalent to operator[](1). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& y() { + EIGEN_DEVICE_FUNC constexpr Scalar& y() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); return (*this)[1]; } /** equivalent to operator[](2). */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& z() { + EIGEN_DEVICE_FUNC constexpr Scalar& z() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); return (*this)[2]; } /** equivalent to operator[](3). 
*/ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& w() { + EIGEN_DEVICE_FUNC constexpr Scalar& w() { EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); return (*this)[3]; } diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h index d62586c99b5..8f2d1b12022 100644 --- a/Eigen/src/Core/DenseStorage.h +++ b/Eigen/src/Core/DenseStorage.h @@ -54,7 +54,7 @@ template struct plain_array { - T array[Size]; + // on some 32-bit platforms, stack-allocated arrays are aligned to 4 bytes, not the preferred alignment of T + EIGEN_ALIGN_TO_BOUNDARY(alignof(T)) T array[Size]; #if defined(EIGEN_NO_DEBUG) || defined(EIGEN_TESTING_PLAINOBJECT_CTOR) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; + EIGEN_DEVICE_FUNC constexpr plain_array() = default; #else EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() { EIGEN_MAKE_STACK_ALLOCATION_ASSERT(Size * sizeof(T)) } #endif }; -template -struct plain_array { - T array[1]; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; -}; - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap_plain_array(plain_array& a, plain_array& b, @@ -97,8 +92,8 @@ class DenseStorage_impl { public: #ifndef EIGEN_DENSE_STORAGE_CTOR_PLUGIN - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default; #else EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) @@ -108,19 +103,18 @@ class DenseStorage_impl { smart_copy(other.m_data.array, other.m_data.array + Size, m_data.array); } #endif - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index 
/*size*/, Index /*rows*/, Index /*cols*/) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { numext::swap(m_data, other.m_data); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, - Index /*cols*/) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * Cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } }; template class DenseStorage_impl { @@ -128,7 +122,7 @@ class DenseStorage_impl { Index m_rows = 0; public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) : m_rows(other.m_rows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) @@ -137,7 +131,7 @@ class DenseStorage_impl { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index /*cols*/) : m_rows(rows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - EIGEN_UNUSED_VARIABLE(size) + EIGEN_UNUSED_VARIABLE(size); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); @@ -148,17 +142,13 @@ class DenseStorage_impl { swap_plain_array(m_data, other.m_data, size(), other.size()); numext::swap(m_rows, other.m_rows); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { - m_rows = rows; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { - m_rows = rows; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; } + EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return 
m_data.array; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } }; template class DenseStorage_impl { @@ -166,7 +156,7 @@ class DenseStorage_impl { Index m_cols = 0; public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) : m_cols(other.m_cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) @@ -175,7 +165,7 @@ class DenseStorage_impl { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index /*rows*/, Index cols) : m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - EIGEN_UNUSED_VARIABLE(size) + EIGEN_UNUSED_VARIABLE(size); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); @@ -186,17 +176,13 @@ class DenseStorage_impl { swap_plain_array(m_data, other.m_data, size(), other.size()); numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { - m_cols = cols; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { - m_cols = cols; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; } + 
EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } }; template class DenseStorage_impl { @@ -205,7 +191,7 @@ class DenseStorage_impl { Index m_cols = 0; public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) : m_rows(other.m_rows), m_cols(other.m_cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) @@ -214,7 +200,7 @@ class DenseStorage_impl { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index cols) : m_rows(rows), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) - EIGEN_UNUSED_VARIABLE(size) + EIGEN_UNUSED_VARIABLE(size); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); @@ -227,87 +213,72 @@ class DenseStorage_impl { numext::swap(m_rows, other.m_rows); numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { m_rows = rows; m_cols = cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) { + EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index rows, Index cols) { m_rows = rows; m_cols = cols; } - 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data.array; } }; // null matrix variants template class DenseStorage_impl { public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl&) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, - Index /*cols*/) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl&) {} + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * Cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return nullptr; } }; template class DenseStorage_impl { Index m_rows = 0; public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index /*cols*/) - : m_rows(rows) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { - numext::swap(m_rows, other.m_rows); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { - m_rows = rows; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { - m_rows = rows; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return 
m_rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index rows, Index /*cols*/) : m_rows(rows) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_rows, other.m_rows); } + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; } + EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { m_rows = rows; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return nullptr; } }; template class DenseStorage_impl { Index m_cols = 0; public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index cols) - : m_cols(cols) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { - 
numext::swap(m_cols, other.m_cols); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { - m_cols = cols; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { - m_cols = cols; - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index cols) : m_cols(cols) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_cols, other.m_cols); } + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; } + EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { m_cols = cols; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return nullptr; } }; template class DenseStorage_impl { @@ -315,28 +286,27 @@ class DenseStorage_impl { Index m_cols = 0; public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; - EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index cols) - : m_rows(rows), m_cols(cols) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(Index /*size*/, Index rows, Index cols) : m_rows(rows), m_cols(cols) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_rows, other.m_rows); numext::swap(m_cols, other.m_cols); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { + EIGEN_DEVICE_FUNC constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { m_rows = rows; m_cols = cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) { + EIGEN_DEVICE_FUNC constexpr void resize(Index /*size*/, Index rows, Index cols) { m_rows = rows; m_cols = cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; } + 
EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return nullptr; } }; // fixed-size matrix with dynamic memory allocation not currently supported template @@ -350,7 +320,7 @@ class DenseStorage_impl { public: static constexpr int Size = Dynamic; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) : m_data(conditional_aligned_new_auto(other.size())), m_rows(other.m_rows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) @@ -360,7 +330,7 @@ class DenseStorage_impl { : m_data(conditional_aligned_new_auto(size)), m_rows(rows) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept : m_data(other.m_data), m_rows(other.m_rows) { other.m_data = nullptr; other.m_rows = 0; @@ -371,11 +341,11 @@ class DenseStorage_impl { smart_copy(other.m_data, other.m_data + other.size(), m_data); return *this; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { this->swap(other); return *this; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_data, other.m_data); numext::swap(m_rows, other.m_rows); } @@ -392,11 +362,11 @@ class DenseStorage_impl { } m_rows = rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } - 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data; } }; template class DenseStorage_impl { @@ -406,7 +376,7 @@ class DenseStorage_impl { public: static constexpr int Size = Dynamic; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) : m_data(conditional_aligned_new_auto(other.size())), m_cols(other.m_cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) @@ -416,7 +386,7 @@ class DenseStorage_impl { : m_data(conditional_aligned_new_auto(size)), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept : m_data(other.m_data), m_cols(other.m_cols) { other.m_data = nullptr; other.m_cols = 0; @@ -427,11 +397,11 @@ class DenseStorage_impl { smart_copy(other.m_data, other.m_data + other.size(), m_data); return *this; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { this->swap(other); return 
*this; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_data, other.m_data); numext::swap(m_cols, other.m_cols); } @@ -448,11 +418,11 @@ class DenseStorage_impl { } m_cols = cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data; } }; template class DenseStorage_impl { @@ -463,7 +433,7 @@ class DenseStorage_impl { public: static constexpr int Size = Dynamic; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl() = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) : m_data(conditional_aligned_new_auto(other.size())), m_rows(other.m_rows), m_cols(other.m_cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) @@ -473,7 +443,7 @@ class DenseStorage_impl { : m_data(conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) { EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept : 
m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) { other.m_data = nullptr; other.m_rows = 0; @@ -485,11 +455,11 @@ class DenseStorage_impl { smart_copy(other.m_data, other.m_data + other.size(), m_data); return *this; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + EIGEN_DEVICE_FUNC constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { this->swap(other); return *this; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + EIGEN_DEVICE_FUNC constexpr void swap(DenseStorage_impl& other) noexcept { numext::swap(m_data, other.m_data); numext::swap(m_rows, other.m_rows); numext::swap(m_cols, other.m_cols); @@ -509,11 +479,11 @@ class DenseStorage_impl { m_rows = rows; m_cols = cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC constexpr const T* data() const { return m_data; } }; template struct use_default_move { @@ -542,15 +512,14 @@ class DenseStorage : public internal::DenseStorage_impl; public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index 
size, Index rows, Index cols) - : Base(size, rows, cols) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) : Base(size, rows, cols) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) = default; // if DenseStorage meets the requirements of use_default_move, then use the move construction and move assignment // operation defined in DenseStorage_impl, or the compiler-generated version if none is defined - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage(DenseStorage&&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(DenseStorage&&) = default; }; template class DenseStorage @@ -558,16 +527,15 @@ class DenseStorage using Base = internal::DenseStorage_impl; public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index size, Index rows, Index cols) - : Base(size, rows, cols) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage() = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC constexpr DenseStorage(Index size, Index rows, Index cols) : Base(size, rows, cols) {} + EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(const DenseStorage&) = default; // if DenseStorage does not meet the requirements of use_default_move, then defer to the copy 
construction and copy // assignment behavior - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&& other) + EIGEN_DEVICE_FUNC constexpr DenseStorage(DenseStorage&& other) : DenseStorage(static_cast(other)) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&& other) { + EIGEN_DEVICE_FUNC constexpr DenseStorage& operator=(DenseStorage&& other) { *this = other; return *this; } diff --git a/Eigen/src/Core/DeviceWrapper.h b/Eigen/src/Core/DeviceWrapper.h index 012dce10d1d..1a326ae3bae 100644 --- a/Eigen/src/Core/DeviceWrapper.h +++ b/Eigen/src/Core/DeviceWrapper.h @@ -87,7 +87,7 @@ template struct dense_assignment_loop_with_device { using Base = dense_assignment_loop; - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Device&) { Base::run(kernel); } + static EIGEN_DEVICE_FUNC constexpr void run(Kernel& kernel, Device&) { Base::run(kernel); } }; // entry point for a generic expression with device @@ -104,7 +104,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(De using ActualDstType = std::conditional_t, Dst&>; ActualDstType actualDst(dst.derived()); - // TODO check whether this is the right place to perform these checks: + // TODO: check whether this is the right place to perform these checks: EIGEN_STATIC_ASSERT_LVALUE(Dst) EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src) EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar); diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h index ff8611c6071..61a47842e01 100644 --- a/Eigen/src/Core/Diagonal.h +++ b/Eigen/src/Core/Diagonal.h @@ -71,14 +71,14 @@ class Diagonal : public internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) - EIGEN_DEVICE_FUNC explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) + EIGEN_DEVICE_FUNC constexpr explicit inline Diagonal(MatrixType& matrix, Index 
a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) { eigen_assert(a_index <= m_matrix.cols() && -a_index <= m_matrix.rows()); } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) - EIGEN_DEVICE_FUNC inline Index rows() const { + EIGEN_DEVICE_FUNC constexpr inline Index rows() const { return m_index.value() < 0 ? numext::mini(m_matrix.cols(), m_matrix.rows() + m_index.value()) : numext::mini(m_matrix.rows(), m_matrix.cols() - m_index.value()); } @@ -91,8 +91,12 @@ class Diagonal : public internal::dense_xpr_base::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue; - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } - EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { + return rows() > 0 ? &(m_matrix.coeffRef(rowOffset(), colOffset())) : nullptr; + } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { + return rows() > 0 ? &(m_matrix.coeffRef(rowOffset(), colOffset())) : nullptr; + } EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index) { EIGEN_STATIC_ASSERT_LVALUE(MatrixType) @@ -120,11 +124,12 @@ class Diagonal : public internal::dense_xpr_base& nestedExpression() const { + EIGEN_DEVICE_FUNC constexpr inline const internal::remove_all_t& nestedExpression() + const { return m_matrix; } - EIGEN_DEVICE_FUNC inline Index index() const { return m_index.value(); } + EIGEN_DEVICE_FUNC constexpr inline Index index() const { return m_index.value(); } protected: typename internal::ref_selector::non_const_type m_matrix; @@ -132,15 +137,11 @@ class Diagonal : public internal::dense_xpr_base 0 ? m_index.value() : -m_index.value(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowOffset() const noexcept { - return m_index.value() > 0 ? 
0 : -m_index.value(); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colOffset() const noexcept { - return m_index.value() > 0 ? m_index.value() : 0; - } + EIGEN_DEVICE_FUNC constexpr Index rowOffset() const noexcept { return m_index.value() > 0 ? 0 : -m_index.value(); } + EIGEN_DEVICE_FUNC constexpr Index colOffset() const noexcept { return m_index.value() > 0 ? m_index.value() : 0; } // trigger a compile-time error if someone try to call packet template typename MatrixType::PacketReturnType packet(Index) const; @@ -157,13 +158,13 @@ class Diagonal : public internal::dense_xpr_base -EIGEN_DEVICE_FUNC inline typename MatrixBase::DiagonalReturnType MatrixBase::diagonal() { +EIGEN_DEVICE_FUNC constexpr typename MatrixBase::DiagonalReturnType MatrixBase::diagonal() { return DiagonalReturnType(derived()); } /** This is the const version of diagonal(). */ template -EIGEN_DEVICE_FUNC inline const typename MatrixBase::ConstDiagonalReturnType MatrixBase::diagonal() +EIGEN_DEVICE_FUNC constexpr const typename MatrixBase::ConstDiagonalReturnType MatrixBase::diagonal() const { return ConstDiagonalReturnType(derived()); } @@ -180,13 +181,14 @@ EIGEN_DEVICE_FUNC inline const typename MatrixBase::ConstDiagonalReturn * * \sa MatrixBase::diagonal(), class Diagonal */ template -EIGEN_DEVICE_FUNC inline Diagonal MatrixBase::diagonal(Index index) { +EIGEN_DEVICE_FUNC constexpr Diagonal MatrixBase::diagonal(Index index) { return Diagonal(derived(), index); } /** This is the const version of diagonal(Index). 
*/ template -EIGEN_DEVICE_FUNC inline const Diagonal MatrixBase::diagonal(Index index) const { +EIGEN_DEVICE_FUNC constexpr const Diagonal MatrixBase::diagonal( + Index index) const { return Diagonal(derived(), index); } @@ -203,14 +205,14 @@ EIGEN_DEVICE_FUNC inline const Diagonal MatrixBase< * \sa MatrixBase::diagonal(), class Diagonal */ template template -EIGEN_DEVICE_FUNC inline Diagonal MatrixBase::diagonal() { +EIGEN_DEVICE_FUNC constexpr Diagonal MatrixBase::diagonal() { return Diagonal(derived()); } /** This is the const version of diagonal(). */ template template -EIGEN_DEVICE_FUNC inline const Diagonal MatrixBase::diagonal() const { +EIGEN_DEVICE_FUNC constexpr const Diagonal MatrixBase::diagonal() const { return Diagonal(derived()); } diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 52630d9297e..dda6c8c2fe9 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -61,7 +61,7 @@ class DiagonalBase : public EigenBase { /** * Constructs a dense matrix from \c *this. Note, this directly returns a dense matrix type, * not an expression. - * \returns A dense matrix, with its diagonal entries set from the the derived object. */ + * \returns A dense matrix, with its diagonal entries set from the derived object. */ EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } /** \returns a reference to the derived object's vector of diagonal coefficients. */ @@ -184,21 +184,22 @@ class DiagonalMatrix : public DiagonalBase - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DiagonalMatrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, - const ArgTypes&... args) + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE DiagonalMatrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, + const ArgTypes&... args) : m_diagonal(a0, a1, a2, args...) 
{} /** \brief Constructs a DiagonalMatrix and initializes it by elements given by an initializer list of initializer - * lists \cpp11 + * lists */ EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE DiagonalMatrix( const std::initializer_list>& list) : m_diagonal(list) {} /** \brief Constructs a DiagonalMatrix from an r-value diagonal vector type */ - EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(DiagonalVectorType&& diag) : m_diagonal(std::move(diag)) {} + EIGEN_DEVICE_FUNC constexpr explicit inline DiagonalMatrix(DiagonalVectorType&& diag) : m_diagonal(std::move(diag)) {} /** Copy constructor. */ template - EIGEN_DEVICE_FUNC inline DiagonalMatrix(const DiagonalBase& other) : m_diagonal(other.diagonal()) {} + EIGEN_DEVICE_FUNC constexpr inline DiagonalMatrix(const DiagonalBase& other) + : m_diagonal(other.diagonal()) {} #ifndef EIGEN_PARSED_BY_DOXYGEN /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */ @@ -234,7 +236,8 @@ class DiagonalMatrix : public DiagonalBase - EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(const MatrixBase& other) : m_diagonal(other) {} + EIGEN_DEVICE_FUNC constexpr explicit inline DiagonalMatrix(const MatrixBase& other) + : m_diagonal(other) {} /** Copy operator. */ template @@ -325,10 +328,11 @@ class DiagonalWrapper : public DiagonalBase #endif /** Constructor from expression of diagonal coefficients to wrap. */ - EIGEN_DEVICE_FUNC explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + EIGEN_DEVICE_FUNC constexpr explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) + : m_diagonal(a_diagonal) {} /** \returns a const reference to the wrapped expression of diagonal coefficients. 
*/ - EIGEN_DEVICE_FUNC const DiagonalVectorType& diagonal() const { return m_diagonal; } + EIGEN_DEVICE_FUNC constexpr const DiagonalVectorType& diagonal() const { return m_diagonal; } protected: typename DiagonalVectorType::Nested m_diagonal; @@ -344,7 +348,7 @@ class DiagonalWrapper : public DiagonalBase * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() **/ template -EIGEN_DEVICE_FUNC inline const DiagonalWrapper MatrixBase::asDiagonal() const { +EIGEN_DEVICE_FUNC constexpr const DiagonalWrapper MatrixBase::asDiagonal() const { return DiagonalWrapper(derived()); } @@ -372,6 +376,55 @@ bool MatrixBase::isDiagonal(const RealScalar& prec) const { return true; } +/** \returns DiagonalWrapper. + * + * Example: \include MatrixBase_diagonalView.cpp + * Output: \verbinclude MatrixBase_diagonalView.out + * + * \sa diagonalView() + */ + +/** This is the non-const version of diagonalView() with DiagIndex_ . */ +template +template +EIGEN_DEVICE_FUNC constexpr DiagonalWrapper> MatrixBase::diagonalView() { + typedef Diagonal DiagType; + typedef DiagonalWrapper ReturnType; + DiagType diag(this->derived()); + return ReturnType(diag); +} + +/** This is the const version of diagonalView() with DiagIndex_ . */ +template +template +EIGEN_DEVICE_FUNC constexpr DiagonalWrapper> MatrixBase::diagonalView() + const { + typedef Diagonal DiagType; + typedef DiagonalWrapper ReturnType; + DiagType diag(this->derived()); + return ReturnType(diag); +} + +/** This is the non-const version of diagonalView() with dynamic index. */ +template +EIGEN_DEVICE_FUNC constexpr DiagonalWrapper> MatrixBase::diagonalView( + Index index) { + typedef Diagonal DiagType; + typedef DiagonalWrapper ReturnType; + DiagType diag(this->derived(), index); + return ReturnType(diag); +} + +/** This is the const version of diagonalView() with dynamic index. 
*/ +template +EIGEN_DEVICE_FUNC constexpr DiagonalWrapper> MatrixBase::diagonalView( + Index index) const { + typedef Diagonal DiagType; + typedef DiagonalWrapper ReturnType; + DiagType diag(this->derived(), index); + return ReturnType(diag); +} + namespace internal { template <> diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index 059527c85f8..d0a30dd8210 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -20,15 +20,14 @@ namespace internal { template ::Scalar> struct squared_norm_impl { using Real = typename NumTraits::Real; - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Real run(const Derived& a) { - Scalar result = a.unaryExpr(squared_norm_functor()).sum(); - return numext::real(result) + numext::imag(result); + static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Real run(const Derived& a) { + return a.realView().cwiseAbs2().sum(); } }; template struct squared_norm_impl { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(const Derived& a) { return a.any(); } + static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE bool run(const Derived& a) { return a.any(); } }; } // end namespace internal @@ -46,7 +45,7 @@ struct squared_norm_impl { */ template template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE typename ScalarBinaryOpTraits::Scalar, typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { @@ -57,19 +56,19 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE /** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm. * In both cases, it consists in the sum of the square of all the matrix entries. - * For vectors, this is also equals to the dot product of \c *this with itself. + * For vectors, this is also equal to the dot product of \c *this with itself. 
* * \sa dot(), norm(), lpNorm() */ template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::squaredNorm() const { return internal::squared_norm_impl::run(derived()); } /** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm. * In both cases, it consists in the square root of the sum of the square of all the matrix entries. - * For vectors, this is also equals to the square root of the dot product of \c *this with itself. + * For vectors, this is also equal to the square root of the dot product of \c *this with itself. * * \sa lpNorm(), dot(), squaredNorm() */ diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h index c9a6e88e2f7..dfe4a640431 100644 --- a/Eigen/src/Core/EigenBase.h +++ b/Eigen/src/Core/EigenBase.h @@ -53,7 +53,7 @@ struct EigenBase { EIGEN_DEVICE_FUNC inline constexpr Derived& const_cast_derived() const { return *static_cast(const_cast(this)); } - EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC constexpr inline const Derived& const_derived() const { return *static_cast(this); } /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return derived().rows(); } @@ -65,13 +65,13 @@ struct EigenBase { /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */ template - EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { + EIGEN_DEVICE_FUNC constexpr inline void evalTo(Dest& dst) const { derived().evalTo(dst); } /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */ template - EIGEN_DEVICE_FUNC inline void addTo(Dest& dst) const { + EIGEN_DEVICE_FUNC constexpr inline void addTo(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. 
typename Dest::PlainObject res(rows(), cols()); @@ -81,7 +81,7 @@ struct EigenBase { /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */ template - EIGEN_DEVICE_FUNC inline void subTo(Dest& dst) const { + EIGEN_DEVICE_FUNC constexpr inline void subTo(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. typename Dest::PlainObject res(rows(), cols()); @@ -91,7 +91,7 @@ struct EigenBase { /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */ template - EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const { + EIGEN_DEVICE_FUNC constexpr inline void applyThisOnTheRight(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. dst = dst * this->derived(); @@ -99,7 +99,7 @@ struct EigenBase { /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */ template - EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const { + EIGEN_DEVICE_FUNC constexpr inline void applyThisOnTheLeft(Dest& dst) const { // This is the default implementation, // derived class can reimplement it in a more optimized way. 
dst = this->derived() * dst; @@ -125,21 +125,21 @@ struct EigenBase { */ template template -EIGEN_DEVICE_FUNC Derived& DenseBase::operator=(const EigenBase& other) { +EIGEN_DEVICE_FUNC constexpr Derived& DenseBase::operator=(const EigenBase& other) { call_assignment(derived(), other.derived()); return derived(); } template template -EIGEN_DEVICE_FUNC Derived& DenseBase::operator+=(const EigenBase& other) { +EIGEN_DEVICE_FUNC constexpr Derived& DenseBase::operator+=(const EigenBase& other) { call_assignment(derived(), other.derived(), internal::add_assign_op()); return derived(); } template template -EIGEN_DEVICE_FUNC Derived& DenseBase::operator-=(const EigenBase& other) { +EIGEN_DEVICE_FUNC constexpr Derived& DenseBase::operator-=(const EigenBase& other) { call_assignment(derived(), other.derived(), internal::sub_assign_op()); return derived(); } diff --git a/Eigen/src/Core/Fill.h b/Eigen/src/Core/Fill.h index 9d4ecd445a4..ccbeb50f0c7 100644 --- a/Eigen/src/Core/Fill.h +++ b/Eigen/src/Core/Fill.h @@ -20,11 +20,14 @@ namespace internal { template struct eigen_fill_helper : std::false_type {}; +// Only enable std::fill_n for trivially copyable scalars. GCC's libstdc++ +// fill_n pessimizes non-trivially-copyable types (extra moves per iteration), +// causing measurable regressions for types like AutoDiffScalar (issue #2956). 
template -struct eigen_fill_helper> : std::true_type {}; +struct eigen_fill_helper> : std::is_trivially_copyable {}; template -struct eigen_fill_helper> : std::true_type {}; +struct eigen_fill_helper> : std::is_trivially_copyable {}; template struct eigen_fill_helper> : eigen_fill_helper {}; @@ -60,12 +63,12 @@ struct eigen_fill_impl { using Func = scalar_constant_op; using PlainObject = typename Xpr::PlainObject; using Constant = typename PlainObject::ConstantReturnType; - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const Scalar& val) { + static EIGEN_DEVICE_FUNC constexpr void run(Xpr& dst, const Scalar& val) { const Constant src(dst.rows(), dst.cols(), val); run(dst, src); } template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) { + static EIGEN_DEVICE_FUNC constexpr void run(Xpr& dst, const SrcXpr& src) { call_dense_assignment_loop(dst, src, assign_op()); } }; @@ -78,8 +81,9 @@ template struct eigen_fill_impl { using Scalar = typename Xpr::Scalar; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) { + const Scalar val_copy = val; using std::fill_n; - fill_n(dst.data(), dst.size(), val); + fill_n(dst.data(), dst.size(), val_copy); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { @@ -92,8 +96,10 @@ struct eigen_fill_impl { template struct eigen_memset_helper { - static constexpr bool value = - std::is_trivially_copyable::value && eigen_fill_helper::value; + using Scalar = typename Xpr::Scalar; + static constexpr bool value = std::is_trivially_copyable::value && + !static_cast(NumTraits::RequireInitialization) && + eigen_fill_helper::value; }; template @@ -101,12 +107,12 @@ struct eigen_zero_impl { using Scalar = typename Xpr::Scalar; using PlainObject = typename Xpr::PlainObject; using Zero = typename PlainObject::ZeroReturnType; - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst) { + 
static EIGEN_DEVICE_FUNC constexpr void run(Xpr& dst) { const Zero src(dst.rows(), dst.cols()); run(dst, src); } template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) { + static EIGEN_DEVICE_FUNC constexpr void run(Xpr& dst, const SrcXpr& src) { call_dense_assignment_loop(dst, src, assign_op()); } }; @@ -114,17 +120,15 @@ struct eigen_zero_impl { template struct eigen_zero_impl { using Scalar = typename Xpr::Scalar; - static constexpr size_t max_bytes = (std::numeric_limits::max)(); static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) { - const size_t num_bytes = dst.size() * sizeof(Scalar); - if (num_bytes == 0) return; + const std::ptrdiff_t num_bytes = dst.size() * static_cast(sizeof(Scalar)); + if (num_bytes <= 0) return; void* dst_ptr = static_cast(dst.data()); #ifndef EIGEN_NO_DEBUG - if (num_bytes > max_bytes) throw_std_bad_alloc(); eigen_assert((dst_ptr != nullptr) && "null pointer dereference error!"); #endif EIGEN_USING_STD(memset); - memset(dst_ptr, 0, num_bytes); + memset(dst_ptr, 0, static_cast(num_bytes)); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { diff --git a/Eigen/src/Core/FindCoeff.h b/Eigen/src/Core/FindCoeff.h index 0102e8af3ab..b2645d8e83e 100644 --- a/Eigen/src/Core/FindCoeff.h +++ b/Eigen/src/Core/FindCoeff.h @@ -34,11 +34,11 @@ struct max_coeff_functor { template struct max_coeff_functor { - EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) { + EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { return (candidate > incumbent) || ((candidate != candidate) && (incumbent == incumbent)); } template - EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) { + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { return 
pandnot(pcmp_lt_or_nan(incumbent, candidate), pisnan(incumbent)); } template @@ -79,11 +79,11 @@ struct min_coeff_functor { template struct min_coeff_functor { - EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) { + EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { return (candidate < incumbent) || ((candidate != candidate) && (incumbent == incumbent)); } template - EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) { + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { return pandnot(pcmp_lt_or_nan(candidate, incumbent), pisnan(incumbent)); } template @@ -173,6 +173,10 @@ struct find_coeff_loop { Index& inner) { Index outerSize = eval.outerSize(); Index innerSize = eval.innerSize(); + if (innerSize < PacketSize) { + ScalarImpl::run(eval, func, result, outer, inner); + return; + } Index packetEnd = numext::round_down(innerSize, PacketSize); /* initialization performed in calling function */ @@ -229,6 +233,10 @@ struct find_coeff_loop { static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& result, Index& index) { Index size = eval.size(); + if (size < PacketSize) { + ScalarImpl::run(eval, func, result, index); + return; + } Index packetEnd = numext::round_down(size, PacketSize); /* initialization performed in calling function */ diff --git a/Eigen/src/Core/ForceAlignedAccess.h b/Eigen/src/Core/ForceAlignedAccess.h index 55beab35a19..4f69c20f93a 100644 --- a/Eigen/src/Core/ForceAlignedAccess.h +++ b/Eigen/src/Core/ForceAlignedAccess.h @@ -39,7 +39,7 @@ class ForceAlignedAccess : public internal::dense_xpr_base::type Base; EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) - EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC explicit constexpr ForceAlignedAccess(const 
ExpressionType& matrix) : m_expression(matrix) {} EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } @@ -103,25 +103,6 @@ inline ForceAlignedAccess MatrixBase::forceAlignedAccess() { return ForceAlignedAccess(derived()); } -/** \returns an expression of *this with forced aligned access if \a Enable is true. - * \sa forceAlignedAccess(), class ForceAlignedAccess - */ -template -template -inline add_const_on_value_type_t, Derived&>> -MatrixBase::forceAlignedAccessIf() const { - return derived(); // FIXME This should not work but apparently is never used -} - -/** \returns an expression of *this with forced aligned access if \a Enable is true. - * \sa forceAlignedAccess(), class ForceAlignedAccess - */ -template -template -inline std::conditional_t, Derived&> MatrixBase::forceAlignedAccessIf() { - return derived(); // FIXME This should not work but apparently is never used -} - } // end namespace Eigen #endif // EIGEN_FORCEALIGNEDACCESS_H diff --git a/Eigen/src/Core/Fuzzy.h b/Eigen/src/Core/Fuzzy.h index ed6b4ffead7..eaa553c93cd 100644 --- a/Eigen/src/Core/Fuzzy.h +++ b/Eigen/src/Core/Fuzzy.h @@ -86,8 +86,8 @@ struct isMuchSmallerThan_scalar_selector { */ template template -EIGEN_DEVICE_FUNC bool DenseBase::isApprox(const DenseBase& other, - const RealScalar& prec) const { +EIGEN_DEVICE_FUNC constexpr bool DenseBase::isApprox(const DenseBase& other, + const RealScalar& prec) const { return internal::isApprox_selector::run(derived(), other.derived(), prec); } @@ -105,8 +105,8 @@ EIGEN_DEVICE_FUNC bool DenseBase::isApprox(const DenseBase&, RealScalar) const */ template -EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan(const typename NumTraits::Real& other, - const RealScalar& prec) const { +EIGEN_DEVICE_FUNC constexpr bool DenseBase::isMuchSmallerThan(const typename NumTraits::Real& other, + const RealScalar& prec) const { return 
internal::isMuchSmallerThan_scalar_selector::run(derived(), other, prec); } @@ -122,8 +122,8 @@ EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan(const typename NumT */ template template -EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan(const DenseBase& other, - const RealScalar& prec) const { +EIGEN_DEVICE_FUNC constexpr bool DenseBase::isMuchSmallerThan(const DenseBase& other, + const RealScalar& prec) const { return internal::isMuchSmallerThan_object_selector::run(derived(), other.derived(), prec); } diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index e4c51d2a6f6..707611a82ff 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -89,7 +89,7 @@ struct product_type { /* The following allows to select the kind of product at compile time * based on the three dimensions of the product. * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ -// FIXME I'm not sure the current mapping is the ideal one. +// FIXME: the current compile-time product-type mapping may not be optimal. template struct product_type_selector { enum { ret = OuterProduct }; @@ -193,12 +193,11 @@ struct product_type_selector { * Implementation of Inner Vector Vector Product ***********************************************************************/ -// FIXME : maybe the "inner product" could return a Scalar -// instead of a 1x1 matrix ?? -// Pro: more natural for the user -// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix -// product ends up to a row-vector times col-vector product... To tackle this use -// case, we could have a specialization for Block with: operator=(Scalar x); +// FIXME: consider returning a Scalar instead of a 1x1 matrix for inner products. +// Pro: more natural for the user. +// Con: in a meta-unrolled algorithm a matrix-matrix product may reduce to a +// row-vector times column-vector product. 
To handle this, we could specialize +// Block with operator=(Scalar x). /*********************************************************************** * Implementation of Outer Vector Vector Product @@ -208,7 +207,7 @@ struct product_type_selector { * Implementation of General Matrix Vector Product ***********************************************************************/ -/* According to the shape/flags of the matrix we have to distinghish 3 different cases: +/* According to the shape/flags of the matrix we have to distinguish 3 different cases: * 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine * 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine * 3 - all other cases are handled using a simple loop along the outer-storage direction. @@ -229,7 +228,7 @@ struct gemv_static_vector_if; template struct gemv_static_vector_if { - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { + EIGEN_DEVICE_FUNC constexpr Scalar* data() { eigen_internal_assert(false && "should never be called"); return 0; } @@ -237,19 +236,19 @@ struct gemv_static_vector_if { template struct gemv_static_vector_if { - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; } + EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; } }; template struct gemv_static_vector_if { #if EIGEN_MAX_STATIC_ALIGN_BYTES != 0 internal::plain_array m_data; - EIGEN_STRONG_INLINE constexpr Scalar* data() { return m_data.array; } + constexpr Scalar* data() { return m_data.array; } #else // Some architectures cannot align on the stack, // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. 
internal::plain_array m_data; - EIGEN_STRONG_INLINE constexpr Scalar* data() { + constexpr Scalar* data() { return reinterpret_cast((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + EIGEN_MAX_ALIGN_BYTES); } @@ -293,7 +292,7 @@ struct gemv_dense_selector { typedef std::conditional_t ActualDest; enum { - // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // FIXME: find a way to allow an inner stride on the result if packet_traits::size==1 // on, the other hand it is good for the cache to pack the vector anyways... EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime == 1), ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), @@ -376,7 +375,7 @@ struct gemv_dense_selector { ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs); enum { - // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // FIXME: find a way to allow an inner stride on the result if packet_traits::size==1 // on, the other hand it is good for the cache to pack the vector anyways... 
DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1 || ActualRhsTypeCleaned::MaxSizeAtCompileTime == 0 @@ -417,7 +416,7 @@ struct gemv_dense_selector { static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { EIGEN_STATIC_ASSERT((!nested_eval::Evaluate), EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); - // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, + // TODO: if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, // otherwise use a temp typename nested_eval::type actual_rhs(rhs); const Index size = rhs.rows(); diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index e1d62fa170d..4ef92dbf132 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -57,15 +57,14 @@ struct default_packet_traits { HasConj = 1, HasSetLinear = 1, HasSign = 1, + HasAbsDiff = 1, // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet // types HasRound = 1, HasArg = 0, - HasAbsDiff = 0, - HasBlend = 0, // This flag is used to indicate whether packet comparison is supported. - // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true. + // pcmp_eq and pcmp_lt should be defined for it to be true. 
HasCmp = 0, HasDiv = 0, @@ -88,6 +87,8 @@ struct default_packet_traits { HasATanh = 0, HasSinh = 0, HasCosh = 0, + HasASinh = 0, + HasACosh = 0, HasTanh = 0, HasLGamma = 0, HasDiGamma = 0, @@ -117,6 +118,7 @@ struct packet_traits : default_packet_traits { enum { HasAdd = 0, HasSub = 0, + HasAbsDiff = 0, HasMul = 0, HasNegate = 0, HasAbs = 0, @@ -131,17 +133,18 @@ struct packet_traits : default_packet_traits { template struct packet_traits : packet_traits {}; +struct default_unpacket_traits { + enum { vectorizable = false, masked_load_available = false, masked_store_available = false }; +}; + template -struct unpacket_traits { +struct unpacket_traits : default_unpacket_traits { typedef T type; typedef T half; typedef typename numext::get_integer_by_size::signed_type integer_packet; enum { size = 1, alignment = alignof(T), - vectorizable = false, - masked_load_available = false, - masked_store_available = false }; }; @@ -253,6 +256,12 @@ struct preinterpret_generic { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; } }; +template +struct preinterpret_generic::as_real, ComplexPacket, false> { + using RealPacket = typename unpacket_traits::as_real; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; } +}; + /** \internal \returns reinterpret_cast(a) */ template EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) { @@ -426,30 +435,6 @@ EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) { return pzero_impl::run(a); } -/** \internal \returns a <= b as a bit mask */ -template -EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) { - return a <= b ? ptrue(a) : pzero(a); -} - -/** \internal \returns a < b as a bit mask */ -template -EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) { - return a < b ? 
ptrue(a) : pzero(a); -} - -/** \internal \returns a == b as a bit mask */ -template -EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) { - return a == b ? ptrue(a) : pzero(a); -} - -/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ -template -EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) { - return a >= b ? pzero(a) : ptrue(a); -} - template struct bit_and { EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; } @@ -576,6 +561,30 @@ EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); } +/** \internal \returns a < b as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) { + return a < b ? ptrue(a) : pzero(a); +} + +/** \internal \returns a == b as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) { + return a == b ? ptrue(a) : pzero(a); +} + +/** \internal \returns a <= b as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) { + return por(pcmp_eq(a, b), pcmp_lt(a, b)); +} + +/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) { + return a >= b ? pzero(a) : ptrue(a); +} + // In the general case, use bitwise select. template ::value> struct pselect_impl { @@ -603,7 +612,7 @@ EIGEN_DEVICE_FUNC inline bool pselect(const bool& cond, const bool& a, con return cond ? a : b; } -/** \internal \returns the min or of \a a and \a b (coeff-wise) +/** \internal \returns the min or max of \a a and \a b (coeff-wise) If either \a a or \a b are NaN, the result is implementation defined. 
*/ template struct pminmax_impl { @@ -641,7 +650,7 @@ struct pminmax_impl { #define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); } /** \internal \returns the min of \a a and \a b (coeff-wise). - If \a a or \b b is NaN, the return value is implementation defined. */ + If \a a or \a b is NaN, the return value is implementation defined. */ template EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) { return numext::mini(a, b); @@ -656,7 +665,7 @@ EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) { } /** \internal \returns the max of \a a and \a b (coeff-wise) - If \a a or \b b is NaN, the return value is implementation defined. */ + If \a a or \a b is NaN, the return value is implementation defined. */ template EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); @@ -742,9 +751,15 @@ EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) /** \internal \returns the min of \a a and \a b (coeff-wise) */ template -EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) { +EIGEN_DEVICE_FUNC inline std::enable_if_t::type>::IsInteger, Packet> +pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); } +template +EIGEN_DEVICE_FUNC inline std::enable_if_t::type>::IsInteger, Packet> +pabsdiff(const Packet& a, const Packet& b) { + return pabs(psub(a, b)); +} /** \internal \returns a packet version of \a *from, from must be properly aligned */ template @@ -808,10 +823,24 @@ EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits::ty template EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a); +template ::value, int> = 0> +EIGEN_DEVICE_FUNC inline Scalar pload1_scalar(const Scalar* a) { + Scalar scalar; + EIGEN_USING_STD(memcpy) + memcpy(&scalar, a, sizeof(Scalar)); + return scalar; +} + +template ::value, int> = 0> 
+EIGEN_DEVICE_FUNC inline Scalar pload1_scalar(const Scalar* a) { + return Scalar(*a); +} + /** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */ template EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits::type* a) { - return pset1(*a); + using Scalar = typename unpacket_traits::type; + return pset1(pload1_scalar(a)); } /** \internal \returns a packet with elements of \a *from duplicated. @@ -821,7 +850,7 @@ EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits::t */ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits::type* from) { - return *from; + return pload1(from); } /** \internal \returns a packet with elements of \a *from quadrupled. @@ -997,12 +1026,26 @@ EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) { return a; } -/** \internal \returns \a a with real and imaginary part flipped (for complex type only) */ +/** \internal \returns \a a with real and imaginary parts flipped (for complex types only) */ template EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) { return Packet(numext::imag(a), numext::real(a)); } +/** \internal \returns \a a with real part duplicated (for complex types only) */ +// TODO(rmlarsen): Define and use in all complex backends. +template +EIGEN_DEVICE_FUNC inline Packet pdupreal(const Packet& a) { + return Packet(numext::real(a), numext::real(a)); +} + +/** \internal \returns \a a with imaginary part duplicated (for complex types only) */ +// TODO(rmlarsen): Define and use in all complex backends. 
+template +EIGEN_DEVICE_FUNC inline Packet pdupimag(const Packet& a) { + return Packet(numext::imag(a), numext::imag(a)); +} + /************************** * Special math functions ***************************/ @@ -1091,6 +1134,20 @@ EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& return atanh(a); } +/** \internal \returns the inverse hyperbolic sine of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasinh(const Packet& a) { + EIGEN_USING_STD(asinh); + return asinh(a); +} + +/** \internal \returns the inverse hyperbolic cosine of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacosh(const Packet& a) { + EIGEN_USING_STD(acosh); + return acosh(a); +} + /** \internal \returns the exp of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { @@ -1219,7 +1276,7 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits::type pfirst(const Pac template EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits::size % 8) == 0, typename unpacket_traits::half, Packet> -predux_half_dowto4(const Packet& a) { +predux_half(const Packet& a) { return a; } @@ -1302,9 +1359,7 @@ EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const /** \internal \returns true if all coeffs of \a a means "true" * It is supposed to be called on values returned by pcmp_*. */ -// not needed yet -// template EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a) -// { return bool(a); } +// TODO: implement predux_all when needed. /** \internal \returns true if any coeffs of \a a means "true" * It is supposed to be called on values returned by pcmp_*. 
@@ -1337,27 +1392,27 @@ struct pmadd_impl { return psub(c, pmul(a, b)); } static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) { - return pnegate(pmadd(a, b, c)); + return pnegate(padd(pmul(a, b), c)); } }; template struct pmadd_impl::value && NumTraits::IsSigned>> { static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) { - return numext::fma(a, b, c); + return numext::madd(a, b, c); } static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) { - return numext::fma(a, b, Scalar(-c)); + return numext::madd(a, b, Scalar(-c)); } static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) { - return numext::fma(Scalar(-a), b, c); + return numext::madd(Scalar(-a), b, c); } static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) { - return -Scalar(numext::fma(a, b, c)); + return -Scalar(numext::madd(a, b, c)); } }; -// FMA instructions. +// Multiply-add instructions. /** \internal \returns a * b + c (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) { @@ -1471,26 +1526,11 @@ struct PacketBlock { Packet packet[N]; }; -template -EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& /*kernel*/) { +template +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& /*kernel*/) { // Nothing to do in the scalar case, i.e. a 1x1 matrix. } -/*************************************************************************** - * Selector, i.e. vector of N boolean values used to select (i.e. blend) - * words from 2 packets. 
- ***************************************************************************/ -template -struct Selector { - bool select[N]; -}; - -template -EIGEN_DEVICE_FUNC inline Packet pblend(const Selector::size>& ifPacket, - const Packet& thenPacket, const Packet& elsePacket) { - return ifPacket.select[0] ? thenPacket : elsePacket; -} - /** \internal \returns 1 / a (coeff-wise) */ template EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) { @@ -1596,9 +1636,10 @@ EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits::type; constexpr Index PacketSize = unpacket_traits::size; eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range"); - Scalar aux[PacketSize]; - memset(static_cast(aux), 0x00, sizeof(Scalar) * PacketSize); - smart_copy(from + begin, from + begin + count, aux + begin); + Scalar aux[PacketSize] = {}; + for (Index k = begin; k < begin + count; k++) { + aux[k] = from[k]; + } return ploadu(aux); } @@ -1619,7 +1660,9 @@ EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Ind eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range"); Scalar aux[PacketSize]; pstoreu(aux, from); - smart_copy(aux + begin, aux + begin + count, to + begin); + for (Index k = begin; k < begin + count; k++) { + to[k] = aux[k]; + } } /** \internal copy the packet \a from in the range [begin, begin + count) to \a *to. 
diff --git a/Eigen/src/Core/GlobalFunctions.h b/Eigen/src/Core/GlobalFunctions.h index df1098e27e6..c32aac9acc5 100644 --- a/Eigen/src/Core/GlobalFunctions.h +++ b/Eigen/src/Core/GlobalFunctions.h @@ -130,12 +130,12 @@ using GlobalUnaryPowReturnType = std::enable_if_t< */ #ifdef EIGEN_PARSED_BY_DOXYGEN template -EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType pow(const Eigen::ArrayBase& x, - const ScalarExponent& exponent); +EIGEN_DEVICE_FUNC constexpr inline const GlobalUnaryPowReturnType pow( + const Eigen::ArrayBase& x, const ScalarExponent& exponent); #else template -EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType pow(const Eigen::ArrayBase& x, - const ScalarExponent& exponent) { +EIGEN_DEVICE_FUNC constexpr inline const GlobalUnaryPowReturnType pow( + const Eigen::ArrayBase& x, const ScalarExponent& exponent) { return GlobalUnaryPowReturnType( x.derived(), internal::scalar_unary_pow_op(exponent)); } diff --git a/Eigen/src/Core/IO.h b/Eigen/src/Core/IO.h index 0a1b583d6cc..e2362fcdc55 100644 --- a/Eigen/src/Core/IO.h +++ b/Eigen/src/Core/IO.h @@ -65,7 +65,7 @@ struct IOFormat { fill(_fill), precision(_precision), flags(_flags) { - // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline + // TODO: check if rowPrefix, rowSuffix or rowSeparator contains a newline // don't add rowSpacer if columns are not to be aligned if ((flags & DontAlignCols)) return; int i = int(matPrefix.length()) - 1; diff --git a/Eigen/src/Core/IndexedView.h b/Eigen/src/Core/IndexedView.h index 358239ca86a..150565ad6a0 100644 --- a/Eigen/src/Core/IndexedView.h +++ b/Eigen/src/Core/IndexedView.h @@ -59,7 +59,7 @@ struct traits> : traits { ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike, ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock), - // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag, + // FIXME: we deal with compile-time strides if and only if we have DirectAccessBit flag, // but this is too strict 
regarding negative strides... DirectAccessMask = (int(InnerIncr) != Undefined && int(OuterIncr) != Undefined && InnerIncr >= 0 && OuterIncr >= 0) ? DirectAccessBit @@ -259,26 +259,27 @@ struct unary_evaluator, IndexBased> Alignment = 0 }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) { + EIGEN_DEVICE_FUNC constexpr explicit unary_evaluator(const XprType& xpr) + : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() && m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() && m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { EIGEN_STATIC_ASSERT_LVALUE(XprType) Index row = XprType::RowsAtCompileTime == 1 ? 0 : index; Index col = XprType::RowsAtCompileTime == 1 ? 
index : 0; @@ -287,7 +288,7 @@ struct unary_evaluator, IndexBased> return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const { Index row = XprType::RowsAtCompileTime == 1 ? 0 : index; Index col = XprType::RowsAtCompileTime == 1 ? index : 0; eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() && @@ -295,7 +296,7 @@ struct unary_evaluator, IndexBased> return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const { Index row = XprType::RowsAtCompileTime == 1 ? 0 : index; Index col = XprType::RowsAtCompileTime == 1 ? index : 0; eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() && @@ -308,6 +309,10 @@ struct unary_evaluator, IndexBased> const XprType& m_xpr; }; +// Catch assignments to an IndexedView. 
+template +struct evaluator_assume_aliasing> : std::true_type {}; + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/InnerProduct.h b/Eigen/src/Core/InnerProduct.h index 9849d9b1fe4..9c57e7a7aa2 100644 --- a/Eigen/src/Core/InnerProduct.h +++ b/Eigen/src/Core/InnerProduct.h @@ -142,31 +142,36 @@ struct inner_product_impl { const UnsignedIndex numPackets = size / PacketSize; const UnsignedIndex numRemPackets = (packetEnd - quadEnd) / PacketSize; - Packet presult0, presult1, presult2, presult3; - - presult0 = eval.template packet(0 * PacketSize); - if (numPackets >= 2) presult1 = eval.template packet(1 * PacketSize); - if (numPackets >= 3) presult2 = eval.template packet(2 * PacketSize); - if (numPackets >= 4) { - presult3 = eval.template packet(3 * PacketSize); - - for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) { - presult0 = eval.packet(presult0, k + 0 * PacketSize); - presult1 = eval.packet(presult1, k + 1 * PacketSize); - presult2 = eval.packet(presult2, k + 2 * PacketSize); - presult3 = eval.packet(presult3, k + 3 * PacketSize); + Packet presult0 = eval.template packet(0 * PacketSize); + if (numPackets >= 2) { + Packet presult1 = eval.template packet(1 * PacketSize); + if (numPackets >= 3) { + Packet presult2 = eval.template packet(2 * PacketSize); + if (numPackets >= 4) { + Packet presult3 = eval.template packet(3 * PacketSize); + + for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) { + presult0 = eval.packet(presult0, k + 0 * PacketSize); + presult1 = eval.packet(presult1, k + 1 * PacketSize); + presult2 = eval.packet(presult2, k + 2 * PacketSize); + presult3 = eval.packet(presult3, k + 3 * PacketSize); + } + + if (numRemPackets >= 1) { + presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize); + if (numRemPackets >= 2) { + presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize); + if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize); + } + } + + 
presult2 = padd(presult2, presult3); + } + presult1 = padd(presult1, presult2); } - - if (numRemPackets >= 1) presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize); - if (numRemPackets >= 2) presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize); - if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize); - - presult2 = padd(presult2, presult3); + presult0 = padd(presult0, presult1); } - if (numPackets >= 3) presult1 = padd(presult1, presult2); - if (numPackets >= 2) presult0 = padd(presult0, presult1); - Scalar result = predux(presult0); for (UnsignedIndex k = packetEnd; k < size; k++) { result = eval.coeff(result, k); @@ -211,8 +216,14 @@ struct scalar_inner_product_op { static constexpr bool PacketAccess = false; }; +// Partial specialization for packet access if and only if +// LhsScalar == RhsScalar == ScalarBinaryOpTraits::ReturnType. template -struct scalar_inner_product_op { +struct scalar_inner_product_op< + Scalar, + std::enable_if_t::ReturnType, Scalar>::value, + Scalar>, + Conj> { using result_type = Scalar; using conj_helper = conditional_conj; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a, const Scalar& b) const { diff --git a/Eigen/src/Core/Inverse.h b/Eigen/src/Core/Inverse.h index 79fc3ab6a5a..855e3b3e2d4 100644 --- a/Eigen/src/Core/Inverse.h +++ b/Eigen/src/Core/Inverse.h @@ -49,12 +49,12 @@ class Inverse : public InverseImpl:: typedef typename internal::ref_selector::type Nested; typedef internal::remove_all_t NestedExpression; - explicit EIGEN_DEVICE_FUNC Inverse(const XprType& xpr) : m_xpr(xpr) {} + explicit EIGEN_DEVICE_FUNC constexpr Inverse(const XprType& xpr) : m_xpr(xpr) {} EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_xpr.cols(); } EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_xpr.rows(); } - EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } + EIGEN_DEVICE_FUNC constexpr const XprTypeNestedCleaned& 
nestedExpression() const { return m_xpr; } protected: XprTypeNested m_xpr; diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index c740da72603..33b62c2028b 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -100,7 +100,7 @@ class Map : public MapBase > { typedef typename Base::PointerType PointerType; typedef PointerType PointerArgType; - EIGEN_DEVICE_FUNC inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; } + EIGEN_DEVICE_FUNC constexpr inline PointerType cast_to_pointer_type(PointerArgType ptr) const { return ptr; } EIGEN_DEVICE_FUNC constexpr Index innerStride() const { return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; @@ -120,7 +120,7 @@ class Map : public MapBase > { * \param dataPtr pointer to the array to map * \param stride optional Stride object, passing the strides. */ - EIGEN_DEVICE_FUNC explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType()) + EIGEN_DEVICE_FUNC constexpr explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType()) : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) {} /** Constructor in the dynamic-size vector case. @@ -129,7 +129,7 @@ class Map : public MapBase > { * \param size the size of the vector expression * \param stride optional Stride object, passing the strides. */ - EIGEN_DEVICE_FUNC inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType()) + EIGEN_DEVICE_FUNC constexpr inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType()) : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) {} /** Constructor in the dynamic-size matrix case. @@ -139,7 +139,8 @@ class Map : public MapBase > { * \param cols the number of columns of the matrix expression * \param stride optional Stride object, passing the strides. 
*/ - EIGEN_DEVICE_FUNC inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType()) + EIGEN_DEVICE_FUNC constexpr inline Map(PointerArgType dataPtr, Index rows, Index cols, + const StrideType& stride = StrideType()) : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index 5e3d746bcd2..3e117253a93 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -97,23 +97,23 @@ class MapBase : public internal::dense_xpr_basem_data[colId * colStride() + rowId * rowStride()]; } /** \copydoc PlainObjectBase::coeffRef(Index) const */ - EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { + EIGEN_DEVICE_FUNC constexpr inline const Scalar& coeffRef(Index index) const { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) return this->m_data[index * innerStride()]; } @@ -132,14 +132,14 @@ class MapBase : public internal::dense_xpr_base(); } /** \internal Constructor for dynamically sized vectors */ - EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) + EIGEN_DEVICE_FUNC constexpr inline MapBase(PointerType dataPtr, Index vecSize) : m_data(dataPtr), m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)), m_cols(ColsAtCompileTime == Dynamic ? 
vecSize : Index(ColsAtCompileTime)) { @@ -150,7 +150,7 @@ class MapBase : public internal::dense_xpr_base= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); @@ -238,11 +238,11 @@ class MapBase : public MapBasem_data; } // no const-cast here so non-const-correct code will give a compile error - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) { + EIGEN_DEVICE_FUNC constexpr inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) { return this->m_data[col * colStride() + row * rowStride()]; } - EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index index) { + EIGEN_DEVICE_FUNC constexpr inline ScalarWithConstIfNotLvalue& coeffRef(Index index) { EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) return this->m_data[index * innerStride()]; } @@ -258,9 +258,9 @@ class MapBase : public MapBase(this->m_data + index * innerStride(), val); } - EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} - EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} - EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {} + EIGEN_DEVICE_FUNC constexpr explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} + EIGEN_DEVICE_FUNC constexpr inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} + EIGEN_DEVICE_FUNC constexpr inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {} EIGEN_DEVICE_FUNC Derived& operator=(const MapBase& other) { ReadOnlyMapBase::Base::operator=(other); diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 481e057d03e..83e22147dc2 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -11,7 +11,7 @@ #ifndef EIGEN_MATHFUNCTIONS_H #define EIGEN_MATHFUNCTIONS_H -// TODO this 
should better be moved to NumTraits +// TODO: consider moving these constants to NumTraits. // Source: WolframAlpha #define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L #define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L @@ -74,7 +74,7 @@ struct global_math_functions_filtering_base< template ::IsComplex> struct real_default_impl { typedef typename NumTraits::Real RealScalar; - EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x; } + EIGEN_DEVICE_FUNC static constexpr RealScalar run(const Scalar& x) { return x; } }; template @@ -170,18 +170,24 @@ struct imag_ref_default_impl { template struct imag_ref_default_impl { - EIGEN_DEVICE_FUNC constexpr static Scalar run(Scalar&) { return Scalar(0); } - EIGEN_DEVICE_FUNC constexpr static const Scalar run(const Scalar&) { return Scalar(0); } + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC constexpr static inline RealScalar run(Scalar&) { return RealScalar(0); } + EIGEN_DEVICE_FUNC constexpr static inline RealScalar run(const Scalar&) { return RealScalar(0); } }; template struct imag_ref_impl : imag_ref_default_impl::IsComplex> {}; -template +template ::IsComplex> struct imag_ref_retval { typedef typename NumTraits::Real& type; }; +template +struct imag_ref_retval { + typedef typename NumTraits::Real type; +}; + } // namespace internal namespace numext { @@ -222,7 +228,7 @@ namespace internal { template ::IsComplex> struct conj_default_impl { - EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { return x; } + EIGEN_DEVICE_FUNC static constexpr Scalar run(const Scalar& x) { return x; } }; template @@ -287,7 +293,7 @@ struct sqrt_impl { // Complex sqrt defined in MathFunctionsImpl.h. 
template -EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& a_x); +EIGEN_DEVICE_FUNC constexpr ComplexT complex_sqrt(const ComplexT& a_x); // Custom implementation is faster than `std::sqrt`, works on // GPU, and correctly handles special cases (unlike MSVC). @@ -307,7 +313,7 @@ struct rsqrt_impl; // Complex rsqrt defined in MathFunctionsImpl.h. template -EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& a_x); +EIGEN_DEVICE_FUNC constexpr ComplexT complex_rsqrt(const ComplexT& a_x); template struct rsqrt_impl> { @@ -390,7 +396,7 @@ struct cast_impl EIGEN_DEVICE_FUNC inline NewType cast(const OldType& x) { @@ -504,7 +510,7 @@ struct expm1_retval { // Complex log defined in MathFunctionsImpl.h. template -EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z); +EIGEN_DEVICE_FUNC constexpr ComplexT complex_log(const ComplexT& z); template struct log_impl { @@ -832,8 +838,8 @@ EIGEN_DEVICE_FUNC std::enable_if_t<(std::numeric_limits::has_infinity && !Num template EIGEN_DEVICE_FUNC -std::enable_if_t::has_quiet_NaN || std::numeric_limits::has_signaling_NaN), bool> -isnan_impl(const T&) { + std::enable_if_t::has_quiet_NaN || std::numeric_limits::has_signaling_NaN), bool> + isnan_impl(const T&) { return false; } @@ -895,6 +901,37 @@ struct sign_retval { typedef Scalar type; }; +template ::IsComplex != 0), + bool IsInteger = (NumTraits::IsInteger != 0)> +struct copysign_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& a, const Scalar& b) { + EIGEN_USING_STD(copysign); + return Scalar(copysign(a, b)); + } +}; + +template +struct copysign_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& a, const Scalar& b) { + EIGEN_USING_STD(copysign); + return Scalar(copysign(numext::real(a), numext::real(b)), copysign(numext::imag(a), numext::imag(b))); + } +}; + +template +struct copysign_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& a, const Scalar& b) { + EIGEN_IF_CONSTEXPR(!NumTraits::IsSigned) return a; + const Scalar 
abs_a = a < Scalar(0) ? -a : a; + return b < Scalar(0) ? -abs_a : abs_a; + } +}; + +template +struct copysign_retval { + typedef Scalar type; +}; + // suppress "unary minus operator applied to unsigned type, result still unsigned" warnings on MSVC // note: `0 - a` is distinct from `-a` when Scalar is a floating point type and `a` is zero @@ -941,23 +978,43 @@ struct nearest_integer_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { return x; } }; +// Extra namespace to prevent leaking std::fma into Eigen::internal. +namespace has_fma_detail { + +template +struct has_fma_impl : public std::false_type {}; + +using std::fma; + +template +struct has_fma_impl< + T, std::enable_if_t(), std::declval(), std::declval()))>::value>> + : public std::true_type {}; + +} // namespace has_fma_detail + +template +struct has_fma : public has_fma_detail::has_fma_impl {}; + // Default implementation. -template +template struct fma_impl { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& a, const Scalar& b, const Scalar& c) { - return a * b + c; - } + static_assert(has_fma::value, "No function fma(...) for type. Please provide an implementation."); }; -// ADL version if it exists. +// STD or ADL version if it exists. 
template -struct fma_impl< - T, - std::enable_if_t(), std::declval(), std::declval()))>::value>> { - static T run(const T& a, const T& b, const T& c) { return fma(a, b, c); } +struct fma_impl::value>> { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T run(const T& a, const T& b, const T& c) { + using std::fma; + return fma(a, b, c); + } }; #if defined(EIGEN_GPUCC) +template <> +struct has_fma : public true_type {}; + template <> struct fma_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float run(const float& a, const float& b, const float& c) { @@ -965,6 +1022,9 @@ struct fma_impl { } }; +template <> +struct has_fma : public true_type {}; + template <> struct fma_impl { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double run(const double& a, const double& b, const double& c) { @@ -973,6 +1033,23 @@ struct fma_impl { }; #endif +// Basic multiply-add. +template +struct madd_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& x, const Scalar& y, const Scalar& z) { + return x * y + z; + } +}; + +#if EIGEN_SCALAR_MADD_USE_FMA +template +struct madd_impl::value>> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& x, const Scalar& y, const Scalar& z) { + return fma_impl::run(x, y, z); + } +}; +#endif + } // end namespace internal /**************************************************************************** @@ -983,13 +1060,13 @@ namespace numext { #if (!defined(EIGEN_GPUCC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)) template -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) { +EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) { EIGEN_USING_STD(min) return min EIGEN_NOT_A_MACRO(x, y); } template -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) { +EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) { EIGEN_USING_STD(max) return max EIGEN_NOT_A_MACRO(x, y); } @@ -1134,6 +1211,11 @@ EIGEN_DEVICE_FUNC inline 
EIGEN_MATHFUNC_RETVAL(sign, Scalar) sign(const Scalar& return EIGEN_MATHFUNC_IMPL(sign, Scalar)::run(x); } +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(copysign, Scalar) copysign(const Scalar& x, const Scalar& y) { + return EIGEN_MATHFUNC_IMPL(copysign, Scalar)::run(x, y); +} + template EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(negate, Scalar) negate(const Scalar& x) { return EIGEN_MATHFUNC_IMPL(negate, Scalar)::run(x); @@ -1294,7 +1376,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double trunc(const double& x) { // T is assumed to be an integer type with a>=0, and b>0 template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T div_ceil(T a, T b) { - using UnsignedT = typename internal::make_unsigned::type; + using UnsignedT = std::make_unsigned_t; EIGEN_STATIC_ASSERT((NumTraits::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) // Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations const UnsignedT ua = UnsignedT(a); @@ -1307,8 +1389,8 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T div_ceil(T a, T b) { // T is assumed to be an integer type with a>=0, and b>0 template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T round_down(T a, U b) { - using UnsignedT = typename internal::make_unsigned::type; - using UnsignedU = typename internal::make_unsigned::type; + using UnsignedT = std::make_unsigned_t; + using UnsignedU = std::make_unsigned_t; EIGEN_STATIC_ASSERT((NumTraits::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) EIGEN_STATIC_ASSERT((NumTraits::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) // Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations @@ -1317,6 +1399,12 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T round_down(T a, U b) { return ub * (ua / ub); } +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log2(T x) { + EIGEN_USING_STD(log2); + return log2(x); +} + /** Log base 2 for 32 bits positive integers. 
* Conveniently returns 0 for x==0. */ constexpr int log2(int x) { @@ -1396,17 +1484,17 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double log(const double& x) { #endif template -EIGEN_DEVICE_FUNC -EIGEN_ALWAYS_INLINE std::enable_if_t::IsSigned || NumTraits::IsComplex, typename NumTraits::Real> -abs(const T& x) { +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + std::enable_if_t::IsSigned || NumTraits::IsComplex, typename NumTraits::Real> + abs(const T& x) { EIGEN_USING_STD(abs); return abs(x); } template -EIGEN_DEVICE_FUNC -EIGEN_ALWAYS_INLINE std::enable_if_t::IsSigned || NumTraits::IsComplex), typename NumTraits::Real> -abs(const T& x) { +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + std::enable_if_t::IsSigned || NumTraits::IsComplex), typename NumTraits::Real> + abs(const T& x) { return x; } @@ -1871,7 +1959,8 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double fmod(const double& a, const double& template ::value>> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_left(const Scalar& a, int n) { - return a << n; + using UnsignedScalar = typename numext::get_integer_by_size::unsigned_type; + return bit_cast(bit_cast(a) << n); } template ::value>> @@ -1886,15 +1975,17 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar return bit_cast(bit_cast(a) >> n); } -// Use std::fma if available. -using std::fma; - -// Otherwise, rely on template implementation. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar fma(const Scalar& x, const Scalar& y, const Scalar& z) { return internal::fma_impl::run(x, y, z); } +// Multiply-add. +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar madd(const Scalar& x, const Scalar& y, const Scalar& z) { + return internal::madd_impl::run(x, y, z); +} + } // end namespace numext namespace internal { @@ -2042,7 +2133,15 @@ struct expm1_impl> { template struct rsqrt_impl { +// C4804: unsafe use of type 'bool' in operation. Unavoidable when instantiated with T=bool. 
+#if EIGEN_COMP_MSVC +#pragma warning(push) +#pragma warning(disable : 4804) +#endif EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE T run(const T& x) { return T(1) / numext::sqrt(x); } +#if EIGEN_COMP_MSVC +#pragma warning(pop) +#endif }; #if defined(EIGEN_GPU_COMPILE_PHASE) @@ -2054,6 +2153,57 @@ struct conj_impl, true> { }; #endif +// Complex multiply and division operators. +// Note that these do not handle the case if inf+NaNi, which is considered an infinity. +// This is for consistency with our standard pmul, pdiv implementations. +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex complex_multiply(const std::complex& a, + const std::complex& b) { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + return std::complex(a_real * b_real - a_imag * b_imag, a_imag * b_real + a_real * b_imag); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex complex_divide_fast(const std::complex& a, + const std::complex& b) { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + const T norm = (b_real * b_real + b_imag * b_imag); + return std::complex((a_real * b_real + a_imag * b_imag) / norm, (a_imag * b_real - a_real * b_imag) / norm); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex complex_divide_smith(const std::complex& a, + const std::complex& b) { + const T a_real = numext::real(a); + const T a_imag = numext::imag(a); + const T b_real = numext::real(b); + const T b_imag = numext::imag(b); + // Smith's complex division (https://arxiv.org/pdf/1210.4539.pdf), + // guards against over/under-flow. + const bool scale_imag = numext::abs(b_imag) <= numext::abs(b_real); + const T rscale = scale_imag ? T(1) : b_real / b_imag; + const T iscale = scale_imag ? 
b_imag / b_real : T(1); + const T denominator = b_real * rscale + b_imag * iscale; + return std::complex((a_real * rscale + a_imag * iscale) / denominator, + (a_imag * rscale - a_real * iscale) / denominator); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::complex complex_divide(const std::complex& a, + const std::complex& b) { +#if EIGEN_FAST_MATH + return complex_divide_fast(a, b); +#else + return complex_divide_smith(a, b); +#endif +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index cf8dcc3b893..243318762e4 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -28,7 +28,7 @@ namespace internal { 2. If a is zero, approx_a_recip must be infinite with the same sign as a. 3. If a is infinite, approx_a_recip must be zero with the same sign as a. - If the preconditions are satisfied, which they are for for the _*_rcp_ps + If the preconditions are satisfied, which they are for the _*_rcp_ps instructions on x86, the result has a maximum relative error of 2 ulps, and correctly handles reciprocals of zero, infinity, and NaN. */ @@ -37,15 +37,16 @@ struct generic_reciprocal_newton_step { static_assert(Steps > 0, "Steps must be at least 1."); EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_a_recip) { using Scalar = typename unpacket_traits::type; - const Packet two = pset1(Scalar(2)); + const Packet one = pset1(Scalar(1)); // Refine the approximation using one Newton-Raphson step: // x_{i} = x_{i-1} * (2 - a * x_{i-1}) const Packet x = generic_reciprocal_newton_step::run(a, approx_a_recip); - const Packet tmp = pnmadd(a, x, two); + const Packet tmp = pnmadd(a, x, one); // If tmp is NaN, it means that a is either +/-0 or +/-Inf. // In this case return the approximation directly. 
const Packet is_not_nan = pcmp_eq(tmp, tmp); - return pselect(is_not_nan, pmul(x, tmp), x); + // Use two FMAs instead of FMA+FMUL to improve precision. + return pselect(is_not_nan, pmadd(x, tmp, x), x); } }; @@ -66,7 +67,7 @@ struct generic_reciprocal_newton_step { 2. If a is zero, approx_a_recip must be infinite with the same sign as a. 3. If a is infinite, approx_a_recip must be zero with the same sign as a. - If the preconditions are satisfied, which they are for for the _*_rcp_ps + If the preconditions are satisfied, which they are for the _*_rcp_ps instructions on x86, the result has a maximum relative error of 2 ulps, and correctly handles zero, infinity, and NaN. Positive denormals are treated as zero. @@ -116,7 +117,7 @@ struct generic_rsqrt_newton_step { 2. If a is zero, approx_rsqrt must be infinite. 3. If a is infinite, approx_rsqrt must be zero. - If the preconditions are satisfied, which they are for for the _*_rsqrt_ps + If the preconditions are satisfied, which they are for the _*_rsqrt_ps instructions on x86, the result has a maximum relative error of 2 ulps, and correctly handles zero and infinity, and NaN. Positive denormal inputs are treated as zero. @@ -147,16 +148,16 @@ struct generic_sqrt_newton_step { }; template -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) { +EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, + const RealScalar& y) { // IEEE IEC 6059 special cases. 
if ((numext::isinf)(x) || (numext::isinf)(y)) return NumTraits::infinity(); if ((numext::isnan)(x) || (numext::isnan)(y)) return NumTraits::quiet_NaN(); EIGEN_USING_STD(sqrt); - RealScalar p, qp; - p = numext::maxi(x, y); + RealScalar p = numext::maxi(x, y); if (numext::is_exactly_zero(p)) return RealScalar(0); - qp = numext::mini(y, x) / p; + RealScalar qp = numext::mini(y, x) / p; return p * sqrt(RealScalar(1) + qp * qp); } @@ -172,7 +173,7 @@ struct hypot_impl { // Generic complex sqrt implementation that correctly handles corner cases // according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt template -EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& z) { +EIGEN_DEVICE_FUNC constexpr ComplexT complex_sqrt(const ComplexT& z) { // Computes the principal sqrt of the input. // // For a complex square root of the number x + i*y. We want to find real @@ -208,7 +209,7 @@ EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& z) { // Generic complex rsqrt implementation. template -EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& z) { +EIGEN_DEVICE_FUNC constexpr ComplexT complex_rsqrt(const ComplexT& z) { // Computes the principal reciprocal sqrt of the input. // // For a complex reciprocal square root of the number z = x + i*y. We want to @@ -247,7 +248,7 @@ EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& z) { } template -EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z) { +EIGEN_DEVICE_FUNC constexpr ComplexT complex_log(const ComplexT& z) { // Computes complex log. 
using T = typename NumTraits::Real; T a = numext::abs(z); diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index a2c8eba574a..314c01dd9fa 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -207,7 +207,7 @@ class Matrix : public PlainObjectBase::value) { Base::operator=(std::move(other)); return *this; @@ -271,7 +271,7 @@ class Matrix : public PlainObjectBase - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Matrix(const T& x) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE explicit Matrix(const T& x) { Base::template _init1(x); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) { Base::template _init2(x, y); } @@ -367,7 +366,7 @@ class Matrix : public PlainObjectBase&) */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const EigenBase& other) : Base(other.derived()) {} + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Matrix(const EigenBase& other) + : Base(other.derived()) {} EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return 1; } EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return this->innerSize(); } @@ -433,14 +434,14 @@ class Matrix : public PlainObjectBase` where `Size` can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size. * - `MatrixXSize` and `MatrixSizeX` where `Size` can be \c 2,\c 3,\c 4 for hybrid dynamic/fixed matrices. * - `VectorSize` and `RowVectorSize` for column and row vectors. * - * With \cpp11, you can also use fully generic column and row vector types: `Vector` and + * You can also use fully generic column and row vector types: `Vector` and * `RowVector`. 
* * \sa class Matrix @@ -484,28 +485,28 @@ EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cd) #undef EIGEN_MAKE_TYPEDEFS #undef EIGEN_MAKE_FIXED_TYPEDEFS -#define EIGEN_MAKE_TYPEDEFS(Size, SizeSuffix) \ - /** \ingroup matrixtypedefs */ \ - /** \brief \cpp11 `Size`×`Size` matrix of type `Type`.*/ \ - template \ - using Matrix##SizeSuffix = Matrix; \ - /** \ingroup matrixtypedefs */ \ - /** \brief \cpp11 `Size`×`1` vector of type `Type`.*/ \ - template \ - using Vector##SizeSuffix = Matrix; \ - /** \ingroup matrixtypedefs */ \ - /** \brief \cpp11 `1`×`Size` vector of type `Type`.*/ \ - template \ +#define EIGEN_MAKE_TYPEDEFS(Size, SizeSuffix) \ + /** \ingroup matrixtypedefs */ \ + /** \brief `Size`×`Size` matrix of type `Type`.*/ \ + template \ + using Matrix##SizeSuffix = Matrix; \ + /** \ingroup matrixtypedefs */ \ + /** \brief `Size`×`1` vector of type `Type`.*/ \ + template \ + using Vector##SizeSuffix = Matrix; \ + /** \ingroup matrixtypedefs */ \ + /** \brief `1`×`Size` vector of type `Type`.*/ \ + template \ using RowVector##SizeSuffix = Matrix; -#define EIGEN_MAKE_FIXED_TYPEDEFS(Size) \ - /** \ingroup matrixtypedefs */ \ - /** \brief \cpp11 `Size`×`Dynamic` matrix of type `Type` */ \ - template \ - using Matrix##Size##X = Matrix; \ - /** \ingroup matrixtypedefs */ \ - /** \brief \cpp11 `Dynamic`×`Size` matrix of type `Type`. */ \ - template \ +#define EIGEN_MAKE_FIXED_TYPEDEFS(Size) \ + /** \ingroup matrixtypedefs */ \ + /** \brief `Size`×`Dynamic` matrix of type `Type` */ \ + template \ + using Matrix##Size##X = Matrix; \ + /** \ingroup matrixtypedefs */ \ + /** \brief `Dynamic`×`Size` matrix of type `Type`. */ \ + template \ using Matrix##X##Size = Matrix; EIGEN_MAKE_TYPEDEFS(2, 2) @@ -517,12 +518,12 @@ EIGEN_MAKE_FIXED_TYPEDEFS(3) EIGEN_MAKE_FIXED_TYPEDEFS(4) /** \ingroup matrixtypedefs - * \brief \cpp11 `Size`×`1` vector of type `Type`. */ + * \brief `Size`×`1` vector of type `Type`. 
*/ template using Vector = Matrix; /** \ingroup matrixtypedefs - * \brief \cpp11 `1`×`Size` vector of type `Type`. */ + * \brief `1`×`Size` vector of type `Type`. */ template using RowVector = Matrix; diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 8d5c47e472f..ff1a2c23264 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -99,7 +99,7 @@ class MatrixBase : public DenseBase { /** \returns the size of the main diagonal, which is min(rows(),cols()). * \sa rows(), cols(), SizeAtCompileTime. */ - EIGEN_DEVICE_FUNC inline Index diagonalSize() const { return (numext::mini)(rows(), cols()); } + EIGEN_DEVICE_FUNC constexpr Index diagonalSize() const { return (numext::mini)(rows(), cols()); } typedef typename Base::PlainObject PlainObject; @@ -136,19 +136,19 @@ class MatrixBase : public DenseBase { /** Special case of the template operator=, in order to prevent the compiler * from generating a default operator= (issue hit with g++ 4.1) */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const MatrixBase& other); + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator=(const MatrixBase& other); // We cannot inherit here via Base::operator= since it is causing // trouble with MSVC. 
template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); template - EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase& other); + EIGEN_DEVICE_FUNC constexpr Derived& operator=(const EigenBase& other); template - EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& other); + EIGEN_DEVICE_FUNC constexpr Derived& operator=(const ReturnByValue& other); template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const MatrixBase& other); @@ -180,11 +180,11 @@ class MatrixBase : public DenseBase { const SkewSymmetricBase& skew) const; template - EIGEN_DEVICE_FUNC typename ScalarBinaryOpTraits::Scalar, - typename internal::traits::Scalar>::ReturnType + EIGEN_DEVICE_FUNC constexpr typename ScalarBinaryOpTraits::Scalar, + typename internal::traits::Scalar>::ReturnType dot(const MatrixBase& other) const; - EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; + EIGEN_DEVICE_FUNC constexpr RealScalar squaredNorm() const; EIGEN_DEVICE_FUNC RealScalar norm() const; RealScalar stableNorm() const; RealScalar blueNorm() const; @@ -194,23 +194,23 @@ class MatrixBase : public DenseBase { EIGEN_DEVICE_FUNC void normalize(); EIGEN_DEVICE_FUNC void stableNormalize(); - EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; + EIGEN_DEVICE_FUNC constexpr const AdjointReturnType adjoint() const; EIGEN_DEVICE_FUNC void adjointInPlace(); typedef Diagonal DiagonalReturnType; - EIGEN_DEVICE_FUNC DiagonalReturnType diagonal(); + EIGEN_DEVICE_FUNC constexpr DiagonalReturnType diagonal(); typedef Diagonal ConstDiagonalReturnType; - EIGEN_DEVICE_FUNC const ConstDiagonalReturnType diagonal() const; + EIGEN_DEVICE_FUNC constexpr const ConstDiagonalReturnType diagonal() const; template - EIGEN_DEVICE_FUNC Diagonal diagonal(); + EIGEN_DEVICE_FUNC constexpr Diagonal diagonal(); template - EIGEN_DEVICE_FUNC const Diagonal diagonal() const; + EIGEN_DEVICE_FUNC 
constexpr const Diagonal diagonal() const; - EIGEN_DEVICE_FUNC Diagonal diagonal(Index index); - EIGEN_DEVICE_FUNC const Diagonal diagonal(Index index) const; + EIGEN_DEVICE_FUNC constexpr Diagonal diagonal(Index index); + EIGEN_DEVICE_FUNC constexpr const Diagonal diagonal(Index index) const; template struct TriangularViewReturnType { @@ -222,9 +222,9 @@ class MatrixBase : public DenseBase { }; template - EIGEN_DEVICE_FUNC typename TriangularViewReturnType::Type triangularView(); + EIGEN_DEVICE_FUNC constexpr typename TriangularViewReturnType::Type triangularView(); template - EIGEN_DEVICE_FUNC typename ConstTriangularViewReturnType::Type triangularView() const; + EIGEN_DEVICE_FUNC constexpr typename ConstTriangularViewReturnType::Type triangularView() const; template struct SelfAdjointViewReturnType { @@ -236,9 +236,9 @@ class MatrixBase : public DenseBase { }; template - EIGEN_DEVICE_FUNC typename SelfAdjointViewReturnType::Type selfadjointView(); + EIGEN_DEVICE_FUNC constexpr typename SelfAdjointViewReturnType::Type selfadjointView(); template - EIGEN_DEVICE_FUNC typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; + EIGEN_DEVICE_FUNC constexpr typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; const SparseView sparseView( const Scalar& m_reference = Scalar(0), @@ -252,9 +252,9 @@ class MatrixBase : public DenseBase { EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ(); EIGEN_DEVICE_FUNC static const BasisReturnType UnitW(); - EIGEN_DEVICE_FUNC const DiagonalWrapper asDiagonal() const; + EIGEN_DEVICE_FUNC constexpr const DiagonalWrapper asDiagonal() const; const PermutationWrapper asPermutation() const; - EIGEN_DEVICE_FUNC const SkewSymmetricWrapper asSkewSymmetric() const; + EIGEN_DEVICE_FUNC constexpr const SkewSymmetricWrapper asSkewSymmetric() const; EIGEN_DEVICE_FUNC Derived& setIdentity(); EIGEN_DEVICE_FUNC Derived& setIdentity(Index rows, Index cols); @@ -274,6 +274,17 @@ class MatrixBase : public DenseBase { 
const RealScalar& prec = NumTraits::dummy_precision()) const; bool isUnitary(const RealScalar& prec = NumTraits::dummy_precision()) const; + /* diagonalView */ + template + EIGEN_DEVICE_FUNC constexpr DiagonalWrapper> diagonalView(); + + template + EIGEN_DEVICE_FUNC constexpr DiagonalWrapper> diagonalView() const; + + EIGEN_DEVICE_FUNC constexpr DiagonalWrapper> diagonalView(Index index); + + EIGEN_DEVICE_FUNC constexpr DiagonalWrapper> diagonalView(Index index) const; + /** \returns true if each coefficients of \c *this and \a other are all exactly equal. * \warning When using floating point scalar values you probably should rather use a * fuzzy comparison such as isApprox() @@ -296,14 +307,14 @@ class MatrixBase : public DenseBase { // TODO forceAlignedAccess is temporarily disabled // Need to find a nicer workaround. - inline const Derived& forceAlignedAccess() const { return derived(); } - inline Derived& forceAlignedAccess() { return derived(); } + constexpr const Derived& forceAlignedAccess() const { return derived(); } + constexpr Derived& forceAlignedAccess() { return derived(); } template - inline const Derived& forceAlignedAccessIf() const { + constexpr const Derived& forceAlignedAccessIf() const { return derived(); } template - inline Derived& forceAlignedAccessIf() { + constexpr Derived& forceAlignedAccessIf() { return derived(); } @@ -312,29 +323,31 @@ class MatrixBase : public DenseBase { template EIGEN_DEVICE_FUNC RealScalar lpNorm() const; - EIGEN_DEVICE_FUNC MatrixBase& matrix() { return *this; } - EIGEN_DEVICE_FUNC const MatrixBase& matrix() const { return *this; } + EIGEN_DEVICE_FUNC constexpr MatrixBase& matrix() { return *this; } + EIGEN_DEVICE_FUNC constexpr const MatrixBase& matrix() const { return *this; } /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix * \sa ArrayBase::matrix() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper array() { return ArrayWrapper(derived()); } + EIGEN_DEVICE_FUNC constexpr 
EIGEN_STRONG_INLINE ArrayWrapper array() { + return ArrayWrapper(derived()); + } /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix * \sa ArrayBase::matrix() */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper array() const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const ArrayWrapper array() const { return ArrayWrapper(derived()); } /////////// LU module /////////// template - inline const FullPivLU fullPivLu() const; + inline FullPivLU fullPivLu() const; template - inline const PartialPivLU partialPivLu() const; + inline PartialPivLU partialPivLu() const; template - inline const PartialPivLU lu() const; + inline PartialPivLU lu() const; - EIGEN_DEVICE_FUNC inline const Inverse inverse() const; + EIGEN_DEVICE_FUNC inline Inverse inverse() const; template inline void computeInverseAndDetWithCheck( @@ -350,18 +363,18 @@ class MatrixBase : public DenseBase { /////////// Cholesky module /////////// - inline const LLT llt() const; - inline const LDLT ldlt() const; + inline LLT llt() const; + inline LDLT ldlt() const; /////////// QR module /////////// - inline const HouseholderQR householderQr() const; + inline HouseholderQR householderQr() const; template - inline const ColPivHouseholderQR colPivHouseholderQr() const; + inline ColPivHouseholderQR colPivHouseholderQr() const; template - inline const FullPivHouseholderQR fullPivHouseholderQr() const; + inline FullPivHouseholderQR fullPivHouseholderQr() const; template - inline const CompleteOrthogonalDecomposition completeOrthogonalDecomposition() const; + inline CompleteOrthogonalDecomposition completeOrthogonalDecomposition() const; /////////// Eigenvalues module /////////// @@ -373,12 +386,14 @@ class MatrixBase : public DenseBase { template inline JacobiSVD jacobiSvd() const; template - EIGEN_DEPRECATED inline JacobiSVD jacobiSvd(unsigned int computationOptions) const; + EIGEN_DEPRECATED_WITH_REASON("Options should be specified using method's template parameter.") 
+ inline JacobiSVD jacobiSvd(unsigned int computationOptions) const; template inline BDCSVD bdcSvd() const; template - EIGEN_DEPRECATED inline BDCSVD bdcSvd(unsigned int computationOptions) const; + EIGEN_DEPRECATED_WITH_REASON("Options should be specified using method's template parameter.") + inline BDCSVD bdcSvd(unsigned int computationOptions) const; /////////// Geometry module /////////// @@ -391,11 +406,11 @@ class MatrixBase : public DenseBase { EIGEN_DEVICE_FUNC inline PlainObject unitOrthogonal(void) const; - EIGEN_DEPRECATED EIGEN_DEVICE_FUNC inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; + EIGEN_DEPRECATED_WITH_REASON("Use .canonicalEulerAngles() instead.") + EIGEN_DEVICE_FUNC inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; EIGEN_DEVICE_FUNC inline Matrix canonicalEulerAngles(Index a0, Index a1, Index a2) const; - // put this as separate enum value to work around possible GCC 4.3 bug (?) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime == 1 && RowsAtCompileTime == 1 diff --git a/Eigen/src/Core/NestByValue.h b/Eigen/src/Core/NestByValue.h index 2ce83a8c564..f18559ba18c 100644 --- a/Eigen/src/Core/NestByValue.h +++ b/Eigen/src/Core/NestByValue.h @@ -43,24 +43,24 @@ class NestByValue : public internal::dense_xpr_base EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) - EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + EIGEN_DEVICE_FUNC constexpr explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } - EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } + EIGEN_DEVICE_FUNC constexpr operator const ExpressionType&() const { return m_expression; } - EIGEN_DEVICE_FUNC const ExpressionType& nestedExpression() const { return m_expression; } + 
EIGEN_DEVICE_FUNC constexpr const ExpressionType& nestedExpression() const { return m_expression; } - EIGEN_DEVICE_FUNC typename std::enable_if::type data() const { + EIGEN_DEVICE_FUNC constexpr std::enable_if_t data() const { return m_expression.data(); } - EIGEN_DEVICE_FUNC typename std::enable_if::type innerStride() const { + EIGEN_DEVICE_FUNC constexpr std::enable_if_t innerStride() const { return m_expression.innerStride(); } - EIGEN_DEVICE_FUNC typename std::enable_if::type outerStride() const { + EIGEN_DEVICE_FUNC constexpr std::enable_if_t outerStride() const { return m_expression.outerStride(); } @@ -71,7 +71,7 @@ class NestByValue : public internal::dense_xpr_base /** \returns an expression of the temporary version of *this. */ template -EIGEN_DEVICE_FUNC inline const NestByValue DenseBase::nestByValue() const { +EIGEN_DEVICE_FUNC constexpr inline const NestByValue DenseBase::nestByValue() const { return NestByValue(derived()); } @@ -82,7 +82,7 @@ template struct evaluator > : public evaluator { typedef evaluator Base; - EIGEN_DEVICE_FUNC explicit evaluator(const NestByValue& xpr) : Base(xpr.nestedExpression()) {} + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const NestByValue& xpr) : Base(xpr.nestedExpression()) {} }; } // namespace internal diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h index b6c7209104e..6a882014b40 100644 --- a/Eigen/src/Core/NoAlias.h +++ b/Eigen/src/Core/NoAlias.h @@ -35,7 +35,7 @@ class NoAlias { public: typedef typename ExpressionType::Scalar Scalar; - EIGEN_DEVICE_FUNC explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} + EIGEN_DEVICE_FUNC constexpr explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) { @@ -58,7 +58,7 @@ class NoAlias { return m_expression; } - EIGEN_DEVICE_FUNC ExpressionType& expression() const { return m_expression; } + EIGEN_DEVICE_FUNC constexpr 
ExpressionType& expression() const { return m_expression; } protected: ExpressionType& m_expression; diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h index 5e4e5c2ff60..2bb1eedb8fe 100644 --- a/Eigen/src/Core/NumTraits.h +++ b/Eigen/src/Core/NumTraits.h @@ -95,9 +95,22 @@ struct default_max_digits10_impl // Integer } // end namespace internal namespace numext { -/** \internal bit-wise cast without changing the underlying bit representation. */ -// TODO: Replace by std::bit_cast (available in C++20) +/** \internal bit-wise cast without changing the underlying bit representation. */ +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +template +EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) { + return std::bit_cast(src); +} +#elif EIGEN_HAS_BUILTIN(__builtin_bit_cast) +template +EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) { + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED) + return __builtin_bit_cast(Tgt, src); +} +#else template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) { // The behaviour of memcpy is not specified for non-trivially copyable types @@ -113,6 +126,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) { memcpy(static_cast(&tgt), static_cast(&staged), sizeof(Tgt)); return tgt; } +#endif } // namespace numext // clang-format off diff --git a/Eigen/src/Core/PartialReduxEvaluator.h b/Eigen/src/Core/PartialReduxEvaluator.h index 1f638f9ac5d..18aeca3887d 100644 --- a/Eigen/src/Core/PartialReduxEvaluator.h +++ b/Eigen/src/Core/PartialReduxEvaluator.h @@ -42,12 +42,12 @@ namespace internal { /* logic deciding a strategy for unrolling of vectorized paths */ template struct packetwise_redux_traits { - enum { - OuterSize = int(Evaluator::IsRowMajor) ? 
Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime, - Cost = OuterSize == Dynamic ? HugeCost - : OuterSize * Evaluator::CoeffReadCost + (OuterSize - 1) * functor_traits::Cost, - Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling - }; + static constexpr int OuterSize = + int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime; + static constexpr int Cost = OuterSize == Dynamic + ? HugeCost + : OuterSize * Evaluator::CoeffReadCost + (OuterSize - 1) * functor_traits::Cost; + static constexpr int Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling; }; /* Value to be returned when size==0 , by default let's return 0 */ @@ -70,8 +70,8 @@ struct packetwise_redux_impl; /* Perform the actual reduction with unrolling */ template struct packetwise_redux_impl { - typedef redux_novec_unroller Base; - typedef typename Evaluator::Scalar Scalar; + using Base = redux_novec_unroller; + using Scalar = typename Evaluator::Scalar; template EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func, Index /*size*/) { @@ -96,8 +96,8 @@ struct redux_vec_unroller { /* Perform the actual reduction for dynamic sizes */ template struct packetwise_redux_impl { - typedef typename Evaluator::Scalar Scalar; - typedef typename redux_traits::PacketType PacketScalar; + using Scalar = typename Evaluator::Scalar; + using PacketScalar = typename redux_traits::PacketType; template EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size) { @@ -122,8 +122,8 @@ struct packetwise_redux_impl { template struct packetwise_segment_redux_impl { - typedef typename Evaluator::Scalar Scalar; - typedef typename redux_traits::PacketType PacketScalar; + using Scalar = typename Evaluator::Scalar; + using PacketScalar = typename redux_traits::PacketType; template EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size, 
Index begin, @@ -140,16 +140,16 @@ struct packetwise_segment_redux_impl { template struct evaluator > : evaluator_base > { - typedef PartialReduxExpr XprType; - typedef typename internal::nested_eval::type ArgTypeNested; - typedef add_const_on_value_type_t ConstArgTypeNested; - typedef internal::remove_all_t ArgTypeNestedCleaned; - typedef typename ArgType::Scalar InputScalar; - typedef typename XprType::Scalar Scalar; + using XprType = PartialReduxExpr; + using ArgTypeNested = typename internal::nested_eval::type; + using ConstArgTypeNested = add_const_on_value_type_t; + using ArgTypeNestedCleaned = internal::remove_all_t; + using InputScalar = typename ArgType::Scalar; + using Scalar = typename XprType::Scalar; enum { TraversalSize = Direction == int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) }; - typedef typename MemberOp::template Cost CostOpType; + using CostOpType = typename MemberOp::template Cost; enum { CoeffReadCost = TraversalSize == Dynamic ? HugeCost : TraversalSize == 0 @@ -168,13 +168,13 @@ struct evaluator > Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized }; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) { + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) { EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize == Dynamic ? HugeCost : (TraversalSize == 0 ? 1 : int(CostOpType::value))); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - typedef typename XprType::CoeffReturnType CoeffReturnType; + using CoeffReturnType = typename XprType::CoeffReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const { return coeff(Direction == Vertical ? 
j : i); @@ -199,11 +199,10 @@ struct evaluator > using BinaryOp = typename MemberOp::BinaryOp; using Impl = internal::packetwise_redux_impl; - // FIXME - // See bug 1612, currently if PacketSize==1 (i.e. complex with 128bits registers) then the storage-order of - // panel get reversed and methods like packetByOuterInner do not make sense anymore in this context. So let's just - // by pass "vectorization" in this case: - if (PacketSize == 1) return internal::pset1(coeff(idx)); + // Workaround for issue 1612 (closed): when PacketSize==1 (i.e. complex with 128bits registers) the + // storage-order of panel gets reversed and methods like packetByOuterInner do not make sense in this context, so + // bypass "vectorization": + EIGEN_IF_CONSTEXPR(PacketSize == 1) return internal::pset1(coeff(idx)); Index startRow = Direction == Vertical ? 0 : idx; Index startCol = Direction == Vertical ? idx : 0; diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h index 4748b118a68..77133545165 100644 --- a/Eigen/src/Core/PermutationMatrix.h +++ b/Eigen/src/Core/PermutationMatrix.h @@ -109,6 +109,9 @@ class PermutationBase : public EigenBase { */ DenseMatrixType toDenseMatrix() const { return derived(); } + /** \returns the plain matrix representation of the permutation. */ + DenseMatrixType eval() const { return toDenseMatrix(); } + /** const version of indices(). */ const IndicesType& indices() const { return derived().indices(); } /** \returns a reference to the stored array representing the permutation. */ @@ -468,17 +471,17 @@ class PermutationWrapper : public PermutationBase -EIGEN_DEVICE_FUNC const Product operator*( +EIGEN_DEVICE_FUNC const Product operator*( const MatrixBase& matrix, const PermutationBase& permutation) { - return Product(matrix.derived(), permutation.derived()); + return Product(matrix.derived(), permutation.derived()); } /** \returns the matrix with the permutation applied to the rows. 
*/ template -EIGEN_DEVICE_FUNC const Product operator*( +EIGEN_DEVICE_FUNC const Product operator*( const PermutationBase& permutation, const MatrixBase& matrix) { - return Product(permutation.derived(), matrix.derived()); + return Product(permutation.derived(), matrix.derived()); } template @@ -520,16 +523,16 @@ class InverseImpl : public EigenBase - friend const Product operator*(const MatrixBase& matrix, - const InverseType& trPerm) { - return Product(matrix.derived(), trPerm.derived()); + friend const Product operator*(const MatrixBase& matrix, + const InverseType& trPerm) { + return Product(matrix.derived(), trPerm.derived()); } /** \returns the matrix with the inverse permutation applied to the rows. */ template - const Product operator*(const MatrixBase& matrix) const { - return Product(derived(), matrix.derived()); + const Product operator*(const MatrixBase& matrix) const { + return Product(derived(), matrix.derived()); } }; diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index a78305e2592..2f4c357c5f2 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -159,17 +159,17 @@ class PlainObjectBase : public internal::dense_xpr_base::type { INVALID_MATRIX_TEMPLATE_PARAMETERS) EIGEN_STATIC_ASSERT(((Options & (DontAlign | RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS) - EIGEN_DEVICE_FUNC Base& base() { return *static_cast(this); } - EIGEN_DEVICE_FUNC const Base& base() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC constexpr Base& base() { return *static_cast(this); } + EIGEN_DEVICE_FUNC constexpr const Base& base() const { return *static_cast(this); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_storage.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_storage.cols(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_storage.rows(); } + EIGEN_DEVICE_FUNC 
constexpr Index cols() const noexcept { return m_storage.cols(); } /** This is an overloaded version of DenseCoeffsBase::coeff(Index,Index) const * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. * * See DenseCoeffsBase::coeff(Index) const for details. */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index rowId, Index colId) const { + EIGEN_DEVICE_FUNC constexpr const Scalar& coeff(Index rowId, Index colId) const { if (Flags & RowMajorBit) return m_storage.data()[colId + rowId * m_storage.cols()]; else // column-major @@ -180,15 +180,13 @@ class PlainObjectBase : public internal::dense_xpr_base::type { * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. * * See DenseCoeffsBase::coeff(Index) const for details. */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index index) const { - return m_storage.data()[index]; - } + EIGEN_DEVICE_FUNC constexpr const Scalar& coeff(Index index) const { return m_storage.data()[index]; } /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index,Index) const * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. * * See DenseCoeffsBase::coeffRef(Index,Index) const for details. */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index rowId, Index colId) { + EIGEN_DEVICE_FUNC constexpr Scalar& coeffRef(Index rowId, Index colId) { if (Flags & RowMajorBit) return m_storage.data()[colId + rowId * m_storage.cols()]; else // column-major @@ -199,11 +197,11 @@ class PlainObjectBase : public internal::dense_xpr_base::type { * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. * * See DenseCoeffsBase::coeffRef(Index) const for details. 
*/ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { return m_storage.data()[index]; } + EIGEN_DEVICE_FUNC constexpr Scalar& coeffRef(Index index) { return m_storage.data()[index]; } /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index). * It is provided for convenience. */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index rowId, Index colId) const { + EIGEN_DEVICE_FUNC constexpr const Scalar& coeffRef(Index rowId, Index colId) const { if (Flags & RowMajorBit) return m_storage.data()[colId + rowId * m_storage.cols()]; else // column-major @@ -212,9 +210,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { /** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index). * It is provided for convenience. */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index index) const { - return m_storage.data()[index]; - } + EIGEN_DEVICE_FUNC constexpr const Scalar& coeffRef(Index index) const { return m_storage.data()[index]; } /** \internal */ template @@ -343,7 +339,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { * remain row-vectors and vectors remain vectors. */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) { const OtherDerived& other = _other.derived(); #ifndef EIGEN_NO_DEBUG internal::check_rows_cols_for_overflow::run( @@ -426,9 +422,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { /** This is a special case of the templated operator=. Its purpose is to * prevent a default operator= from hiding the templated operator=. 
*/ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& operator=(const PlainObjectBase& other) { - return _set(other); - } + EIGEN_DEVICE_FUNC constexpr Derived& operator=(const PlainObjectBase& other) { return _set(other); } /** \sa MatrixBase::lazyAssign() */ template @@ -446,9 +440,9 @@ class PlainObjectBase : public internal::dense_xpr_base::type { // Prevent user from trying to instantiate PlainObjectBase objects // by making all its constructor protected. See bug 1074. protected: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase() = default; + EIGEN_DEVICE_FUNC constexpr PlainObjectBase() = default; /** \brief Move constructor */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(PlainObjectBase&&) = default; + EIGEN_DEVICE_FUNC constexpr PlainObjectBase(PlainObjectBase&&) = default; /** \brief Move assignment operator */ EIGEN_DEVICE_FUNC constexpr PlainObjectBase& operator=(PlainObjectBase&& other) noexcept { m_storage = std::move(other.m_storage); @@ -456,7 +450,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { } /** Copy constructor */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr PlainObjectBase(const PlainObjectBase&) = default; + EIGEN_DEVICE_FUNC constexpr PlainObjectBase(const PlainObjectBase&) = default; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(Index size, Index rows, Index cols) : m_storage(size, rows, cols) {} @@ -467,7 +461,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { * This constructor is for 1D array or vectors with more than 4 coefficients. * * \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this - * constructor must match the the fixed number of rows (resp. columns) of \c *this. + * constructor must match the fixed number of rows (resp. columns) of \c *this. 
*/ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const Scalar& a0, const Scalar& a1, const Scalar& a2, @@ -524,14 +518,14 @@ class PlainObjectBase : public internal::dense_xpr_base::type { /** \sa PlainObjectBase::operator=(const EigenBase&) */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase& other) : m_storage() { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase& other) : m_storage() { resizeLike(other); _set_noalias(other); } /** \sa PlainObjectBase::operator=(const EigenBase&) */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase& other) : m_storage() { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase& other) : m_storage() { resizeLike(other); *this = other.derived(); } @@ -691,6 +685,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { eigen_assert((this->size() == 0 || (IsVectorAtCompileTime ? (this->size() == other.size()) : (rows() == other.rows() && cols() == other.cols()))) && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + if (this->size() == 0) resizeLike(other); EIGEN_ONLY_USED_FOR_DEBUG(other); #else resizeLike(other); @@ -714,7 +709,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { // aliasing is dealt once in internal::call_assignment // so at this stage we have to assume aliasing... and resising has to be done later. 
template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set(const DenseBase& other) { + EIGEN_DEVICE_FUNC constexpr Derived& _set(const DenseBase& other) { internal::call_assignment(this->derived(), other.derived()); return this->derived(); } @@ -725,7 +720,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { * \sa operator=(const MatrixBase&), _set() */ template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set_noalias(const DenseBase& other) { + EIGEN_DEVICE_FUNC constexpr Derived& _set_noalias(const DenseBase& other) { // I don't think we need this resize call since the lazyAssign will anyways resize // and lazyAssign will be called by the assign selector. //_resize_to_match(other); @@ -737,23 +732,23 @@ class PlainObjectBase : public internal::dense_xpr_base::type { } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, - std::enable_if_t* = 0) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, + std::enable_if_t* = 0) { EIGEN_STATIC_ASSERT(internal::is_valid_index_type::value && internal::is_valid_index_type::value, T0 AND T1 MUST BE INTEGER TYPES) resize(rows, cols); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, - std::enable_if_t* = 0) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, + std::enable_if_t* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) m_storage.data()[0] = Scalar(val0); m_storage.data()[1] = Scalar(val1); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2( + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init2( const Index& val0, const Index& val1, std::enable_if_t<(!internal::is_same::value) && (internal::is_same::value) && (internal::is_same::value) && Base::SizeAtCompileTime == 2, @@ -766,7 +761,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { // The argument is convertible to 
the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array, // then the argument is meant to be the size of the object. template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1( Index size, std::enable_if_t<(Base::SizeAtCompileTime != 1 || !internal::is_convertible::value) && ((!internal::is_same::XprKind, ArrayXpr>::value || @@ -782,7 +777,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar // type can be implicitly converted) template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1( const Scalar& val0, std::enable_if_t::value, T>* = 0) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) @@ -792,7 +787,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar // type match the index type) template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1( const Index& val0, std::enable_if_t<(!internal::is_same::value) && (internal::is_same::value) && Base::SizeAtCompileTime == 1 && internal::is_convertible::value, @@ -803,42 +798,42 @@ class PlainObjectBase : public internal::dense_xpr_base::type { // Initialize a fixed size matrix from a pointer to raw data template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar* data) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const Scalar* data) { this->_set_noalias(ConstMapType(data)); } // Initialize an arbitrary matrix from a dense expression template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const DenseBase& other) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const DenseBase& 
other) { this->_set_noalias(other); } // Initialize an arbitrary matrix from an object convertible to the Derived type. template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Derived& other) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const Derived& other) { this->_set_noalias(other); } // Initialize an arbitrary matrix from a generic Eigen expression template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const EigenBase& other) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const EigenBase& other) { this->derived() = other; } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const ReturnByValue& other) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const ReturnByValue& other) { resize(other.rows(), other.cols()); other.evalTo(this->derived()); } template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const RotationBase& r) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1(const RotationBase& r) { this->derived() = r; } // For fixed-size Array template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1( const Scalar& val0, std::enable_if_t::value && @@ -849,7 +844,7 @@ class PlainObjectBase : public internal::dense_xpr_base::type { // For fixed-size Array template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void _init1( const Index& val0, std::enable_if_t<(!internal::is_same::value) && (internal::is_same::value) && Base::SizeAtCompileTime != Dynamic && Base::SizeAtCompileTime != 1 && diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index e16c7cc963b..bc8b9dad360 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -219,16 +219,16 @@ class Product using TransposeReturnType = typename internal::product_transpose_helper::TransposeType; using AdjointReturnType = typename 
internal::product_transpose_helper::AdjointType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_lhs.rows(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_rhs.cols(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const LhsNestedCleaned& lhs() const { return m_lhs; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const RhsNestedCleaned& rhs() const { return m_rhs; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const RhsNestedCleaned& rhs() const { return m_rhs; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeReturnType transpose() const { return internal::product_transpose_helper::run_transpose(*this); diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index ce8d954bffc..d789f7501cb 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -15,6 +15,13 @@ // IWYU pragma: private #include "./InternalHeaderCheck.h" +// C4804: unsafe use of type 'bool' in operation. Unavoidable in generic code +// instantiated with bool scalars (e.g. += and * on bool). 
+#if EIGEN_COMP_MSVC +#pragma warning(push) +#pragma warning(disable : 4804) +#endif + namespace Eigen { namespace internal { @@ -29,30 +36,27 @@ namespace internal { */ template struct evaluator> : public product_evaluator> { - typedef Product XprType; - typedef product_evaluator Base; + using XprType = Product; + using Base = product_evaluator; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} }; // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" -// TODO we should apply that rule only if that's really helpful +// TODO: we should apply that rule only if that's really helpful template struct evaluator_assume_aliasing, const CwiseNullaryOp, Plain1>, - const Product>> { - static const bool value = true; -}; + const Product>> : std::true_type {}; template struct evaluator, const CwiseNullaryOp, Plain1>, const Product>> : public evaluator> { - typedef CwiseBinaryOp, - const CwiseNullaryOp, Plain1>, - const Product> - XprType; - typedef evaluator> Base; + using XprType = CwiseBinaryOp, + const CwiseNullaryOp, Plain1>, + const Product>; + using Base = evaluator>; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {} @@ -61,8 +65,8 @@ struct evaluator, template struct evaluator, DiagIndex>> : public evaluator, DiagIndex>> { - typedef Diagonal, DiagIndex> XprType; - typedef evaluator, DiagIndex>> Base; + using XprType = Diagonal, DiagIndex>; + using Base = evaluator, DiagIndex>>; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(Diagonal, DiagIndex>( @@ -78,18 +82,16 @@ template -struct evaluator_assume_aliasing> { - static const bool value = true; -}; +struct evaluator_assume_aliasing> : std::true_type {}; // This is the default evaluator implementation for products: // It creates a temporary and call generic_product_impl template struct product_evaluator, ProductTag, LhsShape, 
RhsShape> : public evaluator::PlainObject> { - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef evaluator Base; + using XprType = Product; + using PlainObject = typename XprType::PlainObject; + using Base = evaluator; enum { Flags = Base::Flags | EvalBeforeNestingBit }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr) @@ -123,7 +125,7 @@ struct product_evaluator, ProductTag, LhsShape, RhsSh template struct Assignment, internal::assign_op, Dense2Dense, std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> { - typedef Product SrcXprType; + using SrcXprType = Product; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const internal::assign_op&) { Index dstRows = src.rows(); @@ -138,7 +140,7 @@ struct Assignment, internal::assign_op struct Assignment, internal::add_assign_op, Dense2Dense, std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> { - typedef Product SrcXprType; + using SrcXprType = Product; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const internal::add_assign_op&) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -151,7 +153,7 @@ struct Assignment, internal::add_assign_o template struct Assignment, internal::sub_assign_op, Dense2Dense, std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> { - typedef Product SrcXprType; + using SrcXprType = Product; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const internal::sub_assign_op&) { eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); @@ -161,7 +163,7 @@ struct Assignment, internal::sub_assign_o }; // Dense ?= scalar * Product -// TODO we should apply that rule if that's really helpful +// TODO: we should apply that rule if that's really helpful // for instance, this is not good for inner 
products template @@ -170,10 +172,9 @@ struct Assignment, Plain>, const Product>, AssignFunc, Dense2Dense> { - typedef CwiseBinaryOp, - const CwiseNullaryOp, Plain>, - const Product> - SrcXprType; + using SrcXprType = CwiseBinaryOp, + const CwiseNullaryOp, Plain>, + const Product>; static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const AssignFunc& func) { call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs()) * src.rhs().rhs(), func); @@ -182,25 +183,21 @@ struct Assignment" expression to save one temporary -// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct +// FIXME: consider enabling these rules for all product types, not only Dense and DefaultProduct. template struct evaluator_assume_aliasing< CwiseBinaryOp< internal::scalar_sum_op::Scalar>, const OtherXpr, const Product>, - DenseShape> { - static const bool value = true; -}; + DenseShape> : std::true_type {}; template struct evaluator_assume_aliasing< CwiseBinaryOp< internal::scalar_difference_op::Scalar>, const OtherXpr, const Product>, - DenseShape> { - static const bool value = true; -}; + DenseShape> : std::true_type {}; template struct assignment_from_xpr_op_product { @@ -237,17 +234,17 @@ template struct generic_product_impl { using impl = default_inner_product_impl; template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0, 0) = impl::run(lhs, rhs); } template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0, 0) += impl::run(lhs, rhs); } template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, 
const Rhs& rhs) { + static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0, 0) -= impl::run(lhs, rhs); } }; @@ -280,13 +277,48 @@ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, cons for (Index i = 0; i < rows; ++i) func(dst.row(i), lhsEval.coeff(i, Index(0)) * actual_rhs); } +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool outer_product_use_small_assignment(const Dst& dst) { + return dst.rows() <= 16 && dst.cols() <= 16; +} + +template +void EIGEN_DEVICE_FUNC outer_product_selector_run_small(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func, + const Scalar& alpha, const false_type&) { + evaluator rhsEval(rhs); + ei_declare_local_nested_eval(Lhs, lhs, Rhs::SizeAtCompileTime, actual_lhs); + const Index rows = dst.rows(); + const Index cols = dst.cols(); + for (Index j = 0; j < cols; ++j) { + const Scalar rhs_j = rhsEval.coeff(Index(0), j); + for (Index i = 0; i < rows; ++i) { + func.assignCoeff(dst.coeffRef(i, j), alpha * (rhs_j * actual_lhs.coeff(i, Index(0)))); + } + } +} + +template +void EIGEN_DEVICE_FUNC outer_product_selector_run_small(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func, + const Scalar& alpha, const true_type&) { + evaluator lhsEval(lhs); + ei_declare_local_nested_eval(Rhs, rhs, Lhs::SizeAtCompileTime, actual_rhs); + const Index rows = dst.rows(); + const Index cols = dst.cols(); + for (Index i = 0; i < rows; ++i) { + const Scalar lhs_i = lhsEval.coeff(i, Index(0)); + for (Index j = 0; j < cols; ++j) { + func.assignCoeff(dst.coeffRef(i, j), alpha * (lhs_i * actual_rhs.coeff(Index(0), j))); + } + } +} + template struct generic_product_impl { template struct is_row_major : bool_constant<(int(T::Flags) & RowMajorBit)> {}; - typedef typename Product::Scalar Scalar; + using Scalar = typename Product::Scalar; - // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose + // TODO: it would be nice to be 
able to exploit our *_assign_op functors for that purpose struct set { template EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { @@ -320,30 +352,50 @@ struct generic_product_impl { template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); + if (internal::outer_product_use_small_assignment(dst)) { + internal::outer_product_selector_run_small(dst, lhs, rhs, internal::assign_op(), + Scalar(1), is_row_major()); + } else { + internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); + } } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); + if (internal::outer_product_use_small_assignment(dst)) { + internal::outer_product_selector_run_small(dst, lhs, rhs, internal::add_assign_op(), + Scalar(1), is_row_major()); + } else { + internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); + } } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); + if (internal::outer_product_use_small_assignment(dst)) { + internal::outer_product_selector_run_small(dst, lhs, rhs, internal::sub_assign_op(), + Scalar(1), is_row_major()); + } else { + internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); + } } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); + if (internal::outer_product_use_small_assignment(dst)) { + internal::outer_product_selector_run_small(dst, lhs, rhs, internal::add_assign_op(), + alpha, is_row_major()); + } else { + 
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); + } } }; // This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo template struct generic_product_impl_base { - typedef typename Product::Scalar Scalar; + using Scalar = typename Product::Scalar; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { @@ -371,11 +423,11 @@ struct generic_product_impl_base { template struct generic_product_impl : generic_product_impl_base> { - typedef typename nested_eval::type LhsNested; - typedef typename nested_eval::type RhsNested; - typedef typename Product::Scalar Scalar; + using LhsNested = typename nested_eval::type; + using RhsNested = typename nested_eval::type; + using Scalar = typename Product::Scalar; enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; - typedef internal::remove_all_t> MatrixType; + using MatrixType = internal::remove_all_t>; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, @@ -396,7 +448,7 @@ struct generic_product_impl template struct generic_product_impl { - typedef typename Product::Scalar Scalar; + using Scalar = typename Product::Scalar; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { @@ -439,7 +491,7 @@ struct generic_product_impl::NeedToConjugate, ConjRhs = blas_traits::NeedToConjugate }; - // FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto + // FIXME: this should be auto, and extractScalarFactor should also return auto // this is important for real*complex_mat Scalar actualAlpha = combine_scalar_factors(lhs, rhs); @@ -485,9 +537,9 @@ struct etor_product_packet_impl; template struct product_evaluator, ProductTag, DenseShape, DenseShape> : evaluator_base> { - typedef Product XprType; - typedef typename XprType::Scalar Scalar; - typedef 
typename XprType::CoeffReturnType CoeffReturnType; + using XprType = Product; + using Scalar = typename XprType::Scalar; + using CoeffReturnType = typename XprType::CoeffReturnType; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr) : m_lhs(xpr.lhs()), @@ -500,30 +552,18 @@ struct product_evaluator, ProductTag, DenseShape, EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); -#if 0 - std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n"; - std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n"; - std::cerr << "LhsAlignment= " << LhsAlignment << "\n"; - std::cerr << "RhsAlignment= " << RhsAlignment << "\n"; - std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n"; - std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n"; - std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n"; - std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n"; - std::cerr << "Alignment= " << Alignment << "\n"; - std::cerr << "Flags= " << Flags << "\n"; -#endif } // Everything below here is taken from CoeffBasedProduct.h - typedef typename internal::nested_eval::type LhsNested; - typedef typename internal::nested_eval::type RhsNested; + using LhsNested = typename internal::nested_eval::type; + using RhsNested = typename internal::nested_eval::type; - typedef internal::remove_all_t LhsNestedCleaned; - typedef internal::remove_all_t RhsNestedCleaned; + using LhsNestedCleaned = internal::remove_all_t; + using RhsNestedCleaned = internal::remove_all_t; - typedef evaluator LhsEtorType; - typedef evaluator RhsEtorType; + using LhsEtorType = evaluator; + using RhsEtorType = evaluator; enum { RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, @@ -533,78 +573,77 @@ struct product_evaluator, ProductTag, DenseShape, MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime }; - typedef typename 
find_best_packet::type LhsVecPacketType; - typedef typename find_best_packet::type RhsVecPacketType; - - enum { - - LhsCoeffReadCost = LhsEtorType::CoeffReadCost, - RhsCoeffReadCost = RhsEtorType::CoeffReadCost, - CoeffReadCost = InnerSize == 0 ? NumTraits::ReadCost - : InnerSize == Dynamic - ? HugeCost - : InnerSize * (NumTraits::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost)) + - (InnerSize - 1) * NumTraits::AddCost, - - Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - - LhsFlags = LhsEtorType::Flags, - RhsFlags = RhsEtorType::Flags, - - LhsRowMajor = LhsFlags & RowMajorBit, - RhsRowMajor = RhsFlags & RowMajorBit, - - LhsVecPacketSize = unpacket_traits::size, - RhsVecPacketSize = unpacket_traits::size, - - // Here, we don't care about alignment larger than the usable packet size. - LhsAlignment = - plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize* int(sizeof(typename LhsNestedCleaned::Scalar))), - RhsAlignment = - plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize* int(sizeof(typename RhsNestedCleaned::Scalar))), - - SameType = is_same::value, - - CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1), - CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1), - - EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1 - : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) - ? 0 - : (bool(RhsRowMajor) && !CanVectorizeLhs), - - Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) | - (EvalToRowMajor ? RowMajorBit : 0) - // TODO enable vectorization for mixed types - | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | - (XprType::IsVectorAtCompileTime ? 
LinearAccessBit : 0), - - LhsOuterStrideBytes = - int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), - RhsOuterStrideBytes = - int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), - - Alignment = bool(CanVectorizeLhs) - ? (LhsOuterStrideBytes <= 0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment)) != 0 - ? 0 - : LhsAlignment) - : bool(CanVectorizeRhs) - ? (RhsOuterStrideBytes <= 0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment)) != 0 - ? 0 - : RhsAlignment) - : 0, - - /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside - * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner - * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect - * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. - */ - CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) && - (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) && - (int(InnerSize) % packet_traits::size == 0) - }; + using LhsVecPacketType = typename find_best_packet::type; + using RhsVecPacketType = typename find_best_packet::type; + + static constexpr int LhsCoeffReadCost = LhsEtorType::CoeffReadCost; + static constexpr int RhsCoeffReadCost = RhsEtorType::CoeffReadCost; + static constexpr int CoeffReadCost = + InnerSize == 0 ? NumTraits::ReadCost + : InnerSize == Dynamic + ? 
HugeCost + : InnerSize * (NumTraits::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost)) + + (InnerSize - 1) * NumTraits::AddCost; + + static constexpr bool Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT; + + static constexpr int LhsFlags = LhsEtorType::Flags; + static constexpr int RhsFlags = RhsEtorType::Flags; + + static constexpr int LhsRowMajor = LhsFlags & RowMajorBit; + static constexpr int RhsRowMajor = RhsFlags & RowMajorBit; + + static constexpr int LhsVecPacketSize = unpacket_traits::size; + static constexpr int RhsVecPacketSize = unpacket_traits::size; + + // Here, we don't care about alignment larger than the usable packet size. + static constexpr int LhsAlignment = + plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize* int(sizeof(typename LhsNestedCleaned::Scalar))); + static constexpr int RhsAlignment = + plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize* int(sizeof(typename RhsNestedCleaned::Scalar))); + + static constexpr bool SameType = is_same::value; + + static constexpr bool CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1); + static constexpr bool CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1); + + static constexpr int EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1 + : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) + ? 0 + : (bool(RhsRowMajor) && !CanVectorizeLhs); + + static constexpr int Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) | + (EvalToRowMajor ? RowMajorBit : 0) + // TODO: enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | + (XprType::IsVectorAtCompileTime ? 
LinearAccessBit : 0); + + static constexpr int LhsOuterStrideBytes = + int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)); + static constexpr int RhsOuterStrideBytes = + int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)); + + static constexpr int Alignment = + bool(CanVectorizeLhs) + ? (LhsOuterStrideBytes <= 0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment)) != 0 + ? 0 + : LhsAlignment) + : bool(CanVectorizeRhs) + ? (RhsOuterStrideBytes <= 0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment)) != 0 + ? 0 + : RhsAlignment) + : 0; + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + static constexpr bool CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) && + (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) && + (int(InnerSize) % packet_traits::size == 0); - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const { return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum(); } @@ -612,7 +651,7 @@ struct product_evaluator, ProductTag, DenseShape, * which is why we don't set the LinearAccessBit. 
* TODO: this seems possible when the result is a vector */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const { const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index; const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0; return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum(); @@ -621,9 +660,9 @@ struct product_evaluator, ProductTag, DenseShape, template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index row, Index col) const { PacketType res; - typedef etor_product_packet_impl - PacketImpl; + using PacketImpl = + etor_product_packet_impl; PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; } @@ -639,9 +678,9 @@ struct product_evaluator, ProductTag, DenseShape, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index row, Index col, Index begin, Index count) const { PacketType res; - typedef etor_product_packet_impl - PacketImpl; + using PacketImpl = + etor_product_packet_impl; PacketImpl::run_segment(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res, begin, count); return res; } @@ -660,16 +699,15 @@ struct product_evaluator, ProductTag, DenseShape, LhsEtorType m_lhsImpl; RhsEtorType m_rhsImpl; - // TODO: Get rid of m_innerDim if known at compile time - Index m_innerDim; + variable_if_dynamic m_innerDim; }; template struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape> : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape> { - typedef Product XprType; - typedef Product BaseProduct; - typedef product_evaluator Base; + using XprType = Product; + using BaseProduct = Product; + using Base = product_evaluator; enum { Flags = Base::Flags | EvalBeforeNestingBit }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr) : 
Base(BaseProduct(xpr.lhs(), xpr.rhs())) {} @@ -717,8 +755,8 @@ struct etor_product_packet_impl struct etor_product_packet_impl { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, - Index /*innerDim*/, Packet& res) { + static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index /*innerDim*/, Packet& res) { res = pmul(pset1(lhs.coeff(row, Index(0))), rhs.template packet(Index(0), col)); } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, @@ -731,8 +769,8 @@ struct etor_product_packet_impl { template struct etor_product_packet_impl { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, - Index /*innerDim*/, Packet& res) { + static EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index /*innerDim*/, Packet& res) { res = pmul(lhs.template packet(row, Index(0)), pset1(rhs.coeff(Index(0), col))); } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, @@ -812,7 +850,7 @@ struct triangular_product_impl; template struct generic_product_impl : generic_product_impl_base> { - typedef typename Product::Scalar Scalar; + using Scalar = typename Product::Scalar; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { @@ -824,7 +862,7 @@ struct generic_product_impl template struct generic_product_impl : generic_product_impl_base> { - typedef typename Product::Scalar Scalar; + using Scalar = typename Product::Scalar; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { @@ -842,11 +880,11 @@ struct selfadjoint_product_impl; template struct generic_product_impl : generic_product_impl_base> { - typedef typename Product::Scalar 
Scalar; + using Scalar = typename Product::Scalar; template static EIGEN_DEVICE_FUNC void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - selfadjoint_product_impl::run( + selfadjoint_product_impl::run( dst, lhs.nestedExpression(), rhs, alpha); } }; @@ -854,11 +892,11 @@ struct generic_product_impl template struct generic_product_impl : generic_product_impl_base> { - typedef typename Product::Scalar Scalar; + using Scalar = typename Product::Scalar; template static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { - selfadjoint_product_impl::run( + selfadjoint_product_impl::run( dst, lhs, rhs.nestedExpression(), alpha); } }; @@ -869,7 +907,7 @@ struct generic_product_impl template struct diagonal_product_evaluator_base : evaluator_base { - typedef typename ScalarBinaryOpTraits::ReturnType Scalar; + using Scalar = typename ScalarBinaryOpTraits::ReturnType; public: enum { @@ -896,8 +934,8 @@ struct diagonal_product_evaluator_base : evaluator_base { (ScalarAccessOnDiag_ || (bool(int(DiagFlags) & PacketAccessBit))), LinearAccessMask_ = (MatrixType::RowsAtCompileTime == 1 || MatrixType::ColsAtCompileTime == 1) ? LinearAccessBit : 0, - Flags = - ((HereditaryBits | LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0), + Flags = ((HereditaryBits | LinearAccessMask_) & static_cast(MatrixFlags)) | + (Vectorizable_ ? 
PacketAccessBit : 0), Alignment = evaluator::Alignment, AsScalarProduct = @@ -913,7 +951,7 @@ struct diagonal_product_evaluator_base : evaluator_base { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { if (AsScalarProduct) return m_diagImpl.coeff(0) * m_matImpl.coeff(idx); else @@ -932,8 +970,9 @@ struct diagonal_product_evaluator_base : evaluator_base { enum { InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, DiagonalPacketLoadMode = plain_enum_min( - LoadMode, - ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! + LoadMode, ((InnerSize * int(sizeof(Scalar))) % int(unpacket_traits::alignment) == 0) + ? int(unpacket_traits::alignment) + : int(evaluator::Alignment)) }; return internal::pmul(m_matImpl.template packet(row, col), m_diagImpl.template packet(id)); @@ -952,8 +991,9 @@ struct diagonal_product_evaluator_base : evaluator_base { enum { InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, DiagonalPacketLoadMode = plain_enum_min( - LoadMode, - ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! + LoadMode, ((InnerSize * int(sizeof(Scalar))) % int(unpacket_traits::alignment) == 0) + ? 
int(unpacket_traits::alignment) + : int(evaluator::Alignment)) }; return internal::pmul(m_matImpl.template packetSegment(row, col, begin, count), m_diagImpl.template packetSegment(id, begin, count)); @@ -968,24 +1008,23 @@ template struct product_evaluator, ProductTag, DiagonalShape, DenseShape> : diagonal_product_evaluator_base, OnTheLeft> { - typedef diagonal_product_evaluator_base, - OnTheLeft> - Base; + using Base = + diagonal_product_evaluator_base, OnTheLeft>; using Base::coeff; using Base::m_diagImpl; using Base::m_matImpl; - typedef typename Base::Scalar Scalar; + using Scalar = typename Base::Scalar; - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; - typedef typename Lhs::DiagonalVectorType DiagonalType; + using XprType = Product; + using PlainObject = typename XprType::PlainObject; + using DiagonalType = typename Lhs::DiagonalVectorType; static constexpr int StorageOrder = Base::StorageOrder_; using IsRowMajor_t = bool_constant; - EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} + EIGEN_DEVICE_FUNC constexpr explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); } @@ -1023,23 +1062,22 @@ template struct product_evaluator, ProductTag, DenseShape, DiagonalShape> : diagonal_product_evaluator_base, OnTheRight> { - typedef diagonal_product_evaluator_base, - OnTheRight> - Base; + using Base = diagonal_product_evaluator_base, + OnTheRight>; using Base::coeff; using Base::m_diagImpl; using Base::m_matImpl; - typedef typename Base::Scalar Scalar; + using Scalar = typename Base::Scalar; - typedef Product XprType; - typedef typename XprType::PlainObject PlainObject; + using XprType = Product; + using 
PlainObject = typename XprType::PlainObject; static constexpr int StorageOrder = Base::StorageOrder_; using IsColMajor_t = bool_constant; - EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {} + EIGEN_DEVICE_FUNC constexpr explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { + EIGEN_DEVICE_FUNC constexpr EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); } @@ -1081,8 +1119,8 @@ struct permutation_matrix_product; template struct permutation_matrix_product { - typedef typename nested_eval::type MatrixType; - typedef remove_all_t MatrixTypeCleaned; + using MatrixType = typename nested_eval::type; + using MatrixTypeCleaned = remove_all_t; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, @@ -1170,7 +1208,7 @@ struct generic_product_impl, MatrixShape, PermutationShape, Pr * Products with transpositions matrices ***************************************************************************/ -// FIXME could we unify Transpositions and Permutation into a single "shape"?? +// FIXME: consider unifying Transpositions and Permutation into a single shape. 
/** \internal * \class transposition_matrix_product @@ -1178,14 +1216,14 @@ struct generic_product_impl, MatrixShape, PermutationShape, Pr */ template struct transposition_matrix_product { - typedef typename nested_eval::type MatrixType; - typedef remove_all_t MatrixTypeCleaned; + using MatrixType = typename nested_eval::type; + using MatrixTypeCleaned = remove_all_t; template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr) { MatrixType mat(xpr); - typedef typename TranspositionType::StorageIndex StorageIndex; + using StorageIndex = typename TranspositionType::StorageIndex; const Index size = tr.size(); StorageIndex j = 0; @@ -1264,8 +1302,28 @@ struct generic_product_impl +struct generic_product_impl + : generic_product_impl {}; + +template +struct generic_product_impl + : generic_product_impl {}; + +template +struct generic_product_impl + : generic_product_impl {}; + +template +struct generic_product_impl + : generic_product_impl {}; + } // end namespace internal } // end namespace Eigen +#if EIGEN_COMP_MSVC +#pragma warning(pop) +#endif + #endif // EIGEN_PRODUCT_EVALUATORS_H diff --git a/Eigen/src/Core/Random.h b/Eigen/src/Core/Random.h index f8a54356250..91543ebb1a5 100644 --- a/Eigen/src/Core/Random.h +++ b/Eigen/src/Core/Random.h @@ -19,7 +19,7 @@ namespace internal { template struct scalar_random_op { - inline const Scalar operator()() const { return random(); } + inline Scalar operator()() const { return random(); } }; template @@ -51,7 +51,7 @@ struct functor_traits > { * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected * behavior with expressions involving random matrices. * - * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators. + * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using std random generators. 
* * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random() */ diff --git a/Eigen/src/Core/RandomImpl.h b/Eigen/src/Core/RandomImpl.h index efba33680d7..4a622fc3733 100644 --- a/Eigen/src/Core/RandomImpl.h +++ b/Eigen/src/Core/RandomImpl.h @@ -56,19 +56,21 @@ struct random_bits_impl { EIGEN_STATIC_ASSERT(std::is_unsigned::value, SCALAR MUST BE A BUILT - IN UNSIGNED INTEGER) using RandomDevice = eigen_random_device; using RandomReturnType = typename RandomDevice::ReturnType; - static constexpr int kEntropy = RandomDevice::Entropy; static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + static constexpr int kEntropy = plain_enum_min(kTotalBits, RandomDevice::Entropy); // return a Scalar filled with numRandomBits beginning from the least significant bit static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { eigen_assert((numRandomBits >= 0) && (numRandomBits <= kTotalBits)); - const Scalar mask = Scalar(-1) >> ((kTotalBits - numRandomBits) & (kTotalBits - 1)); Scalar randomBits = 0; - for (int shift = 0; shift < numRandomBits; shift += kEntropy) { - RandomReturnType r = RandomDevice::run(); - randomBits |= static_cast(r) << shift; + for (int filledBits = 0; filledBits < numRandomBits; filledBits += kEntropy) { + Scalar r = static_cast(RandomDevice::run()); + int remainingBits = numRandomBits - filledBits; + if (remainingBits < kEntropy) { + // clear the excess bits to avoid UB and rounding bias + r >>= kEntropy - remainingBits; + } + randomBits |= r << filledBits; } - // clear the excess bits - randomBits &= mask; return randomBits; } }; @@ -131,8 +133,15 @@ struct random_longdouble_impl { uint64_t randomBits[2]; long double result = 2.0L; memcpy(&randomBits, &result, Size); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ randomBits[0] |= getRandomBits(numLowBits); randomBits[1] |= getRandomBits(numHighBits); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + randomBits[0] |= getRandomBits(numHighBits); + randomBits[1] |= 
getRandomBits(numLowBits); +#else +#error Unexpected or undefined __BYTE_ORDER__ +#endif memcpy(&result, &randomBits, Size); result -= 3.0L; return result; @@ -197,7 +206,8 @@ struct random_int_impl { template struct random_int_impl { static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; - using BitsType = typename make_unsigned::type; + // avoid implicit integral promotion to `int` + using BitsType = std::conditional_t<(sizeof(Scalar) < sizeof(int)), unsigned int, std::make_unsigned_t >; static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { if (y <= x) return x; // Avoid overflow by representing `range` as an unsigned type diff --git a/Eigen/src/Core/RealView.h b/Eigen/src/Core/RealView.h new file mode 100644 index 00000000000..59cd6d54ee6 --- /dev/null +++ b/Eigen/src/Core/RealView.h @@ -0,0 +1,292 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2025 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REALVIEW_H +#define EIGEN_REALVIEW_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// Write access and vectorization requires array-oriented access to the real and imaginary components. +// From https://en.cppreference.com/w/cpp/numeric/complex.html: +// For any pointer to an element of an array of std::complex named p and any valid array index i, +// reinterpret_cast(p)[2 * i] is the real part of the complex number p[i], and +// reinterpret_cast(p)[2 * i + 1] is the imaginary part of the complex number p[i]. 
+ +template +struct complex_array_access : std::false_type {}; +template +struct complex_array_access> : std::true_type {}; + +template +struct traits> : public traits { + template + static constexpr int double_size(T size, bool times_two) { + int size_as_int = int(size); + if (size_as_int == Dynamic) return Dynamic; + return times_two ? (2 * size_as_int) : size_as_int; + } + + using Base = traits; + using ComplexScalar = typename Base::Scalar; + using Scalar = typename NumTraits::Real; + + static constexpr bool ArrayAccess = complex_array_access::value; + static constexpr int ActualDirectAccessBit = ArrayAccess ? DirectAccessBit : 0; + static constexpr int ActualLvaluebit = !std::is_const::value && ArrayAccess ? LvalueBit : 0; + static constexpr int ActualPacketAccessBit = packet_traits::Vectorizable ? PacketAccessBit : 0; + static constexpr int FlagMask = + ActualDirectAccessBit | ActualLvaluebit | ActualPacketAccessBit | HereditaryBits | LinearAccessBit; + static constexpr int BaseFlags = int(evaluator::Flags) | int(Base::Flags); + static constexpr int Flags = BaseFlags & FlagMask; + static constexpr bool IsRowMajor = Flags & RowMajorBit; + static constexpr int RowsAtCompileTime = double_size(Base::RowsAtCompileTime, !IsRowMajor); + static constexpr int ColsAtCompileTime = double_size(Base::ColsAtCompileTime, IsRowMajor); + static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime); + static constexpr int MaxRowsAtCompileTime = double_size(Base::MaxRowsAtCompileTime, !IsRowMajor); + static constexpr int MaxColsAtCompileTime = double_size(Base::MaxColsAtCompileTime, IsRowMajor); + static constexpr int MaxSizeAtCompileTime = size_at_compile_time(MaxRowsAtCompileTime, MaxColsAtCompileTime); + static constexpr int OuterStrideAtCompileTime = double_size(outer_stride_at_compile_time::ret, true); + static constexpr int InnerStrideAtCompileTime = inner_stride_at_compile_time::ret; +}; + +template +struct evaluator> : private 
evaluator { + using BaseEvaluator = evaluator; + using XprType = RealView; + using ExpressionTraits = traits; + using ComplexScalar = typename ExpressionTraits::ComplexScalar; + using Scalar = typename ExpressionTraits::Scalar; + + static constexpr int Flags = ExpressionTraits::Flags; + static constexpr int CoeffReadCost = BaseEvaluator::CoeffReadCost; + static constexpr int Alignment = BaseEvaluator::Alignment; + static constexpr bool IsRowMajor = ExpressionTraits::IsRowMajor; + static constexpr bool DirectAccess = (Flags & DirectAccessBit) != 0; + + using ComplexCoeffReturnType = std::conditional_t; + using CoeffReturnType = std::conditional_t; + + EIGEN_DEVICE_FUNC explicit evaluator(XprType realView) : BaseEvaluator(realView.m_xpr) {} + + template = true> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const { + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + bool p = (IsRowMajor ? col : row) & 1; + ComplexScalar ccoeff = BaseEvaluator::coeff(r, c); + return p ? numext::imag(ccoeff) : numext::real(ccoeff); + } + template = true> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + Index p = (IsRowMajor ? col : row) & 1; + ComplexCoeffReturnType ccoeff = BaseEvaluator::coeff(r, c); + return reinterpret_cast(ccoeff)[p]; + } + template = true> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const { + ComplexScalar ccoeff = BaseEvaluator::coeff(index / 2); + bool p = index & 1; + return p ? 
numext::imag(ccoeff) : numext::real(ccoeff); + } + template = true> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + ComplexCoeffReturnType ccoeff = BaseEvaluator::coeff(index / 2); + Index p = index & 1; + return reinterpret_cast(ccoeff)[p]; + } + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + Index p = (IsRowMajor ? col : row) & 1; + ComplexScalar& ccoeffRef = BaseEvaluator::coeffRef(r, c); + return reinterpret_cast(ccoeffRef)[p]; + } + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + ComplexScalar& ccoeffRef = BaseEvaluator::coeffRef(index / 2); + Index p = index & 1; + return reinterpret_cast(ccoeffRef)[p]; + } + + // If the first index is odd (imaginary), discard the first scalar + // in 'result' and assign the missing scalar. + // This operation is safe as the real component of the first scalar must exist. + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + bool p = (IsRowMajor ? col : row) & 1; + ComplexPacket cresult = BaseEvaluator::template packet(r, c); + PacketType result = preinterpret(cresult); + if (p) { + Scalar aux[RealPacketSize + 1]; + pstoreu(aux, result); + Index lastr = IsRowMajor ? row : row + RealPacketSize - 1; + Index lastc = IsRowMajor ? 
col + RealPacketSize - 1 : col; + aux[RealPacketSize] = coeff(lastr, lastc); + result = ploadu(aux + 1); + } + return result; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + ComplexPacket cresult = BaseEvaluator::template packet(index / 2); + PacketType result = preinterpret(cresult); + bool p = index & 1; + if (p) { + Scalar aux[RealPacketSize + 1]; + pstoreu(aux, result); + aux[RealPacketSize] = coeff(index + RealPacketSize - 1); + result = ploadu(aux + 1); + } + return result; + } + + // The requested real packet segment forms the half-open interval [begin, end), where 'end' = 'begin' + 'count'. + // In order to access the underlying complex array, even indices must be aligned with the real components + // of the complex scalars. 'begin' and 'count' must be modified as follows: + // a) 'begin' must be rounded down to the nearest even number; and + // b) 'end' must be rounded up to the nearest even number. + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + Index actualBegin = numext::round_down(begin, 2); + Index actualEnd = numext::round_down(begin + count + 1, 2); + Index actualCount = actualEnd - actualBegin; + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + ComplexPacket cresult = + BaseEvaluator::template packetSegment(r, c, actualBegin / 2, actualCount / 2); + PacketType result = preinterpret(cresult); + bool p = (IsRowMajor ? 
col : row) & 1; + if (p) { + Scalar aux[RealPacketSize + 1] = {}; + pstoreu(aux, result); + Index lastr = IsRowMajor ? row : row + actualEnd - 1; + Index lastc = IsRowMajor ? col + actualEnd - 1 : col; + aux[actualEnd] = coeff(lastr, lastc); + result = ploadu(aux + 1); + } + return result; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + Index actualBegin = numext::round_down(begin, 2); + Index actualEnd = numext::round_down(begin + count + 1, 2); + Index actualCount = actualEnd - actualBegin; + ComplexPacket cresult = + BaseEvaluator::template packetSegment(index / 2, actualBegin / 2, actualCount / 2); + PacketType result = preinterpret(cresult); + bool p = index & 1; + if (p) { + Scalar aux[RealPacketSize + 1] = {}; + pstoreu(aux, result); + aux[actualEnd] = coeff(index + actualEnd - 1); + result = ploadu(aux + 1); + } + return result; + } +}; + +} // namespace internal + +template +class RealView : public internal::dense_xpr_base>::type { + using ExpressionTraits = internal::traits; + EIGEN_STATIC_ASSERT(NumTraits::IsComplex, SCALAR MUST BE COMPLEX) + public: + using Scalar = typename ExpressionTraits::Scalar; + using Nested = RealView; + + EIGEN_DEVICE_FUNC explicit RealView(Xpr& xpr) : m_xpr(xpr) {} + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return Xpr::IsRowMajor ? m_xpr.rows() : 2 * m_xpr.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return Xpr::IsRowMajor ? 
2 * m_xpr.cols() : m_xpr.cols(); } + EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return 2 * m_xpr.size(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_xpr.innerStride(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return 2 * m_xpr.outerStride(); } + EIGEN_DEVICE_FUNC void resize(Index rows, Index cols) { + m_xpr.resize(Xpr::IsRowMajor ? rows : rows / 2, Xpr::IsRowMajor ? cols / 2 : cols); + } + EIGEN_DEVICE_FUNC void resize(Index size) { m_xpr.resize(size / 2); } + EIGEN_DEVICE_FUNC Scalar* data() { return reinterpret_cast(m_xpr.data()); } + EIGEN_DEVICE_FUNC const Scalar* data() const { return reinterpret_cast(m_xpr.data()); } + + EIGEN_DEVICE_FUNC RealView(const RealView&) = default; + + EIGEN_DEVICE_FUNC RealView& operator=(const RealView& other); + + template + EIGEN_DEVICE_FUNC RealView& operator=(const RealView& other); + + template + EIGEN_DEVICE_FUNC RealView& operator=(const DenseBase& other); + + protected: + friend struct internal::evaluator; + Xpr& m_xpr; +}; + +template +EIGEN_DEVICE_FUNC RealView& RealView::operator=(const RealView& other) { + internal::call_assignment(*this, other); + return *this; +} + +template +template +EIGEN_DEVICE_FUNC RealView& RealView::operator=(const RealView& other) { + internal::call_assignment(*this, other); + return *this; +} + +template +template +EIGEN_DEVICE_FUNC RealView& RealView::operator=(const DenseBase& other) { + internal::call_assignment(*this, other.derived()); + return *this; +} + +template +EIGEN_DEVICE_FUNC typename DenseBase::RealViewReturnType DenseBase::realView() { + return RealViewReturnType(derived()); +} + +template +EIGEN_DEVICE_FUNC typename DenseBase::ConstRealViewReturnType DenseBase::realView() const { + return ConstRealViewReturnType(derived()); +} + +} // namespace Eigen + +#endif // EIGEN_REALVIEW_H diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 4e9ab0e4f89..18ba1036452 100644 --- 
a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -101,7 +101,7 @@ struct redux_novec_unroller { typedef typename Evaluator::Scalar Scalar; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) { + EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) { return func(redux_novec_unroller::run(eval, func), redux_novec_unroller::run(eval, func)); } @@ -114,7 +114,7 @@ struct redux_novec_unroller { typedef typename Evaluator::Scalar Scalar; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) { + EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) { return eval.coeffByOuterInner(outer, inner); } }; @@ -125,7 +125,7 @@ struct redux_novec_unroller { template struct redux_novec_unroller { typedef typename Evaluator::Scalar Scalar; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); } + EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); } }; template @@ -134,7 +134,7 @@ struct redux_novec_linear_unroller { typedef typename Evaluator::Scalar Scalar; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) { + EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) { return func(redux_novec_linear_unroller::run(eval, func), redux_novec_linear_unroller::run(eval, func)); } @@ -144,7 +144,7 @@ template struct redux_novec_linear_unroller { typedef typename Evaluator::Scalar Scalar; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) { + EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) { return eval.coeff(Start); } }; @@ -155,7 +155,7 @@ struct redux_novec_linear_unroller { template 
struct redux_novec_linear_unroller { typedef typename Evaluator::Scalar Scalar; - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); } + EIGEN_DEVICE_FUNC static constexpr EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); } }; /*** vectorization ***/ @@ -367,7 +367,7 @@ struct redux_impl template EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) { - EIGEN_ONLY_USED_FOR_DEBUG(xpr) + EIGEN_ONLY_USED_FOR_DEBUG(xpr); eigen_assert(xpr.rows() > 0 && xpr.cols() > 0 && "you are using an empty matrix"); if (VectorizedSize > 0) { Scalar res = func.predux( @@ -398,8 +398,8 @@ class redux_evaluator : public internal::evaluator { enum { MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, - // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime - // from the evaluator + // TODO: we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at + // runtime from the evaluator Flags = Base::Flags & ~DirectAccessBit, IsRowMajor = XprType::IsRowMajor, SizeAtCompileTime = XprType::SizeAtCompileTime, @@ -432,7 +432,7 @@ class redux_evaluator : public internal::evaluator { /** \returns the result of a full redux operation on the whole matrix or vector using \a func * * The template parameter \a BinaryOp is the type of the functor \a func which must be - * an associative operator. Both current C++98 and C++11 functor styles are handled. + * an associative operator. * * \warning the matrix must be not empty, otherwise an assertion is triggered. 
* diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 30ec277d06e..4493441d036 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -43,7 +43,7 @@ struct traits > OuterStrideMatch = IsVectorAtCompileTime || int(OuterStrideAtCompileTime) == int(Dynamic) || int(OuterStrideAtCompileTime) == int(Derived::OuterStrideAtCompileTime), // NOTE, this indirection of evaluator::Alignment is needed - // to workaround a very strange bug in MSVC related to the instantiation + // to work around an MSVC bug related to the instantiation // of has_*ary_operator in evaluator. // This line is surprisingly very sensitive. For instance, simply adding parenthesis // as "DerivedAlignment = (int(evaluator::Alignment))," will make MSVC fail... @@ -265,7 +265,7 @@ class Ref : public RefBase > { private: typedef internal::traits Traits; template - EIGEN_DEVICE_FUNC inline Ref( + EIGEN_DEVICE_FUNC constexpr inline Ref( const PlainObjectBase& expr, std::enable_if_t::MatchAtCompileTime), Derived>* = 0); @@ -275,17 +275,17 @@ class Ref : public RefBase > { #ifndef EIGEN_PARSED_BY_DOXYGEN template - EIGEN_DEVICE_FUNC inline Ref( + EIGEN_DEVICE_FUNC constexpr inline Ref( PlainObjectBase& expr, std::enable_if_t::MatchAtCompileTime), Derived>* = 0) { EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); // Construction must pass since we will not create temporary storage in the non-const case. 
const bool success = Base::construct(expr.derived()); - EIGEN_UNUSED_VARIABLE(success) + EIGEN_UNUSED_VARIABLE(success); eigen_assert(success); } template - EIGEN_DEVICE_FUNC inline Ref( + EIGEN_DEVICE_FUNC constexpr inline Ref( const DenseBase& expr, std::enable_if_t::MatchAtCompileTime), Derived>* = 0) #else @@ -299,7 +299,7 @@ class Ref : public RefBase > { EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase, THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); // Construction must pass since we will not create temporary storage in the non-const case. const bool success = Base::construct(expr.const_cast_derived()); - EIGEN_UNUSED_VARIABLE(success) + EIGEN_UNUSED_VARIABLE(success); eigen_assert(success); } @@ -327,8 +327,9 @@ class Ref EIGEN_DENSE_PUBLIC_INTERFACE(Ref) template - EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, - std::enable_if_t::ScalarTypeMatch), Derived>* = 0) { + EIGEN_DEVICE_FUNC constexpr inline Ref( + const DenseBase& expr, + std::enable_if_t::ScalarTypeMatch), Derived>* = 0) { // std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," // << match_helper::InnerStrideMatch << "\n"; std::cout << int(StrideType::OuterStrideAtCompileTime) // << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; std::cout << @@ -338,11 +339,11 @@ class Ref construct(expr.derived(), typename Traits::template match::type()); } - EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) { + EIGEN_DEVICE_FUNC constexpr inline Ref(const Ref& other) : Base(other) { // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy } - EIGEN_DEVICE_FUNC inline Ref(Ref&& other) { + EIGEN_DEVICE_FUNC constexpr inline Ref(Ref&& other) { if (other.data() == other.m_object.data()) { m_object = std::move(other.m_object); Base::construct(m_object); @@ -351,7 +352,7 @@ class Ref } template - EIGEN_DEVICE_FUNC inline Ref(const RefBase& other) { + EIGEN_DEVICE_FUNC constexpr inline Ref(const RefBase& other) { 
EIGEN_STATIC_ASSERT(Traits::template match::type::value || may_map_m_object_successfully, STORAGE_LAYOUT_DOES_NOT_MATCH); construct(other.derived(), typename Traits::template match::type()); @@ -370,7 +371,7 @@ class Ref EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { internal::call_assignment_no_alias(m_object, expr, internal::assign_op()); const bool success = Base::construct(m_object); - EIGEN_ONLY_USED_FOR_DEBUG(success) + EIGEN_ONLY_USED_FOR_DEBUG(success); eigen_assert(success); } diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index 34150452278..9bdc725c796 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -30,7 +30,7 @@ struct traits > : traits ColsAtCompileTime = ColFactor == Dynamic || int(MatrixType::ColsAtCompileTime) == Dynamic ? Dynamic : ColFactor * MatrixType::ColsAtCompileTime, - // FIXME we don't propagate the max sizes !!! + // FIXME: propagate MaxRowsAtCompileTime and MaxColsAtCompileTime. MaxRowsAtCompileTime = RowsAtCompileTime, MaxColsAtCompileTime = ColsAtCompileTime, IsRowMajor = MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1 ? 1 @@ -38,7 +38,7 @@ struct traits > : traits : (MatrixType::Flags & RowMajorBit) ? 1 : 0, - // FIXME enable DirectAccess with negative strides? + // FIXME: consider enabling DirectAccess with negative strides. Flags = IsRowMajor ? 
RowMajorBit : 0 }; }; @@ -71,7 +71,7 @@ class Replicate : public internal::dense_xpr_base NestedExpression; template - EIGEN_DEVICE_FUNC inline explicit Replicate(const OriginalMatrixType& matrix) + EIGEN_DEVICE_FUNC constexpr inline explicit Replicate(const OriginalMatrixType& matrix) : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) { EIGEN_STATIC_ASSERT((internal::is_same, OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) @@ -79,7 +79,7 @@ class Replicate : public internal::dense_xpr_base - EIGEN_DEVICE_FUNC inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) + EIGEN_DEVICE_FUNC constexpr inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) { EIGEN_STATIC_ASSERT((internal::is_same, OriginalMatrixType>::value), THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) @@ -88,7 +88,7 @@ class Replicate : public internal::dense_xpr_base public: typedef Impl Base; EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl) - EIGEN_DEVICE_FUNC inline ReshapedImpl(XprType& xpr) : Impl(xpr) {} - EIGEN_DEVICE_FUNC inline ReshapedImpl(XprType& xpr, Index reshapeRows, Index reshapeCols) + EIGEN_DEVICE_FUNC constexpr inline ReshapedImpl(XprType& xpr) : Impl(xpr) {} + EIGEN_DEVICE_FUNC constexpr inline ReshapedImpl(XprType& xpr, Index reshapeRows, Index reshapeCols) : Impl(xpr, reshapeRows, reshapeCols) {} }; @@ -161,15 +161,15 @@ class ReshapedImpl_dense /** Fixed-size constructor */ - EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr) : m_xpr(xpr), m_rows(Rows), m_cols(Cols) {} + EIGEN_DEVICE_FUNC constexpr inline ReshapedImpl_dense(XprType& xpr) : m_xpr(xpr), m_rows(Rows), m_cols(Cols) {} /** Dynamic-size constructor */ - EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols) + EIGEN_DEVICE_FUNC constexpr inline 
ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols) : m_xpr(xpr), m_rows(nRows), m_cols(nCols) {} - EIGEN_DEVICE_FUNC Index rows() const { return m_rows; } - EIGEN_DEVICE_FUNC Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_cols; } #ifdef EIGEN_PARSED_BY_DOXYGEN /** \sa MapBase::data() */ @@ -179,10 +179,10 @@ class ReshapedImpl_dense #endif /** \returns the nested expression */ - EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { return m_xpr; } + EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t& nestedExpression() const { return m_xpr; } /** \returns the nested expression */ - EIGEN_DEVICE_FUNC std::remove_reference_t& nestedExpression() { return m_xpr; } + EIGEN_DEVICE_FUNC constexpr std::remove_reference_t& nestedExpression() { return m_xpr; } protected: MatrixTypeNested m_xpr; @@ -203,16 +203,16 @@ class ReshapedImpl_dense : public MapBase& nestedExpression() const { return m_xpr; } + EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t& nestedExpression() const { return m_xpr; } - EIGEN_DEVICE_FUNC XprType& nestedExpression() { return m_xpr; } + EIGEN_DEVICE_FUNC constexpr XprType& nestedExpression() { return m_xpr; } /** \sa MapBase::innerStride() */ EIGEN_DEVICE_FUNC constexpr Index innerStride() const { return m_xpr.innerStride(); } @@ -265,7 +265,7 @@ struct evaluator > Alignment = evaluator::Alignment }; typedef reshaped_evaluator reshaped_evaluator_type; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : reshaped_evaluator_type(xpr) { + EIGEN_DEVICE_FUNC constexpr explicit evaluator(const XprType& xpr) : reshaped_evaluator_type(xpr) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } }; @@ -283,7 +283,8 @@ struct reshaped_evaluator RowCol; - EIGEN_DEVICE_FUNC inline RowCol index_remap(Index rowId, Index colId) const { + EIGEN_DEVICE_FUNC constexpr inline RowCol index_remap(Index rowId, 
Index colId) const { if (Order == ColMajor) { const Index nth_elem_idx = colId * m_xpr.rows() + rowId; return RowCol(nth_elem_idx % m_xpr.nestedExpression().rows(), nth_elem_idx / m_xpr.nestedExpression().rows()); @@ -302,74 +303,38 @@ struct reshaped_evaluator - inline PacketScalar packet(Index rowId, Index colId) const - { - const RowCol row_col = index_remap(rowId, colId); - return m_argImpl.template packet(row_col.first, row_col.second); - - } - - template - EIGEN_DEVICE_FUNC - inline void writePacket(Index rowId, Index colId, const PacketScalar& val) - { - const RowCol row_col = index_remap(rowId, colId); - m_argImpl.const_cast_derived().template writePacket - (row_col.first, row_col.second, val); - } - - template - EIGEN_DEVICE_FUNC - inline PacketScalar packet(Index index) const - { - const RowCol row_col = index_remap(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); - return m_argImpl.template packet(row_col.first, row_col.second); - } - template - EIGEN_DEVICE_FUNC - inline void writePacket(Index index, const PacketScalar& val) - { - const RowCol row_col = index_remap(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? 
index : 0); - return m_argImpl.template packet(row_col.first, row_col.second, val); - } -#endif protected: evaluator m_argImpl; const XprType& m_xpr; @@ -382,10 +347,8 @@ struct reshaped_evaluator XprType; typedef typename XprType::Scalar Scalar; - EIGEN_DEVICE_FUNC explicit reshaped_evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC constexpr explicit reshaped_evaluator(const XprType& xpr) : mapbase_evaluator(xpr) { - // TODO: for the 3.4 release, this should be turned to an internal assertion, but let's keep it as is for the beta - // lifetime eigen_assert(((std::uintptr_t(xpr.data()) % plain_enum_max(1, evaluator::Alignment)) == 0) && "data is not aligned"); } diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h index 892c193bd31..410b77d2e2d 100644 --- a/Eigen/src/Core/ReturnByValue.h +++ b/Eigen/src/Core/ReturnByValue.h @@ -23,7 +23,7 @@ struct traits > : public traits: enum { // We're disabling the DirectAccess because e.g. the constructor of // the Block-with-DirectAccess expression requires to have a coeffRef method. - // Also, we don't want to have to implement the stride stuff. + // Also, this avoids having to implement stride support. Flags = (traits::ReturnType>::Flags | EvalBeforeNestingBit) & ~DirectAccessBit }; }; @@ -32,7 +32,7 @@ struct traits > : public traits: * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix. * So internal::nested always gives the plain return matrix type. * - * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? + * FIXME: this specialization may be redundant with EvalBeforeNestingBit. 
* Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators */ template diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index d11ba16708b..a4af8d11598 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -83,7 +83,7 @@ class Reverse : public internal::dense_xpr_base > typedef internal::reverse_packet_cond reverse_packet; public: - EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) {} + EIGEN_DEVICE_FUNC constexpr explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) {} EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) @@ -92,7 +92,7 @@ class Reverse : public internal::dense_xpr_base > EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); } - EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { + EIGEN_DEVICE_FUNC constexpr const internal::remove_all_t& nestedExpression() const { return m_matrix; } diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h index 0fa5f1e178e..6ad290f23a6 100644 --- a/Eigen/src/Core/Select.h +++ b/Eigen/src/Core/Select.h @@ -15,7 +15,7 @@ namespace Eigen { -/** \class Select +/** \typedef Select * \ingroup Core_Module * * \brief Expression of a coefficient wise version of the C++ ternary operator ?: @@ -24,73 +24,16 @@ namespace Eigen { * \tparam ThenMatrixType the type of the \em then expression * \tparam ElseMatrixType the type of the \em else expression * - * This class represents an expression of a coefficient wise version of the C++ ternary operator ?:. + * This type represents an expression of a coefficient wise version of the C++ ternary operator ?:. * It is the return type of DenseBase::select() and most of the time this is the only way it is used. 
* * \sa DenseBase::select(const DenseBase&, const DenseBase&) const */ - -namespace internal { -template -struct traits > : traits { - typedef typename traits::Scalar Scalar; - typedef Dense StorageKind; - typedef typename traits::XprKind XprKind; - typedef typename ConditionMatrixType::Nested ConditionMatrixNested; - typedef typename ThenMatrixType::Nested ThenMatrixNested; - typedef typename ElseMatrixType::Nested ElseMatrixNested; - enum { - RowsAtCompileTime = ConditionMatrixType::RowsAtCompileTime, - ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, - MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, - MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, - Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit - }; -}; -} // namespace internal - template -class Select : public internal::dense_xpr_base >::type, - internal::no_assignment_operator { - public: - typedef typename internal::dense_xpr_base