diff --git a/source/module_base/blas_connector.h b/source/module_base/blas_connector.h index e7b78bab06..090d8512ee 100644 --- a/source/module_base/blas_connector.h +++ b/source/module_base/blas_connector.h @@ -39,6 +39,20 @@ extern "C" double dnrm2_( const int *n, const double *X, const int *incX ); double dznrm2_( const int *n, const std::complex *X, const int *incX ); + // symmetric rank-k update + void dsyrk_( + const char* uplo, + const char* trans, + const int* n, + const int* k, + const double* alpha, + const double* a, + const int* lda, + const double* beta, + double* c, + const int* ldc + ); + // level 2: matrix-std::vector operations, O(n^2) data and O(n^2) work. void sgemv_(const char*const transa, const int*const m, const int*const n, const float*const alpha, const float*const a, const int*const lda, const float*const x, const int*const incx, @@ -267,4 +281,4 @@ void zgemv_i(const char *trans, */ #endif // GATHER_INFO -#endif // BLAS_CONNECTOR_H \ No newline at end of file +#endif // BLAS_CONNECTOR_H diff --git a/source/module_base/grid/batch.cpp b/source/module_base/grid/batch.cpp new file mode 100644 index 0000000000..832323e62a --- /dev/null +++ b/source/module_base/grid/batch.cpp @@ -0,0 +1,123 @@ +#include "module_base/grid/batch.h" + +#include +#include +#include + +#include "module_base/blas_connector.h" +#include "module_base/lapack_connector.h" + +namespace { + +/** + * @brief Divide a set of points into two subsets by the "MaxMin" algorithm. + * + * This function divides a given set of grid points by a cut plane + * {x|n^T*(x-c) = 0} where the normal vector n and the point c are + * determined by the "MaxMin" problem: + * + * max min sum_{i=1}^{m} [n^T * (r[idx[i]] - c)]^2 + * n c + * + * here r[j] = (grid[3*j], grid[3*j+1], grid[3*j+2]) is the position of + * the j-th point. + * + * It can be shown that the optimal c is the centroid of the points, and + * the optimal n is the eigenvector corresponding to the largest eigenvalue + * of the matrix R*R^T, where the i-th column of R is r[idx[i]] - c. + * + * @param[in] grid Coordinates of all grid points. + * grid[3*j], grid[3*j+1], grid[3*j+2] are the + * x, y, z coordinates of the j-th point. + * @param[in,out] idx Indices of the selected points within grid. + * On return, idx will be rearranged such that + * points belonging to the same subset have their + * indices placed together. + * @param[in] m Number of selected points (length of idx). + * + * @return The number of points in the first subset within idx. + * + */ +int _maxmin_divide(const double* grid, int* idx, int m) { + assert(m > 1); + if (m == 2) { + return 1; + } + + std::vector centroid(3, 0.0); + for (int i = 0; i < m; ++i) { + int j = idx[i]; + centroid[0] += grid[3*j ]; + centroid[1] += grid[3*j + 1]; + centroid[2] += grid[3*j + 2]; + } + centroid[0] /= m; + centroid[1] /= m; + centroid[2] /= m; + + // positions w.r.t. the centroid + std::vector R(3*m, 0.0); + for (int i = 0; i < m; ++i) { + int j = idx[i]; + R[3*i ] = grid[3*j ] - centroid[0]; + R[3*i + 1] = grid[3*j + 1] - centroid[1]; + R[3*i + 2] = grid[3*j + 2] - centroid[2]; + } + + // The normal vector of the cut plane is taken to be the eigenvector + // corresponding to the largest eigenvalue of the 3x3 matrix A = R*R^T. + std::vector A(9, 0.0); + int i3 = 3, i1 = 1; + double d0 = 0.0, d1 = 1.0; + dsyrk_("U", "N", &i3, &m, &d1, R.data(), &i3, &d0, A.data(), &i3); + + int info = 0, lwork = 102 /* determined by a work space query */; + std::vector e(3), work(lwork); + dsyev_("V", "U", &i3, A.data(), &i3, e.data(), work.data(), &lwork, &info); + double* n = A.data() + 6; // normal vector of the cut plane + + // Rearrange the indices to put points in each subset together by + // examining the signed distances of points to the cut plane (R^T*n). + std::vector dist(m); + dgemv_("T", &i3, &m, &d1, R.data(), &i3, n, &i1, &d0, dist.data(), &i1); + + int *head = idx; + std::reverse_iterator tail(idx + m), rend(idx); + auto is_negative = [&dist, &idx](int& j) { return dist[&j - idx] < 0; }; + while ( ( head = std::find_if(head, idx + m, is_negative) ) < + ( tail = std::find_if_not(tail, rend, is_negative) ).base() ) { + std::swap(*head, *tail); + std::swap(dist[head - idx], dist[tail.base() - idx - 1]); + ++head; + ++tail; + } + + return head - idx; +} + +} // end of anonymous namespace + + +std::vector Grid::Batch::maxmin( + const double* grid, + int* idx, + int m, + int m_thr +) { + if (m <= m_thr) { + return std::vector{0}; + } + + int m_left = _maxmin_divide(grid, idx, m); + + std::vector left = maxmin(grid, idx, m_left, m_thr); + std::vector right = maxmin(grid, idx + m_left, m - m_left, m_thr); + std::for_each(right.begin(), right.end(), + [m_left](int& x) { x += m_left; } + ); + + left.insert(left.end(), right.begin(), right.end()); + return left; +} + + diff --git a/source/module_base/grid/batch.h b/source/module_base/grid/batch.h new file mode 100644 index 0000000000..8a6ef733e1 --- /dev/null +++ b/source/module_base/grid/batch.h @@ -0,0 +1,54 @@ +#ifndef GRID_BATCH_H +#define GRID_BATCH_H + +#include + +namespace Grid { +namespace Batch { + +/** + * @brief Divide a set of points into batches by the "MaxMin" algorithm. + * + * This function recursively uses cut planes to divide grid points into + * two subsets using the "MaxMin" algorithm, until the number of points + * in each subset (batch) is no more than m_thr. + * + * @param[in] grid Coordinates of all grid points. + * grid[3*j], grid[3*j+1], grid[3*j+2] are + * the x, y, z coordinates of the j-th point. + * @param[in,out] idx Indices of the initial set within grid. + * On return, idx will be rearranged such + * that points belonging to the same batch + * have their indices placed together. + * @param[in] m Number of points in the initial set. + * (length of idx) + * @param[in] m_thr Size limit of a batch. + * + * @return Indices (for idx) of the first point in each batch. + * + * For example, given grid (illustrated by their indices) located as follows: + * + * 0 1 16 2 3 18 + * 4 5 17 6 7 + * + * + * 8 9 20 10 11 + * 12 13 19 14 15 + * + * a possible outcome with m_thr = 4 and idx(in) = {0, 1, 2, ..., 15} + * (idx may correspond to a subset of grid and does not have to be sorted, + * but it must not contain duplicates) is: + * + * idx(out): 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 + * return : {0, 4, 8, 12} + * + * which means the selected set (labeled 0-15) is divided into 4 batches: + * {0, 1, 4, 5}, {8, 9, 12, 13}, {2, 3, 6, 7}, {10, 11, 14, 15}. + * + */ +std::vector maxmin(const double* grid, int* idx, int m, int m_thr); + +} // end of namespace Batch +} // end of namespace Grid + +#endif diff --git a/source/module_base/grid/test/CMakeLists.txt b/source/module_base/grid/test/CMakeLists.txt index e403c8c2c6..a0f1b6fa9f 100644 --- a/source/module_base/grid/test/CMakeLists.txt +++ b/source/module_base/grid/test/CMakeLists.txt @@ -20,3 +20,10 @@ AddTest( ../radial.cpp ../delley.cpp ) + +AddTest( + TARGET test_batch + SOURCES test_batch.cpp + ../batch.cpp + LIBS ${math_libs} +) diff --git a/source/module_base/grid/test/test_batch.cpp b/source/module_base/grid/test/test_batch.cpp new file mode 100644 index 0000000000..74324bc47b --- /dev/null +++ b/source/module_base/grid/test/test_batch.cpp @@ -0,0 +1,234 @@ +#include "module_base/grid/batch.h" + +#include "gtest/gtest.h" +#include +#include +//#include + +#ifdef __MPI +#include +#endif + +using namespace Grid::Batch; + + +class BatchTest: public ::testing::Test +{ +protected: + + std::vector grid_; + std::vector idx_; + + // parameters for 8-octant clusters + const int n_batch_oct_ = 10; + const double width_oct_ = 1.0; + const double offset_x_ = 7.0; + const double offset_y_ = 8.0; + const double offset_z_ = 9.0; + // NOTE: These offsets should be different from each other as maxmin + // might fail for highly symmetric, well-separated clusters. + // Consider the case where the 8 clusters as a whole have octahedral + // symmetry. In this case, R*R^T must be proprotional to the identity, + // and eigenvalues are three-fold degenerate, because xy, yz and zx + // plane are equivalent in terms of the maxmin optimization problem. + // This means eigenvectors are arbitrary in this case. + + + // parameters for a random cluster + const int n_grid_rand_ = 1000; + const int n_batch_rand_ = 200; + const double width_rand_ = 10.0; + const double xc_ = 1.0; + const double yc_ = 1.0; + const double zc_ = 2.0; +}; + + +void gen_random( + int ngrid, + double xc, + double yc, + double zc, + double width, + std::vector& grid, + std::vector& idx +) { + + // Generates a set of points centered around (xc, yc, zc). + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(-width, width); + + grid.resize(3 * ngrid); + for (int i = 0; i < ngrid; ++i) { + grid[3*i ] = xc + dis(gen); + grid[3*i + 1] = yc + dis(gen); + grid[3*i + 2] = zc + dis(gen); + } + + idx.resize(ngrid); + std::iota(idx.begin(), idx.end(), 0); + std::shuffle(idx.begin(), idx.end(), gen); +} + + +void gen_octant( + int n_each, + double offset_x, + double offset_y, + double offset_z, + double width, + std::vector& grid, + std::vector& idx +) { + + // Generates a set of points consisting of 8 well-separated, equal-sized + // clusters located in individual octants. + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(-width, width); + + int ngrid = 8 * n_each; + grid.resize(3 * ngrid); + int I = 0; + for (int sign_x : {-1, 1}) { + for (int sign_y : {-1, 1}) { + for (int sign_z : {-1, 1}) { + for (int i = 0; i < n_each; ++i, ++I) { + grid[3*I ] = sign_x * offset_x + dis(gen); + grid[3*I + 1] = sign_y * offset_y + dis(gen); + grid[3*I + 2] = sign_z * offset_z + dis(gen); + } + } + } + } + + idx.resize(ngrid); + std::iota(idx.begin(), idx.end(), 0); + std::shuffle(idx.begin(), idx.end(), gen); +} + + +bool is_same_octant(int ngrid, const double* grid) { + if (ngrid == 0) { + return true; + } + const bool is_positive_x = grid[0] > 0; + const bool is_positive_y = grid[1] > 0; + const bool is_positive_z = grid[2] > 0; + const double* end = grid + 3 * ngrid; + for (; grid != end; grid += 3) { + if ( is_positive_x != (grid[0] > 0) || + is_positive_y != (grid[1] > 0) || + is_positive_z != (grid[2] > 0) ) { + return false; + } + } + return true; +} + + +bool good_batch_size( + const std::vector& idx, + const std::vector& delim, + int n_batch_thr +) { + // checks if the sizes of batches are within the specified limit + + bool flag = (delim[0] == 0); + + size_t i = 1; + while (flag && i < delim.size()) { + int sz_batch = delim[i] - delim[i-1]; + flag = flag && (sz_batch > 0) && (sz_batch <= n_batch_thr); + ++i; + } + + return flag && ( ((int)idx.size() - delim.back()) < n_batch_thr ); +} + + +TEST_F(BatchTest, MaxMinRandom) +{ + // This test verifies that the sizes of batches produced by maxmin + // do not exceed the specified limit. + + gen_random(n_grid_rand_, xc_, yc_, zc_, width_rand_, grid_, idx_); + + std::vector delim = + maxmin(grid_.data(), idx_.data(), idx_.size(), n_batch_rand_); + + EXPECT_TRUE(good_batch_size(idx_, delim, n_batch_rand_)); + + // write grid, idx & delim to file + //FILE* fp = fopen("grid.dat", "w"); + //for (size_t i = 0; i < grid_.size() / 3; ++i) { + // std::fprintf(fp, "% 12.6f % 12.6f % 12.6f\n", + // grid_[3*i], grid_[3*i + 1], grid_[3*i + 2]); + //} + //fclose(fp); + + //fp = fopen("idx.dat", "w"); + //for (size_t i = 0; i < idx_.size(); ++i) { + // std::fprintf(fp, "%d\n", idx_[i]); + //} + //fclose(fp); + + //fp = fopen("delim.dat", "w"); + //for (size_t i = 0; i < delim.size(); ++i) { + // std::fprintf(fp, "%d\n", delim[i]); + //} + //fclose(fp); +} + + +TEST_F(BatchTest, MaxMinOctant) +{ + // This test applies maxmin to a set of points consisting of 8 + // well-separated, equal-sized clusters located in individual octants. + // The resulting batches should be able to recover this structure. + + gen_octant(n_batch_oct_, offset_x_, offset_y_, offset_z_, width_oct_, + grid_, idx_); + + std::vector delim = + maxmin(grid_.data(), idx_.data(), idx_.size(), n_batch_oct_); + + EXPECT_EQ(delim.size(), 8); + + std::vector grid_batch(3 * n_batch_oct_); + for (int i = 0; i < 8; ++i) { + + EXPECT_EQ(delim[i], i * n_batch_oct_); + + // collect points within the present batch + for (int j = 0; j < n_batch_oct_; ++j) { + int ig = idx_[delim[i] + j]; + grid_batch[3*j ] = grid_[3*ig ]; + grid_batch[3*j + 1] = grid_[3*ig + 1]; + grid_batch[3*j + 2] = grid_[3*ig + 2]; + } + + // verify that points in a batch reside in the same octant + EXPECT_TRUE(is_same_octant(n_batch_oct_, grid_batch.data())); + } +} + + +int main(int argc, char** argv) +{ +#ifdef __MPI + MPI_Init(&argc, &argv); +#endif + + testing::InitGoogleTest(&argc, argv); + int result = RUN_ALL_TESTS(); + +#ifdef __MPI + MPI_Finalize(); +#endif + + return result; +}