Skip to content

Commit f8b00a2

Browse files
committed
complete!
1 parent 0369832 commit f8b00a2

File tree

4 files changed

+78
-39
lines changed

4 files changed

+78
-39
lines changed

source/module_base/grid/batch.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,22 +24,21 @@ namespace {
2424
*
2525
* It can be shown that the optimal c is the centroid of the points, and
2626
* the optimal n is the eigenvector corresponding to the largest eigenvalue
27-
* of the matrix R*R^T, where R is the matrix whose i-th column is
28-
* r[idx[i]] - c.
27+
* of the matrix R*R^T, where the i-th column of R is r[idx[i]] - c.
2928
*
30-
* @param[in] m Number of selected points (length of idx).
3129
* @param[in] grid Coordinates of all grid points.
3230
* grid[3*j], grid[3*j+1], grid[3*j+2] are the
3331
* x, y, z coordinates of the j-th point.
34-
* The length of grid is at least 3*m.
3532
* @param[in,out] idx Indices of the selected points within grid.
36-
* On exit, the indices are rearranged such that
37-
* points in each subset are put together.
33+
* On return, idx will be rearranged such that
34+
* points belonging to the same subset have their
35+
* indices placed together.
36+
* @param[in] m Number of selected points (length of idx).
3837
*
3938
* @return The number of points in the first subset (according to idx).
4039
*
4140
*/
42-
int _maxmin_divide(int m, const double* grid, int* idx) {
41+
int _maxmin_divide(const double* grid, int* idx, int m) {
4342
assert(m > 1);
4443
if (m == 2) {
4544
return 1;
@@ -84,16 +83,16 @@ int _maxmin_divide(int m, const double* grid, int* idx) {
8483

8584
int *head = idx;
8685
std::reverse_iterator<int*> tail(idx + m), rend(idx);
87-
auto is_negative = [&dist](int j) { return dist[j] < 0; };
86+
auto is_negative = [&dist, &idx](int& j) { return dist[&j - idx] < 0; };
8887
while ( ( head = std::find_if(head, idx + m, is_negative) ) <
8988
( tail = std::find_if_not(tail, rend, is_negative) ).base() ) {
9089
std::swap(*head, *tail);
91-
std::swap(dist[head - idx], dist[tail.base() - idx]);
90+
std::swap(dist[head - idx], dist[tail.base() - idx - 1]);
9291
++head;
9392
++tail;
9493
}
9594

96-
return std::find(idx, idx + m, is_negative) - idx;
95+
return head - idx;
9796
}
9897

9998
} // end of anonymous namespace
@@ -109,7 +108,7 @@ std::vector<int> Grid::Batch::maxmin(
109108
return std::vector<int>{0};
110109
}
111110

112-
int m_left = _maxmin_divide(m, grid, idx);
111+
int m_left = _maxmin_divide(grid, idx, m);
113112

114113
std::vector<int> left = maxmin(grid, idx, m_left, m_thr);
115114
std::vector<int> right = maxmin(grid, idx + m_left, m - m_left, m_thr);

source/module_base/grid/batch.h

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,39 @@ namespace Batch {
99
/**
1010
* @brief Divide a set of points into batches by the "MaxMin" algorithm.
1111
*
12-
* This function recursively uses a cut plane to divide a set of grid
13-
* points into two subsets using the "MaxMin" algorithm, until the
14-
* number of points in each subset (batch) is no more than m_thr.
12+
* This function recursively uses cut planes to divide grid points into
13+
* two subsets using the "MaxMin" algorithm, until the number of points
14+
* in each subset (batch) is no more than m_thr.
1515
*
1616
* @param[in] grid Coordinates of all grid points.
17+
* grid[3*j], grid[3*j+1], grid[3*j+2] are
18+
* the x, y, z coordinates of the j-th point.
1719
* @param[in,out] idx Indices of the initial set within grid.
1820
* On return, idx will be rearranged such
19-
* that points belonging to the same subset
20-
* are grouped together.
21+
* that points belonging to the same batch
22+
* have their indices placed together.
2123
* @param[in] m Number of points in the initial set.
22-
* @param[in] m_thr Size limit of subset.
24+
* (length of idx)
25+
* @param[in] m_thr Size limit of a batch.
2326
*
24-
* @return Indices (within idx) of the first point in each batch.
27+
* @return Indices (for idx) of the first point in each batch.
28+
*
29+
* For example, given grid (illustrated by their indices) located as follows:
30+
*
31+
* 0 1 16 2 3 18
32+
* 4 5 17 6 7
33+
*
34+
*
35+
* 8 9 20 10 11
36+
* 12 13 19 14 15
37+
*
38+
* a possible outcome with m_thr = 4 and idx(in) = {0, 1, 2, ..., 15} is:
39+
*
40+
* idx(out): 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15
41+
* return : {0, 4, 8, 12}
42+
*
43+
* which means the selected set (labeled 0-15) is divided into 4 batches:
44+
* {0, 1, 4, 5}, {8, 9, 12, 13}, {2, 3, 6, 7}, {10, 11, 14, 15}.
2545
*
2646
*/
2747
std::vector<int> maxmin(const double* grid, int* idx, int m, int m_thr);

source/module_base/grid/test/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,10 @@ AddTest(
2020
../radial.cpp
2121
../delley.cpp
2222
)
23+
24+
AddTest(
25+
TARGET test_batch
26+
SOURCES test_batch.cpp
27+
../batch.cpp
28+
LIBS ${math_libs}
29+
)

source/module_base/grid/test/test_batch.cpp

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
using namespace Grid::Batch;
1212

13-
1413
class BatchTest: public ::testing::Test
1514
{
1615
protected:
@@ -19,17 +18,28 @@ class BatchTest: public ::testing::Test
1918
std::vector<double> grid_;
2019
std::vector<int> idx_;
2120

21+
// parameters for cluster generation
2222
int n_each_ = 10;
23-
double offset_ = 10.0;
2423
double width_ = 1.0;
24+
25+
// These offsets should be different from each other as maxmin might
26+
// fail for highly symmetric, well-separated clusters.
27+
// Consider the case where the 8 clusters as a whole have octahedral
28+
// symmetry. In this case, R*R^T must be proprotional to the identity,
29+
// and eigenvalues are three-fold degenerate, because xy, yz and zx
30+
// plane are equivalent in terms of the maxmin optimization problem.
31+
// This means eigenvectors are arbitrary in this case.
32+
double offset_x_ = 7.0;
33+
double offset_y_ = 8.0;
34+
double offset_z_ = 9.0;
2535
};
2636

27-
std::vector<double> gen_octant_cluster(int n_each, double offset, double width) {
37+
std::vector<double> gen_octant_cluster(int n_each, double offset_x, double offset_y, double offset_z, double width) {
2838

2939
// Generates a set of points consisting of 8 well-separated, equal-sized
3040
// clusters located in individual octants.
3141

32-
std::vector<double> grid(n_each * 8);
42+
std::vector<double> grid(n_each * 8 * 3);
3343
int I = 0;
3444

3545
std::random_device rd;
@@ -40,15 +50,14 @@ std::vector<double> gen_octant_cluster(int n_each, double offset, double width)
4050
for (int sign_y : {-1, 1}) {
4151
for (int sign_z : {-1, 1}) {
4252
for (int i = 0; i < n_each; ++i) {
43-
grid[3*I ] = sign_x * offset + dis(gen);
44-
grid[3*I + 1] = sign_y * offset + dis(gen);
45-
grid[3*I + 2] = sign_z * offset + dis(gen);
53+
grid[3*I ] = sign_x * offset_x + dis(gen);
54+
grid[3*I + 1] = sign_y * offset_y + dis(gen);
55+
grid[3*I + 2] = sign_z * offset_z + dis(gen);
4656
++I;
4757
}
4858
}
4959
}
5060
}
51-
5261
return grid;
5362
}
5463

@@ -73,9 +82,9 @@ bool is_same_octant(int ngrid, const double* grid) {
7382

7483
void BatchTest::SetUp()
7584
{
76-
grid_ = gen_octant_cluster(n_each_, offset_, width_);
85+
grid_ = gen_octant_cluster(n_each_, offset_x_, offset_y_, offset_z_, width_);
7786

78-
idx_.resize(grid_.size());
87+
idx_.resize(grid_.size() / 3);
7988
std::iota(idx_.begin(), idx_.end(), 0);
8089

8190
std::random_device rd;
@@ -91,21 +100,25 @@ TEST_F(BatchTest, MaxMinOctantCluster)
91100
// The resulting batches should be able to recover this structure.
92101

93102
std::vector<int> delim =
94-
maxmin(n_each_, grid_.size(), grid_.data(), idx_.data());
103+
maxmin(grid_.data(), idx_.data(), grid_.size() / 3, n_each_);
104+
105+
EXPECT_EQ(delim.size(), 8);
95106

96-
EXPECT_EQ(delim.size(), 7);
97-
for (int i = 0; i < 7; ++i) {
98-
// check number of points in each batch via index delimiters
99-
EXPECT_EQ(delim[i], (i+1) * n_each_);
107+
std::vector<double> grid_batch(3 * n_each_);
108+
for (int i = 0; i < 8; ++i) {
100109

101-
// verify that points in each batch is in the same octant
102-
std::vector<double> batch(3 * n_each_);
110+
EXPECT_EQ(delim[i], i * n_each_);
111+
112+
// collect points within the present batch
103113
for (int j = 0; j < n_each_; ++j) {
104-
for (int k = 0; k < 3; ++k) {
105-
batch[3*j + k] = grid_[3*(i*n_each_ + j) + k];
106-
}
114+
int ig = idx_[delim[i] + j];
115+
grid_batch[3*j ] = grid_[3*ig ];
116+
grid_batch[3*j + 1] = grid_[3*ig + 1];
117+
grid_batch[3*j + 2] = grid_[3*ig + 2];
107118
}
108-
EXPECT_TRUE(is_same_octant(n_each_, batch.data()));
119+
120+
// verify that points in a batch reside in the same octant
121+
EXPECT_TRUE(is_same_octant(n_each_, grid_batch.data()));
109122
}
110123
}
111124

0 commit comments

Comments
 (0)