diff --git a/source/module_base/blas_connector.h b/source/module_base/blas_connector.h
index e7b78bab06..090d8512ee 100644
--- a/source/module_base/blas_connector.h
+++ b/source/module_base/blas_connector.h
@@ -39,6 +39,20 @@ extern "C"
 	double dnrm2_( const int *n, const double *X, const int *incX );
 	double dznrm2_( const int *n, const std::complex<double> *X, const int *incX );
 
+    // symmetric rank-k update
+    void dsyrk_(
+        const char* uplo,
+        const char* trans,
+        const int* n,
+        const int* k,
+        const double* alpha,
+        const double* a,
+        const int* lda,
+        const double* beta,
+        double* c,
+        const int* ldc
+    );
+
 	// level 2: matrix-std::vector operations, O(n^2) data and O(n^2) work.
 	void sgemv_(const char*const transa, const int*const m, const int*const n,
 		const float*const alpha, const float*const a, const int*const lda, const float*const x, const int*const incx,
@@ -267,4 +281,4 @@ void zgemv_i(const char *trans,
 */
 
 #endif // GATHER_INFO
-#endif // BLAS_CONNECTOR_H
\ No newline at end of file
+#endif // BLAS_CONNECTOR_H
diff --git a/source/module_base/grid/batch.cpp b/source/module_base/grid/batch.cpp
new file mode 100644
index 0000000000..832323e62a
--- /dev/null
+++ b/source/module_base/grid/batch.cpp
@@ -0,0 +1,123 @@
+#include "module_base/grid/batch.h"
+
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+
+#include "module_base/blas_connector.h"
+#include "module_base/lapack_connector.h"
+
+namespace {
+
+/**
+ * @brief Divide a set of points into two subsets by the "MaxMin" algorithm.
+ *
+ * This function divides a given set of grid points by a cut plane
+ * {x|n^T*(x-c) = 0} where the normal vector n and the point c are
+ * determined by the "MaxMin" problem:
+ *
+ *      max min sum_{i=1}^{m} [n^T * (r[idx[i]] - c)]^2
+ *       n   c
+ *
+ * here r[j] = (grid[3*j], grid[3*j+1], grid[3*j+2]) is the position of
+ * the j-th point.
+ *
+ * It can be shown that the optimal c is the centroid of the points, and
+ * the optimal n is the eigenvector corresponding to the largest eigenvalue
+ * of the matrix R*R^T, where the i-th column of R is r[idx[i]] - c.
+ *
+ * @param[in]       grid    Coordinates of all grid points.
+ *                          grid[3*j], grid[3*j+1], grid[3*j+2] are the
+ *                          x, y, z coordinates of the j-th point.
+ * @param[in,out]   idx     Indices of the selected points within grid.
+ *                          On return, idx will be rearranged such that
+ *                          points belonging to the same subset have their
+ *                          indices placed together.
+ * @param[in]       m       Number of selected points (length of idx).
+ *
+ * @return The number of points in the first subset within idx.
+ *
+ */
+int _maxmin_divide(const double* grid, int* idx, int m) {
+    assert(m > 1);
+    if (m == 2) {
+        return 1;
+    }
+
+    std::vector<double> centroid(3, 0.0);
+    for (int i = 0; i < m; ++i) {
+        int j = idx[i];
+        centroid[0] += grid[3*j    ];
+        centroid[1] += grid[3*j + 1];
+        centroid[2] += grid[3*j + 2];
+    }
+    centroid[0] /= m;
+    centroid[1] /= m;
+    centroid[2] /= m;
+
+    // positions w.r.t. the centroid
+    std::vector<double> R(3*m, 0.0);
+    for (int i = 0; i < m; ++i) {
+        int j = idx[i];
+        R[3*i    ] = grid[3*j    ] - centroid[0];
+        R[3*i + 1] = grid[3*j + 1] - centroid[1];
+        R[3*i + 2] = grid[3*j + 2] - centroid[2];
+    }
+
+    // The normal vector of the cut plane is taken to be the eigenvector
+    // corresponding to the largest eigenvalue of the 3x3 matrix A = R*R^T.
+    std::vector<double> A(9, 0.0);
+    int i3 = 3, i1 = 1;
+    double d0 = 0.0, d1 = 1.0;
+    dsyrk_("U", "N", &i3, &m, &d1, R.data(), &i3, &d0, A.data(), &i3);
+
+    int info = 0, lwork = 102 /* determined by a work space query */;
+    std::vector<double> e(3), work(lwork);
+    dsyev_("V", "U", &i3, A.data(), &i3, e.data(), work.data(), &lwork, &info);
+    double* n = A.data() + 6; // normal vector of the cut plane
+
+    // Rearrange the indices to put points in each subset together by
+    // examining the signed distances of points to the cut plane (R^T*n).
+    std::vector<double> dist(m);
+    dgemv_("T", &i3, &m, &d1, R.data(), &i3, n, &i1, &d0, dist.data(), &i1);
+
+    int *head = idx;
+    std::reverse_iterator<int*> tail(idx + m), rend(idx);
+    auto is_negative = [&dist, &idx](int& j) { return dist[&j - idx] < 0; };
+    while ( ( head = std::find_if(head, idx + m, is_negative) ) <
+            ( tail = std::find_if_not(tail, rend, is_negative) ).base() ) {
+        std::swap(*head, *tail);
+        std::swap(dist[head - idx], dist[tail.base() - idx - 1]);
+        ++head;
+        ++tail;
+    }
+
+    return head - idx;
+}
+
+} // end of anonymous namespace
+
+
+std::vector<int> Grid::Batch::maxmin(
+    const double* grid,
+    int* idx,
+    int m,
+    int m_thr
+) {
+    if (m <= m_thr) {
+        return std::vector<int>{0};
+    }
+
+    int m_left = _maxmin_divide(grid, idx, m);
+
+    std::vector<int> left = maxmin(grid, idx, m_left, m_thr);
+    std::vector<int> right = maxmin(grid, idx + m_left, m - m_left, m_thr);
+    std::for_each(right.begin(), right.end(),
+        [m_left](int& x) { x += m_left; }
+    );
+
+    left.insert(left.end(), right.begin(), right.end());
+    return left;
+}
+
+
diff --git a/source/module_base/grid/batch.h b/source/module_base/grid/batch.h
new file mode 100644
index 0000000000..8a6ef733e1
--- /dev/null
+++ b/source/module_base/grid/batch.h
@@ -0,0 +1,54 @@
+#ifndef GRID_BATCH_H
+#define GRID_BATCH_H
+
+#include <vector>
+
+namespace Grid {
+namespace Batch {
+
+/**
+ * @brief Divide a set of points into batches by the "MaxMin" algorithm.
+ *
+ * This function recursively uses cut planes to divide grid points into
+ * two subsets using the "MaxMin" algorithm, until the number of points
+ * in each subset (batch) is no more than m_thr.
+ *
+ * @param[in]       grid        Coordinates of all grid points.
+ *                              grid[3*j], grid[3*j+1], grid[3*j+2] are
+ *                              the x, y, z coordinates of the j-th point.
+ * @param[in,out]   idx         Indices of the initial set within grid.
+ *                              On return, idx will be rearranged such
+ *                              that points belonging to the same batch
+ *                              have their indices placed together.
+ * @param[in]       m           Number of points in the initial set.
+ *                              (length of idx)
+ * @param[in]       m_thr       Size limit of a batch.
+ *
+ * @return          Indices (for idx) of the first point in each batch.
+ *
+ * For example, given grid (illustrated by their indices) located as follows:
+ *
+ *      0  1  16          2  3  18
+ *      4  5  17            6  7
+ *
+ *
+ *      8  9             20 10 11
+ *     12 13 19           14 15
+ *
+ * a possible outcome with m_thr = 4 and idx(in) = {0, 1, 2, ..., 15}
+ * (idx may correspond to a subset of grid and does not have to be sorted,
+ * but it must not contain duplicates) is:
+ *
+ * idx(out): 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15
+ * return  : {0, 4, 8, 12}
+ *
+ * which means the selected set (labeled 0-15) is divided into 4 batches:
+ * {0, 1, 4, 5}, {8, 9, 12, 13}, {2, 3, 6, 7}, {10, 11, 14, 15}.
+ *
+ */
+std::vector<int> maxmin(const double* grid, int* idx, int m, int m_thr);
+
+} // end of namespace Batch
+} // end of namespace Grid
+
+#endif
diff --git a/source/module_base/grid/test/CMakeLists.txt b/source/module_base/grid/test/CMakeLists.txt
index e403c8c2c6..a0f1b6fa9f 100644
--- a/source/module_base/grid/test/CMakeLists.txt
+++ b/source/module_base/grid/test/CMakeLists.txt
@@ -20,3 +20,10 @@ AddTest(
   ../radial.cpp
   ../delley.cpp
 )
+
+AddTest(
+  TARGET test_batch
+  SOURCES test_batch.cpp
+  ../batch.cpp
+  LIBS ${math_libs}
+)
diff --git a/source/module_base/grid/test/test_batch.cpp b/source/module_base/grid/test/test_batch.cpp
new file mode 100644
index 0000000000..74324bc47b
--- /dev/null
+++ b/source/module_base/grid/test/test_batch.cpp
@@ -0,0 +1,234 @@
+#include "module_base/grid/batch.h"
+
+#include "gtest/gtest.h"
+#include <algorithm>
+#include <random>
+//#include <cstdio>
+
+#ifdef __MPI
+#include <mpi.h>
+#endif
+
+using namespace Grid::Batch;
+
+
+class BatchTest: public ::testing::Test
+{
+protected:
+
+    std::vector<double> grid_;
+    std::vector<int> idx_;
+
+    // parameters for 8-octant clusters
+    const int n_batch_oct_ = 10;
+    const double width_oct_ = 1.0;
+    const double offset_x_ = 7.0;
+    const double offset_y_ = 8.0;
+    const double offset_z_ = 9.0;
+    // NOTE: These offsets should be different from each other as maxmin
+    // might fail for highly symmetric, well-separated clusters.
+    // Consider the case where the 8 clusters as a whole have octahedral
+    // symmetry. In this case, R*R^T must be proprotional to the identity,
+    // and eigenvalues are three-fold degenerate, because xy, yz and zx
+    // plane are equivalent in terms of the maxmin optimization problem.
+    // This means eigenvectors are arbitrary in this case.
+
+
+    // parameters for a random cluster
+    const int n_grid_rand_ = 1000;
+    const int n_batch_rand_ = 200;
+    const double width_rand_ = 10.0;
+    const double xc_ = 1.0;
+    const double yc_ = 1.0;
+    const double zc_ = 2.0;
+};
+
+
+void gen_random(
+    int ngrid,
+    double xc,
+    double yc,
+    double zc,
+    double width,
+    std::vector<double>& grid,
+    std::vector<int>& idx
+) {
+
+    // Generates a set of points centered around (xc, yc, zc).
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<double> dis(-width, width);
+
+    grid.resize(3 * ngrid);
+    for (int i = 0; i < ngrid; ++i) {
+        grid[3*i    ] = xc + dis(gen);
+        grid[3*i + 1] = yc + dis(gen);
+        grid[3*i + 2] = zc + dis(gen);
+    }
+
+    idx.resize(ngrid);
+    std::iota(idx.begin(), idx.end(), 0);
+    std::shuffle(idx.begin(), idx.end(), gen);
+}
+
+
+void gen_octant(
+    int n_each,
+    double offset_x,
+    double offset_y,
+    double offset_z,
+    double width,
+    std::vector<double>& grid,
+    std::vector<int>& idx
+) {
+
+    // Generates a set of points consisting of 8 well-separated, equal-sized
+    // clusters located in individual octants.
+
+    std::random_device rd;
+    std::mt19937 gen(rd());
+    std::uniform_real_distribution<double> dis(-width, width);
+
+    int ngrid = 8 * n_each;
+    grid.resize(3 * ngrid);
+    int I = 0;
+    for (int sign_x : {-1, 1}) {
+        for (int sign_y : {-1, 1}) {
+            for (int sign_z : {-1, 1}) {
+                for (int i = 0; i < n_each; ++i, ++I) {
+                    grid[3*I    ] = sign_x * offset_x + dis(gen);
+                    grid[3*I + 1] = sign_y * offset_y + dis(gen);
+                    grid[3*I + 2] = sign_z * offset_z + dis(gen);
+                }
+            }
+        }
+    }
+
+    idx.resize(ngrid);
+    std::iota(idx.begin(), idx.end(), 0);
+    std::shuffle(idx.begin(), idx.end(), gen);
+}
+
+
+bool is_same_octant(int ngrid, const double* grid) {
+    if (ngrid == 0) {
+        return true;
+    }
+    const bool is_positive_x = grid[0] > 0;
+    const bool is_positive_y = grid[1] > 0;
+    const bool is_positive_z = grid[2] > 0;
+    const double* end = grid + 3 * ngrid;
+    for (; grid != end; grid += 3) {
+        if ( is_positive_x != (grid[0] > 0) ||
+             is_positive_y != (grid[1] > 0) ||
+             is_positive_z != (grid[2] > 0) ) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+bool good_batch_size(
+    const std::vector<int>& idx,
+    const std::vector<int>& delim,
+    int n_batch_thr
+) {
+    // checks if the sizes of batches are within the specified limit
+
+    bool flag = (delim[0] == 0);
+
+    size_t i = 1;
+    while (flag && i < delim.size()) {
+        int sz_batch = delim[i] - delim[i-1];
+        flag = flag && (sz_batch > 0) && (sz_batch <= n_batch_thr);
+        ++i;
+    }
+
+    return flag && ( ((int)idx.size() - delim.back()) < n_batch_thr );
+}
+
+
+TEST_F(BatchTest, MaxMinRandom)
+{
+    // This test verifies that the sizes of batches produced by maxmin
+    // do not exceed the specified limit.
+
+    gen_random(n_grid_rand_, xc_, yc_, zc_, width_rand_, grid_, idx_);
+
+    std::vector<int> delim = 
+        maxmin(grid_.data(), idx_.data(), idx_.size(), n_batch_rand_);
+
+    EXPECT_TRUE(good_batch_size(idx_, delim, n_batch_rand_));
+
+    // write grid, idx & delim to file
+    //FILE* fp = fopen("grid.dat", "w");
+    //for (size_t i = 0; i < grid_.size() / 3; ++i) {
+    //    std::fprintf(fp, "% 12.6f % 12.6f % 12.6f\n",
+    //        grid_[3*i], grid_[3*i + 1], grid_[3*i + 2]);
+    //}
+    //fclose(fp);
+
+    //fp = fopen("idx.dat", "w");
+    //for (size_t i = 0; i < idx_.size(); ++i) {
+    //    std::fprintf(fp, "%d\n", idx_[i]);
+    //}
+    //fclose(fp);
+
+    //fp = fopen("delim.dat", "w");
+    //for (size_t i = 0; i < delim.size(); ++i) {
+    //    std::fprintf(fp, "%d\n", delim[i]);
+    //}
+    //fclose(fp);
+}
+
+
+TEST_F(BatchTest, MaxMinOctant)
+{
+    // This test applies maxmin to a set of points consisting of 8
+    // well-separated, equal-sized clusters located in individual octants.
+    // The resulting batches should be able to recover this structure.
+
+    gen_octant(n_batch_oct_, offset_x_, offset_y_, offset_z_, width_oct_,
+               grid_, idx_);
+
+    std::vector<int> delim = 
+        maxmin(grid_.data(), idx_.data(), idx_.size(), n_batch_oct_);
+
+    EXPECT_EQ(delim.size(), 8);
+
+    std::vector<double> grid_batch(3 * n_batch_oct_);
+    for (int i = 0; i < 8; ++i) {
+
+        EXPECT_EQ(delim[i], i * n_batch_oct_);
+
+        // collect points within the present batch
+        for (int j = 0; j < n_batch_oct_; ++j) {
+            int ig = idx_[delim[i] + j];
+            grid_batch[3*j    ] = grid_[3*ig    ];
+            grid_batch[3*j + 1] = grid_[3*ig + 1];
+            grid_batch[3*j + 2] = grid_[3*ig + 2];
+        }
+
+        // verify that points in a batch reside in the same octant
+        EXPECT_TRUE(is_same_octant(n_batch_oct_, grid_batch.data()));
+    }
+}
+
+
+int main(int argc, char** argv)
+{
+#ifdef __MPI
+    MPI_Init(&argc, &argv);
+#endif
+
+    testing::InitGoogleTest(&argc, argv);
+    int result = RUN_ALL_TESTS();
+
+#ifdef __MPI
+    MPI_Finalize();
+#endif
+
+    return result;
+}