@@ -65,7 +65,7 @@ void HF::molecule(const Molecule &moll) {
6565 for (unsigned int i=0 ; i<V.size (); i++) {
6666 V[i].resize (nrorbs,nrorbs);
6767 }
68- TE.resize (teindex (nrorbs,nrorbs,nrorbs,nrorbs )+1 , -1.0 );
68+ TE.resize (pow (nrorbs,4 )+1 , -1.0 );
6969
7070 if (debug) {
7171 std::cout << std::endl;
@@ -149,29 +149,61 @@ void HF::setup() {
149149 std::cout << " Constructing TE matrix... (" << teindex (nrorbs-1 ,nrorbs-1 ,nrorbs-1 ,nrorbs-1 )+1 << " ) " << std::endl;
150150 }
151151
152- // build a vector of two-electron integral jobs to evaluate
153- double tecnt=0 ;
154- std::vector<std::array<unsigned int ,5 >> te_jobs;
155- for (unsigned int i=0 ; i<nrorbs; i++) {
156- for (unsigned int j=0 ; j<=i; j++) {
157- unsigned int ij = i*(i+1 )/2 + j;
158- for (unsigned int k=0 ; k<nrorbs; k++) {
159- for (unsigned int l=0 ; l<=k; l++) {
160- unsigned int kl = k * (k+1 )/2 + l;
152+ // calculate all two-electron integrals
153+ const size_t sz = orbitals.size ();
154+ const size_t tecnt = teindex (sz-1 ,sz-1 ,sz-1 ,sz-1 ) + 1 ;
155+ std::vector<double > tedouble (tecnt, -1.0 );
156+
157+ // it is more efficient to first 'unroll' the fourfold nested loop
158+ // into a single vector of jobs to execute
159+ std::vector<std::array<size_t , 5 >> jobs;
160+ for (size_t i=0 ; i<sz; i++) {
161+ for (size_t j=0 ; j<sz; j++) {
162+ size_t ij = i*(i+1 )/2 + j;
163+ for (size_t k=0 ; k<sz; k++) {
164+ for (size_t l=0 ; l<sz; l++) {
165+ size_t kl = k * (k+1 )/2 + l;
161166 if (ij <= kl) {
162- unsigned int index = teindex (i,j,k,l);
163- te_jobs.push_back ({index,i,j,k,l});
164- tecnt++;
167+ const size_t idx = teindex (i,j,k,l);
168+
169+ if (idx >= tedouble.size ()) {
170+ throw std::runtime_error (" Process tried to access illegal array position" );
171+ }
172+
173+ if (tedouble[idx] < 0.0 ) {
174+ tedouble[idx] = 1.0 ;
175+ jobs.emplace_back (std::array<size_t ,5 >({idx, i, j, k, l}));
176+ }
165177 }
166178 }
167179 }
168180 }
169181 }
170182
171- // perform the two-electron integral calculation
183+ // evaluate jobs
172184 #pragma omp parallel for schedule(dynamic)
173- for (const auto & te_job : te_jobs) {
174- TE[te_job[0 ]] = cgf_repulsion (orbitals[te_job[1 ]],orbitals[te_job[2 ]],orbitals[te_job[3 ]],orbitals[te_job[4 ]]);
185+ for (int s=0 ; s<(int )jobs.size (); s++) { // have to use signed int for MSVC OpenMP here
186+ const size_t idx = jobs[s][0 ];
187+ const size_t i = jobs[s][1 ];
188+ const size_t j = jobs[s][2 ];
189+ const size_t k = jobs[s][3 ];
190+ const size_t l = jobs[s][4 ];
191+ tedouble[idx] = cgf_repulsion (orbitals[i], orbitals[j], orbitals[k], orbitals[l]);
192+ }
193+
194+ const size_t sz2 = sz * sz;
195+ const size_t sz3 = sz2 * sz;
196+
197+ // reorganize everyting into a tensor object
198+ for (size_t i=0 ; i<sz; i++) {
199+ for (size_t j=0 ; j<sz; j++) {
200+ for (size_t k=0 ; k<sz; k++) {
201+ for (size_t l=0 ; l<sz; l++) {
202+ const size_t idx = teindex (i,j,k,l);
203+ TE[i*sz3 + j*sz2 + k*sz + l] = tedouble[idx];
204+ }
205+ }
206+ }
175207 }
176208
177209 if (debug) {
@@ -195,23 +227,24 @@ void HF::setup() {
195227}
196228
197229void HF::step () {
198- cntstep++;
230+ cntstep++;
199231
200- unsigned int index1 = 0 ;
201- unsigned int index2 = 0 ;
232+ const size_t sz = nrorbs;
233+ const size_t sz2 = sz * sz;
234+ const size_t sz3 = sz2 * sz;
202235
203- for (unsigned int i=0 ; i<nrorbs ; i++) {
204- for (unsigned int j=0 ; j<nrorbs ; j++) {
236+ for (unsigned int i=0 ; i<sz ; i++) {
237+ for (unsigned int j=0 ; j<sz ; j++) {
205238 G[i][j] = 0 .; /* reset G matrix */
206- for (unsigned int k=0 ; k<nrorbs; k++) {
207- for (unsigned int l=0 ; l<nrorbs; l++) {
208- index1 = teindex (i,j,l,k);
209- index2 = teindex (i,k,l,j);
210- G[i][j] += P[k][l] * (TE[index1] - 0.5 * TE[index2]);
239+ for (unsigned int k=0 ; k<sz; k++) {
240+ for (unsigned int l=0 ; l<sz; l++) {
241+ const size_t index1 = i*sz3 + j*sz2 + k*sz + l;
242+ const size_t index2 = i*sz3 + k*sz2 + l*sz + j;
243+ G[i][j] += P[k][l] * (TE[index1] - 0.5 * TE[index2]);
244+ }
245+ }
211246 }
212- }
213247 }
214- }
215248
216249 /* construct Fock Matrix and orthogonalize it */
217250 F = matsum (H,G);
0 commit comments