Skip to content

Commit 9b00ec0

Browse files
committed
simplify amoeba mpole+polar
1 parent 0b94d62 commit 9b00ec0

15 files changed

+127
-176
lines changed

ext/ext/ck3.py

Lines changed: 11 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,12 @@
11
#!/usr/bin/env python3
22

3+
'''
4+
python3 this_script.py -c y3/config.yaml
5+
6+
or
7+
8+
python3 this_script.py y3/config.yaml
9+
'''
310

411
import os
512
import re
@@ -14,15 +21,14 @@
1421
########################################################################
1522

1623

17-
alphabets = {
24+
rc_alphabets = {
1825
'0' : 'a', '1' : 'b', '2' : 'c', '3' : 'd', '4' : 'e',
1926
'5' : 'f', '6' : 'g', '7' : 'h', '8' : 'i', '9' : 'j',
2027
'10': 'k', '11': 'l', '12': 'm', '13': 'n', '14': 'o',
2128
'15': 'p', '16': 'q', '17': 'r', '18': 's', '19': 't',
2229
'20': 'u', '21': 'v', '22': 'w', '23': 'x', '24': 'y',
2330
'25': 'z'}
2431

25-
2632
rc_kernel2c = '''
2733
TEMPLATE_PARAMS \
2834
__global__ \
@@ -40,31 +46,24 @@
4046
USING_DEVICE_VARIABLES KERNEL_CONSTEXPR_FLAGS \
4147
const int ithread = threadIdx.x + blockIdx.x * blockDim.x;
4248
43-
4449
DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
4550
DECLARE_FORCE_I_AND_K DECLARE_PARAMS_I_AND_K
4651
47-
4852
for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) {
4953
KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE
5054
51-
5255
int i = exclude[ii][0];
5356
int k = exclude[ii][1];
5457
KERNEL_LOAD_1X_SCALES
5558
56-
5759
KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K
5860
59-
6061
constexpr bool incl = true;
6162
KERNEL_SCALED_PAIRWISE_INTERACTION
6263
63-
6464
KERNEL_SAVE_LOCAL_FORCE
6565
}
6666
67-
6867
KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL
6968
}
7069
'''
@@ -91,20 +90,16 @@
9190
const int nwarp = blockDim.x * gridDim.x / WARP_SIZE;
9291
const int ilane = threadIdx.x & (WARP_SIZE - 1);
9392
94-
9593
DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
9694
DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K
9795
98-
9996
for (int iw = iwarp; iw < nakpl; iw += nwarp) {
10097
KERNEL_ZERO_LOCAL_FORCE
10198
102-
10399
int tri, tx, ty;
104100
tri = iakpl[iw];
105101
tri_to_xy(tri, tx, ty);
106102
107-
108103
int iid = ty * WARP_SIZE + ilane;
109104
int atomi = min(iid, n - 1);
110105
int i = sorted[atomi].unsorted;
@@ -114,7 +109,6 @@
114109
KERNEL_INIT_PARAMS_I_AND_K
115110
KERNEL_SYNCWARP
116111
117-
118112
KERNEL_LOAD_INFO_VARIABLES
119113
for (int j = 0; j < WARP_SIZE; ++j) {
120114
int srclane = (ilane + j) & (WARP_SIZE - 1); \
@@ -124,16 +118,13 @@
124118
KERNEL_SCALE_1 \
125119
KERNEL_FULL_PAIRWISE_INTERACTION
126120
127-
128121
iid = __shfl_sync(ALL_LANES, iid, ilane + 1);
129122
KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
130123
}
131124
132-
133125
KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
134126
}
135127
136-
137128
KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL
138129
}
139130
'''
@@ -159,15 +150,12 @@
159150
const int nwarp = blockDim.x * gridDim.x / WARP_SIZE;
160151
const int ilane = threadIdx.x & (WARP_SIZE - 1);
161152
162-
163153
DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
164154
DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K
165155
166-
167156
for (int iw = iwarp; iw < niak; iw += nwarp) {
168157
KERNEL_ZERO_LOCAL_FORCE
169158
170-
171159
int ty = iak[iw];
172160
int atomi = ty * WARP_SIZE + ilane;
173161
int i = sorted[atomi].unsorted;
@@ -176,22 +164,18 @@
176164
KERNEL_INIT_PARAMS_I_AND_K
177165
KERNEL_SYNCWARP
178166
179-
180167
for (int j = 0; j < WARP_SIZE; ++j) {
181168
KERNEL_KLANE2 \
182169
bool incl = atomk > 0; \
183170
KERNEL_SCALE_1 \
184171
KERNEL_FULL_PAIRWISE_INTERACTION
185172
186-
187173
KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
188174
}
189175
190-
191176
KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
192177
}
193178
194-
195179
KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL
196180
}
197181
'''
@@ -220,42 +204,33 @@
220204
const int nwarp = blockDim.x * gridDim.x / WARP_SIZE;
221205
const int ilane = threadIdx.x & (WARP_SIZE - 1);
222206
223-
224207
DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
225208
DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K
226209
227-
228210
KERNEL_HAS_1X_SCALE
229211
for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) {
230212
KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE
231213
232-
233214
int i = exclude[ii][0];
234215
int k = exclude[ii][1];
235216
KERNEL_LOAD_1X_SCALES
236217
237-
238218
KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K
239219
240-
241220
constexpr bool incl = true;
242221
KERNEL_SCALED_PAIRWISE_INTERACTION
243222
244-
245223
KERNEL_SAVE_LOCAL_FORCE
246224
}
247225
// */
248226
249-
250227
for (int iw = iwarp; iw < nakpl; iw += nwarp) {
251228
KERNEL_ZERO_LOCAL_FORCE
252229
253-
254230
int tri, tx, ty;
255231
tri = iakpl[iw];
256232
tri_to_xy(tri, tx, ty);
257233
258-
259234
int iid = ty * WARP_SIZE + ilane;
260235
int atomi = min(iid, n - 1);
261236
int i = sorted[atomi].unsorted;
@@ -265,7 +240,6 @@
265240
KERNEL_INIT_PARAMS_I_AND_K
266241
KERNEL_SYNCWARP
267242
268-
269243
KERNEL_LOAD_INFO_VARIABLES
270244
for (int j = 0; j < WARP_SIZE; ++j) {
271245
int srclane = (ilane + j) & (WARP_SIZE - 1); \
@@ -275,20 +249,16 @@
275249
KERNEL_SCALE_1 \
276250
KERNEL_FULL_PAIRWISE_INTERACTION
277251
278-
279252
iid = __shfl_sync(ALL_LANES, iid, ilane + 1);
280253
KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
281254
}
282255
283-
284256
KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
285257
}
286258
287-
288259
for (int iw = iwarp; iw < niak; iw += nwarp) {
289260
KERNEL_ZERO_LOCAL_FORCE
290261
291-
292262
int ty = iak[iw];
293263
int atomi = ty * WARP_SIZE + ilane;
294264
int i = sorted[atomi].unsorted;
@@ -297,22 +267,18 @@
297267
KERNEL_INIT_PARAMS_I_AND_K
298268
KERNEL_SYNCWARP
299269
300-
301270
for (int j = 0; j < WARP_SIZE; ++j) {
302271
KERNEL_KLANE2 \
303272
bool incl = atomk > 0; \
304273
KERNEL_SCALE_1 \
305274
KERNEL_FULL_PAIRWISE_INTERACTION
306275
307-
308276
KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
309277
}
310278
311-
312279
KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
313280
}
314281
315-
316282
KERNEL_SUM_COUNT
317283
KERNEL_SUM_ENERGY
318284
KERNEL_SUM_VIRIAL
@@ -570,7 +536,7 @@ def _load_scale_param(ptype:str, stem:str, input:str, separate_scaled_pairwise:b
570536
v = ''
571537
for i in range(1,len(ss)):
572538
idx = ss[i]
573-
al = alphabets[idx]
539+
al = rc_alphabets[idx]
574540
if input is None:
575541
if not separate_scaled_pairwise:
576542
v = v + '{} {}{} = 1;'.format(t, stem, al)
@@ -610,6 +576,8 @@ def _kv(self, k:str):
610576
return self.config[k]
611577
else:
612578
return ''
579+
580+
613581
def cudaReplaceDict(self) -> dict:
614582
d = {}
615583
config = self.config

src/cu/amoeba/dfield_cu1.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
// ck.py Version 3.0.0
1+
// ck.py Version 3.0.2
22
template <class ETYP>
33
__global__
44
void dfield_cu1(int n, TINKER_IMAGE_PARAMS, real off, const unsigned* restrict dpinfo, int nexclude,

0 commit comments

Comments
 (0)