|
1 | 1 | #!/usr/bin/env python3
|
2 | 2 |
|
| 3 | +''' |
| 4 | +python3 this_script.py -c y3/config.yaml |
| 5 | +
|
| 6 | +or |
| 7 | +
|
| 8 | +python3 this_script.py y3/config.yaml |
| 9 | +''' |
3 | 10 |
|
4 | 11 | import os
|
5 | 12 | import re
|
|
14 | 21 | ########################################################################
|
15 | 22 |
|
16 | 23 |
|
17 |
| -alphabets = { |
| 24 | +rc_alphabets = { |
18 | 25 | '0' : 'a', '1' : 'b', '2' : 'c', '3' : 'd', '4' : 'e',
|
19 | 26 | '5' : 'f', '6' : 'g', '7' : 'h', '8' : 'i', '9' : 'j',
|
20 | 27 | '10': 'k', '11': 'l', '12': 'm', '13': 'n', '14': 'o',
|
21 | 28 | '15': 'p', '16': 'q', '17': 'r', '18': 's', '19': 't',
|
22 | 29 | '20': 'u', '21': 'v', '22': 'w', '23': 'x', '24': 'y',
|
23 | 30 | '25': 'z'}
|
24 | 31 |
|
25 |
| - |
26 | 32 | rc_kernel2c = '''
|
27 | 33 | TEMPLATE_PARAMS \
|
28 | 34 | __global__ \
|
|
40 | 46 | USING_DEVICE_VARIABLES KERNEL_CONSTEXPR_FLAGS \
|
41 | 47 | const int ithread = threadIdx.x + blockIdx.x * blockDim.x;
|
42 | 48 |
|
43 |
| -
|
44 | 49 | DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
|
45 | 50 | DECLARE_FORCE_I_AND_K DECLARE_PARAMS_I_AND_K
|
46 | 51 |
|
47 |
| -
|
48 | 52 | for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) {
|
49 | 53 | KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE
|
50 | 54 |
|
51 |
| -
|
52 | 55 | int i = exclude[ii][0];
|
53 | 56 | int k = exclude[ii][1];
|
54 | 57 | KERNEL_LOAD_1X_SCALES
|
55 | 58 |
|
56 |
| -
|
57 | 59 | KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K
|
58 | 60 |
|
59 |
| -
|
60 | 61 | constexpr bool incl = true;
|
61 | 62 | KERNEL_SCALED_PAIRWISE_INTERACTION
|
62 | 63 |
|
63 |
| -
|
64 | 64 | KERNEL_SAVE_LOCAL_FORCE
|
65 | 65 | }
|
66 | 66 |
|
67 |
| -
|
68 | 67 | KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL
|
69 | 68 | }
|
70 | 69 | '''
|
|
91 | 90 | const int nwarp = blockDim.x * gridDim.x / WARP_SIZE;
|
92 | 91 | const int ilane = threadIdx.x & (WARP_SIZE - 1);
|
93 | 92 |
|
94 |
| -
|
95 | 93 | DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
|
96 | 94 | DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K
|
97 | 95 |
|
98 |
| -
|
99 | 96 | for (int iw = iwarp; iw < nakpl; iw += nwarp) {
|
100 | 97 | KERNEL_ZERO_LOCAL_FORCE
|
101 | 98 |
|
102 |
| -
|
103 | 99 | int tri, tx, ty;
|
104 | 100 | tri = iakpl[iw];
|
105 | 101 | tri_to_xy(tri, tx, ty);
|
106 | 102 |
|
107 |
| -
|
108 | 103 | int iid = ty * WARP_SIZE + ilane;
|
109 | 104 | int atomi = min(iid, n - 1);
|
110 | 105 | int i = sorted[atomi].unsorted;
|
|
114 | 109 | KERNEL_INIT_PARAMS_I_AND_K
|
115 | 110 | KERNEL_SYNCWARP
|
116 | 111 |
|
117 |
| -
|
118 | 112 | KERNEL_LOAD_INFO_VARIABLES
|
119 | 113 | for (int j = 0; j < WARP_SIZE; ++j) {
|
120 | 114 | int srclane = (ilane + j) & (WARP_SIZE - 1); \
|
|
124 | 118 | KERNEL_SCALE_1 \
|
125 | 119 | KERNEL_FULL_PAIRWISE_INTERACTION
|
126 | 120 |
|
127 |
| -
|
128 | 121 | iid = __shfl_sync(ALL_LANES, iid, ilane + 1);
|
129 | 122 | KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
|
130 | 123 | }
|
131 | 124 |
|
132 |
| -
|
133 | 125 | KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
|
134 | 126 | }
|
135 | 127 |
|
136 |
| -
|
137 | 128 | KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL
|
138 | 129 | }
|
139 | 130 | '''
|
|
159 | 150 | const int nwarp = blockDim.x * gridDim.x / WARP_SIZE;
|
160 | 151 | const int ilane = threadIdx.x & (WARP_SIZE - 1);
|
161 | 152 |
|
162 |
| -
|
163 | 153 | DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
|
164 | 154 | DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K
|
165 | 155 |
|
166 |
| -
|
167 | 156 | for (int iw = iwarp; iw < niak; iw += nwarp) {
|
168 | 157 | KERNEL_ZERO_LOCAL_FORCE
|
169 | 158 |
|
170 |
| -
|
171 | 159 | int ty = iak[iw];
|
172 | 160 | int atomi = ty * WARP_SIZE + ilane;
|
173 | 161 | int i = sorted[atomi].unsorted;
|
|
176 | 164 | KERNEL_INIT_PARAMS_I_AND_K
|
177 | 165 | KERNEL_SYNCWARP
|
178 | 166 |
|
179 |
| -
|
180 | 167 | for (int j = 0; j < WARP_SIZE; ++j) {
|
181 | 168 | KERNEL_KLANE2 \
|
182 | 169 | bool incl = atomk > 0; \
|
183 | 170 | KERNEL_SCALE_1 \
|
184 | 171 | KERNEL_FULL_PAIRWISE_INTERACTION
|
185 | 172 |
|
186 |
| -
|
187 | 173 | KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
|
188 | 174 | }
|
189 | 175 |
|
190 |
| -
|
191 | 176 | KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
|
192 | 177 | }
|
193 | 178 |
|
194 |
| -
|
195 | 179 | KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL
|
196 | 180 | }
|
197 | 181 | '''
|
|
220 | 204 | const int nwarp = blockDim.x * gridDim.x / WARP_SIZE;
|
221 | 205 | const int ilane = threadIdx.x & (WARP_SIZE - 1);
|
222 | 206 |
|
223 |
| -
|
224 | 207 | DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
|
225 | 208 | DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K
|
226 | 209 |
|
227 |
| -
|
228 | 210 | KERNEL_HAS_1X_SCALE
|
229 | 211 | for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) {
|
230 | 212 | KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE
|
231 | 213 |
|
232 |
| -
|
233 | 214 | int i = exclude[ii][0];
|
234 | 215 | int k = exclude[ii][1];
|
235 | 216 | KERNEL_LOAD_1X_SCALES
|
236 | 217 |
|
237 |
| -
|
238 | 218 | KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K
|
239 | 219 |
|
240 |
| -
|
241 | 220 | constexpr bool incl = true;
|
242 | 221 | KERNEL_SCALED_PAIRWISE_INTERACTION
|
243 | 222 |
|
244 |
| -
|
245 | 223 | KERNEL_SAVE_LOCAL_FORCE
|
246 | 224 | }
|
247 | 225 | // */
|
248 | 226 |
|
249 |
| -
|
250 | 227 | for (int iw = iwarp; iw < nakpl; iw += nwarp) {
|
251 | 228 | KERNEL_ZERO_LOCAL_FORCE
|
252 | 229 |
|
253 |
| -
|
254 | 230 | int tri, tx, ty;
|
255 | 231 | tri = iakpl[iw];
|
256 | 232 | tri_to_xy(tri, tx, ty);
|
257 | 233 |
|
258 |
| -
|
259 | 234 | int iid = ty * WARP_SIZE + ilane;
|
260 | 235 | int atomi = min(iid, n - 1);
|
261 | 236 | int i = sorted[atomi].unsorted;
|
|
265 | 240 | KERNEL_INIT_PARAMS_I_AND_K
|
266 | 241 | KERNEL_SYNCWARP
|
267 | 242 |
|
268 |
| -
|
269 | 243 | KERNEL_LOAD_INFO_VARIABLES
|
270 | 244 | for (int j = 0; j < WARP_SIZE; ++j) {
|
271 | 245 | int srclane = (ilane + j) & (WARP_SIZE - 1); \
|
|
275 | 249 | KERNEL_SCALE_1 \
|
276 | 250 | KERNEL_FULL_PAIRWISE_INTERACTION
|
277 | 251 |
|
278 |
| -
|
279 | 252 | iid = __shfl_sync(ALL_LANES, iid, ilane + 1);
|
280 | 253 | KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
|
281 | 254 | }
|
282 | 255 |
|
283 |
| -
|
284 | 256 | KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
|
285 | 257 | }
|
286 | 258 |
|
287 |
| -
|
288 | 259 | for (int iw = iwarp; iw < niak; iw += nwarp) {
|
289 | 260 | KERNEL_ZERO_LOCAL_FORCE
|
290 | 261 |
|
291 |
| -
|
292 | 262 | int ty = iak[iw];
|
293 | 263 | int atomi = ty * WARP_SIZE + ilane;
|
294 | 264 | int i = sorted[atomi].unsorted;
|
|
297 | 267 | KERNEL_INIT_PARAMS_I_AND_K
|
298 | 268 | KERNEL_SYNCWARP
|
299 | 269 |
|
300 |
| -
|
301 | 270 | for (int j = 0; j < WARP_SIZE; ++j) {
|
302 | 271 | KERNEL_KLANE2 \
|
303 | 272 | bool incl = atomk > 0; \
|
304 | 273 | KERNEL_SCALE_1 \
|
305 | 274 | KERNEL_FULL_PAIRWISE_INTERACTION
|
306 | 275 |
|
307 |
| -
|
308 | 276 | KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
|
309 | 277 | }
|
310 | 278 |
|
311 |
| -
|
312 | 279 | KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
|
313 | 280 | }
|
314 | 281 |
|
315 |
| -
|
316 | 282 | KERNEL_SUM_COUNT
|
317 | 283 | KERNEL_SUM_ENERGY
|
318 | 284 | KERNEL_SUM_VIRIAL
|
@@ -570,7 +536,7 @@ def _load_scale_param(ptype:str, stem:str, input:str, separate_scaled_pairwise:b
|
570 | 536 | v = ''
|
571 | 537 | for i in range(1,len(ss)):
|
572 | 538 | idx = ss[i]
|
573 |
| - al = alphabets[idx] |
| 539 | + al = rc_alphabets[idx] |
574 | 540 | if input is None:
|
575 | 541 | if not separate_scaled_pairwise:
|
576 | 542 | v = v + '{} {}{} = 1;'.format(t, stem, al)
|
@@ -610,6 +576,8 @@ def _kv(self, k:str):
|
610 | 576 | return self.config[k]
|
611 | 577 | else:
|
612 | 578 | return ''
|
| 579 | + |
| 580 | + |
613 | 581 | def cudaReplaceDict(self) -> dict:
|
614 | 582 | d = {}
|
615 | 583 | config = self.config
|
|
0 commit comments