|
1 | 1 | #!/usr/bin/env python3 |
2 | 2 |
|
| 3 | +''' |
| 4 | +python3 this_script.py -c y3/config.yaml |
| 5 | +
|
| 6 | +or |
| 7 | +
|
| 8 | +python3 this_script.py y3/config.yaml |
| 9 | +''' |
3 | 10 |
|
4 | 11 | import os |
5 | 12 | import re |
|
14 | 21 | ######################################################################## |
15 | 22 |
|
16 | 23 |
|
# Lookup table from a decimal index string to a lowercase letter:
# '0' -> 'a', '1' -> 'b', ..., '25' -> 'z'.
# Keys are strings (not ints) because callers index the table with string
# tokens, e.g. ``rc_alphabets[idx]``; the resulting letter is appended to a
# variable-name stem when source text is generated.
# Generated instead of hand-typed so the 26 entries cannot drift or be
# mistyped; insertion order is '0'..'25', matching the former literal.
rc_alphabets = {str(i): chr(ord('a') + i) for i in range(26)}
24 | 31 |
|
25 | | - |
26 | 32 | rc_kernel2c = ''' |
27 | 33 | TEMPLATE_PARAMS \ |
28 | 34 | __global__ \ |
|
40 | 46 | USING_DEVICE_VARIABLES KERNEL_CONSTEXPR_FLAGS \ |
41 | 47 | const int ithread = threadIdx.x + blockIdx.x * blockDim.x; |
42 | 48 |
|
43 | | -
|
44 | 49 | DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL |
45 | 50 | DECLARE_FORCE_I_AND_K DECLARE_PARAMS_I_AND_K |
46 | 51 |
|
47 | | -
|
48 | 52 | for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) { |
49 | 53 | KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE |
50 | 54 |
|
51 | | -
|
52 | 55 | int i = exclude[ii][0]; |
53 | 56 | int k = exclude[ii][1]; |
54 | 57 | KERNEL_LOAD_1X_SCALES |
55 | 58 |
|
56 | | -
|
57 | 59 | KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K |
58 | 60 |
|
59 | | -
|
60 | 61 | constexpr bool incl = true; |
61 | 62 | KERNEL_SCALED_PAIRWISE_INTERACTION |
62 | 63 |
|
63 | | -
|
64 | 64 | KERNEL_SAVE_LOCAL_FORCE |
65 | 65 | } |
66 | 66 |
|
67 | | -
|
68 | 67 | KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL |
69 | 68 | } |
70 | 69 | ''' |
|
91 | 90 | const int nwarp = blockDim.x * gridDim.x / WARP_SIZE; |
92 | 91 | const int ilane = threadIdx.x & (WARP_SIZE - 1); |
93 | 92 |
|
94 | | -
|
95 | 93 | DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL |
96 | 94 | DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K |
97 | 95 |
|
98 | | -
|
99 | 96 | for (int iw = iwarp; iw < nakpl; iw += nwarp) { |
100 | 97 | KERNEL_ZERO_LOCAL_FORCE |
101 | 98 |
|
102 | | -
|
103 | 99 | int tri, tx, ty; |
104 | 100 | tri = iakpl[iw]; |
105 | 101 | tri_to_xy(tri, tx, ty); |
106 | 102 |
|
107 | | -
|
108 | 103 | int iid = ty * WARP_SIZE + ilane; |
109 | 104 | int atomi = min(iid, n - 1); |
110 | 105 | int i = sorted[atomi].unsorted; |
|
114 | 109 | KERNEL_INIT_PARAMS_I_AND_K |
115 | 110 | KERNEL_SYNCWARP |
116 | 111 |
|
117 | | -
|
118 | 112 | KERNEL_LOAD_INFO_VARIABLES |
119 | 113 | for (int j = 0; j < WARP_SIZE; ++j) { |
120 | 114 | int srclane = (ilane + j) & (WARP_SIZE - 1); \ |
|
124 | 118 | KERNEL_SCALE_1 \ |
125 | 119 | KERNEL_FULL_PAIRWISE_INTERACTION |
126 | 120 |
|
127 | | -
|
128 | 121 | iid = __shfl_sync(ALL_LANES, iid, ilane + 1); |
129 | 122 | KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I |
130 | 123 | } |
131 | 124 |
|
132 | | -
|
133 | 125 | KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP |
134 | 126 | } |
135 | 127 |
|
136 | | -
|
137 | 128 | KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL |
138 | 129 | } |
139 | 130 | ''' |
|
159 | 150 | const int nwarp = blockDim.x * gridDim.x / WARP_SIZE; |
160 | 151 | const int ilane = threadIdx.x & (WARP_SIZE - 1); |
161 | 152 |
|
162 | | -
|
163 | 153 | DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL |
164 | 154 | DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K |
165 | 155 |
|
166 | | -
|
167 | 156 | for (int iw = iwarp; iw < niak; iw += nwarp) { |
168 | 157 | KERNEL_ZERO_LOCAL_FORCE |
169 | 158 |
|
170 | | -
|
171 | 159 | int ty = iak[iw]; |
172 | 160 | int atomi = ty * WARP_SIZE + ilane; |
173 | 161 | int i = sorted[atomi].unsorted; |
|
176 | 164 | KERNEL_INIT_PARAMS_I_AND_K |
177 | 165 | KERNEL_SYNCWARP |
178 | 166 |
|
179 | | -
|
180 | 167 | for (int j = 0; j < WARP_SIZE; ++j) { |
181 | 168 | KERNEL_KLANE2 \ |
182 | 169 | bool incl = atomk > 0; \ |
183 | 170 | KERNEL_SCALE_1 \ |
184 | 171 | KERNEL_FULL_PAIRWISE_INTERACTION |
185 | 172 |
|
186 | | -
|
187 | 173 | KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I |
188 | 174 | } |
189 | 175 |
|
190 | | -
|
191 | 176 | KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP |
192 | 177 | } |
193 | 178 |
|
194 | | -
|
195 | 179 | KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL |
196 | 180 | } |
197 | 181 | ''' |
|
220 | 204 | const int nwarp = blockDim.x * gridDim.x / WARP_SIZE; |
221 | 205 | const int ilane = threadIdx.x & (WARP_SIZE - 1); |
222 | 206 |
|
223 | | -
|
224 | 207 | DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL |
225 | 208 | DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K |
226 | 209 |
|
227 | | -
|
228 | 210 | KERNEL_HAS_1X_SCALE |
229 | 211 | for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) { |
230 | 212 | KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE |
231 | 213 |
|
232 | | -
|
233 | 214 | int i = exclude[ii][0]; |
234 | 215 | int k = exclude[ii][1]; |
235 | 216 | KERNEL_LOAD_1X_SCALES |
236 | 217 |
|
237 | | -
|
238 | 218 | KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K |
239 | 219 |
|
240 | | -
|
241 | 220 | constexpr bool incl = true; |
242 | 221 | KERNEL_SCALED_PAIRWISE_INTERACTION |
243 | 222 |
|
244 | | -
|
245 | 223 | KERNEL_SAVE_LOCAL_FORCE |
246 | 224 | } |
247 | 225 | // */ |
248 | 226 |
|
249 | | -
|
250 | 227 | for (int iw = iwarp; iw < nakpl; iw += nwarp) { |
251 | 228 | KERNEL_ZERO_LOCAL_FORCE |
252 | 229 |
|
253 | | -
|
254 | 230 | int tri, tx, ty; |
255 | 231 | tri = iakpl[iw]; |
256 | 232 | tri_to_xy(tri, tx, ty); |
257 | 233 |
|
258 | | -
|
259 | 234 | int iid = ty * WARP_SIZE + ilane; |
260 | 235 | int atomi = min(iid, n - 1); |
261 | 236 | int i = sorted[atomi].unsorted; |
|
265 | 240 | KERNEL_INIT_PARAMS_I_AND_K |
266 | 241 | KERNEL_SYNCWARP |
267 | 242 |
|
268 | | -
|
269 | 243 | KERNEL_LOAD_INFO_VARIABLES |
270 | 244 | for (int j = 0; j < WARP_SIZE; ++j) { |
271 | 245 | int srclane = (ilane + j) & (WARP_SIZE - 1); \ |
|
275 | 249 | KERNEL_SCALE_1 \ |
276 | 250 | KERNEL_FULL_PAIRWISE_INTERACTION |
277 | 251 |
|
278 | | -
|
279 | 252 | iid = __shfl_sync(ALL_LANES, iid, ilane + 1); |
280 | 253 | KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I |
281 | 254 | } |
282 | 255 |
|
283 | | -
|
284 | 256 | KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP |
285 | 257 | } |
286 | 258 |
|
287 | | -
|
288 | 259 | for (int iw = iwarp; iw < niak; iw += nwarp) { |
289 | 260 | KERNEL_ZERO_LOCAL_FORCE |
290 | 261 |
|
291 | | -
|
292 | 262 | int ty = iak[iw]; |
293 | 263 | int atomi = ty * WARP_SIZE + ilane; |
294 | 264 | int i = sorted[atomi].unsorted; |
|
297 | 267 | KERNEL_INIT_PARAMS_I_AND_K |
298 | 268 | KERNEL_SYNCWARP |
299 | 269 |
|
300 | | -
|
301 | 270 | for (int j = 0; j < WARP_SIZE; ++j) { |
302 | 271 | KERNEL_KLANE2 \ |
303 | 272 | bool incl = atomk > 0; \ |
304 | 273 | KERNEL_SCALE_1 \ |
305 | 274 | KERNEL_FULL_PAIRWISE_INTERACTION |
306 | 275 |
|
307 | | -
|
308 | 276 | KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I |
309 | 277 | } |
310 | 278 |
|
311 | | -
|
312 | 279 | KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP |
313 | 280 | } |
314 | 281 |
|
315 | | -
|
316 | 282 | KERNEL_SUM_COUNT |
317 | 283 | KERNEL_SUM_ENERGY |
318 | 284 | KERNEL_SUM_VIRIAL |
@@ -570,7 +536,7 @@ def _load_scale_param(ptype:str, stem:str, input:str, separate_scaled_pairwise:b |
570 | 536 | v = '' |
571 | 537 | for i in range(1,len(ss)): |
572 | 538 | idx = ss[i] |
573 | | - al = alphabets[idx] |
| 539 | + al = rc_alphabets[idx] |
574 | 540 | if input is None: |
575 | 541 | if not separate_scaled_pairwise: |
576 | 542 | v = v + '{} {}{} = 1;'.format(t, stem, al) |
@@ -610,6 +576,8 @@ def _kv(self, k:str): |
610 | 576 | return self.config[k] |
611 | 577 | else: |
612 | 578 | return '' |
| 579 | + |
| 580 | + |
613 | 581 | def cudaReplaceDict(self) -> dict: |
614 | 582 | d = {} |
615 | 583 | config = self.config |
|
0 commit comments