3
3
# don't introduce a pandas/pandas.compat import
4
4
# or we get a bootstrapping problem
5
5
from StringIO import StringIO
6
- import os
7
6
8
7
header = """
9
8
cimport numpy as np
@@ -1150,6 +1149,86 @@ def group_var_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
1150
1149
(ct * ct - ct))
1151
1150
"""
1152
1151
1152
# Cython source template for the label-based group-count kernel.
#
# The text is rendered with %-formatting; it expects the keys ``name``
# (suffix of the generated function), ``c_type`` (element type of ``values``)
# and ``dest_type2`` (element type of ``out`` and of the scratch buffer).
# The generated function counts, per group label and per column, the entries
# of ``values`` that compare equal to themselves (i.e. are not NaN).
group_count_template = """@cython.boundscheck(False)
@cython.wraparound(False)
def group_count_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[%(c_type)s, ndim=2] values,
                         ndarray[int64_t] labels):
    '''
    Count the non-NaN entries of each column of values, per group label.

    Only aggregates on axis=0

    Rows whose label is negative are skipped. counts[lab] is incremented
    once for every remaining row; out[lab, j] receives the number of
    non-NaN entries seen in column j for that label.
    '''
    cdef:
        Py_ssize_t i, j, N, K, lab
        %(dest_type2)s val
        ndarray[%(dest_type2)s, ndim=2] nobs = np.zeros_like(out)


    if len(values) != len(labels):
        raise AssertionError("len(index) != len(labels)")

    N, K = (<object> values).shape

    for i in range(N):
        lab = labels[i]
        if lab < 0:
            continue

        counts[lab] += 1
        for j in range(K):
            val = values[i, j]

            # not nan
            nobs[lab, j] += val == val

    for i in range(len(counts)):
        for j in range(K):
            out[i, j] = nobs[i, j]


"""
1190
+
1191
# Cython source template for the bin-edge-based group-count kernel.
#
# The text is rendered with %-formatting; it expects the keys ``name``
# (suffix of the generated function), ``c_type`` (element type of ``values``)
# and ``dest_type2`` (element type of ``out`` and of the scratch buffer).
# Rows are assigned to consecutive groups by walking ``bins``; for each group
# and column the generated function counts the entries that are not NaN.
group_count_bin_template = """@cython.boundscheck(False)
@cython.wraparound(False)
def group_count_bin_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
                             ndarray[int64_t] counts,
                             ndarray[%(c_type)s, ndim=2] values,
                             ndarray[int64_t] bins):
    '''
    Count the non-NaN entries of each column of values, per bin.

    Only aggregates on axis=0

    Row i belongs to the first group b for which i < bins[b]; rows at or
    past the last edge fall into one extra trailing group. Returns without
    touching out or counts when bins is empty.
    '''
    cdef:
        Py_ssize_t i, j, N, K, ngroups, b
        %(dest_type2)s val
        ndarray[%(dest_type2)s, ndim=2] nobs

    nobs = np.zeros_like(out)

    # bounds checking and negative-index wraparound are disabled above, so
    # bins[len(bins) - 1] must never be evaluated for an empty array
    if len(bins) == 0:
        return

    if bins[len(bins) - 1] == len(values):
        ngroups = len(bins)
    else:
        ngroups = len(bins) + 1

    N, K = (<object> values).shape

    b = 0
    for i in range(N):
        while b < ngroups - 1 and i >= bins[b]:
            b += 1

        counts[b] += 1
        for j in range(K):
            val = values[i, j]

            # not nan
            nobs[b, j] += val == val

    for i in range(ngroups):
        for j in range(K):
            out[i, j] = nobs[i, j]


"""
1153
1232
# add passing bin edges, instead of labels
1154
1233
1155
1234
@@ -2251,6 +2330,8 @@ def generate_from_template(template, exclude=None):
2251
2330
group_max_bin_template ,
2252
2331
group_ohlc_template ]
2253
2332
2333
# Templates for the group-count kernels: the label-based variant followed by
# the bin-edge-based variant.
groupby_count = [
    group_count_template,
    group_count_bin_template,
]
2334
+
2254
2335
templates_1d = [map_indices_template ,
2255
2336
pad_template ,
2256
2337
backfill_template ,
@@ -2272,6 +2353,7 @@ def generate_from_template(template, exclude=None):
2272
2353
take_2d_axis1_template ,
2273
2354
take_2d_multi_template ]
2274
2355
2356
+
2275
2357
def generate_take_cython_file (path = 'generated.pyx' ):
2276
2358
with open (path , 'w' ) as f :
2277
2359
print (header , file = f )
@@ -2288,7 +2370,10 @@ def generate_take_cython_file(path='generated.pyx'):
2288
2370
print (generate_put_template (template ), file = f )
2289
2371
2290
2372
for template in groupbys :
2291
- print (generate_put_template (template , use_ints = False ), file = f )
2373
+ print (generate_put_template (template , use_ints = False ), file = f )
2374
+
2375
+ for template in groupby_count :
2376
+ print (generate_put_template (template ), file = f )
2292
2377
2293
2378
# for template in templates_1d_datetime:
2294
2379
# print >> f, generate_from_template_datetime(template)
@@ -2299,5 +2384,6 @@ def generate_take_cython_file(path='generated.pyx'):
2299
2384
for template in nobool_1d_templates :
2300
2385
print (generate_from_template (template , exclude = ['bool' ]), file = f )
2301
2386
2387
+
2302
2388
# Script entry point: write the generated Cython file to its default path
# when this module is executed directly.
if __name__ == '__main__':
    generate_take_cython_file()
0 commit comments