@@ -25,6 +25,9 @@ void sync_threadblock();
25
25
int64_t get_thread_index ();
26
26
int64_t get_block_dim ();
27
27
28
// Prototypes for aggregate helpers whose definitions are not visible in this
// part of the file. Each takes the aggregate slot `agg` and the input `val`.
// NOTE(review): the skip-value helpers further down use the results of
// agg_sum_int32_shared / agg_sum_shared as the slot value from before the
// addition -- confirm against the definitions.
+ int32_t agg_sum_int32_shared (GENERIC_ADDR_SPACE int32_t * agg, const int32_t val);
29
+ int64_t agg_sum_shared (GENERIC_ADDR_SPACE int64_t * agg, const int64_t val);
30
+ void agg_max_int32_shared (GENERIC_ADDR_SPACE int32_t * agg, const int32_t val);
28
31
void agg_max_shared (GENERIC_ADDR_SPACE int64_t * agg, const int64_t val);
29
32
int64_t agg_count_shared (GENERIC_ADDR_SPACE int64_t * agg, const int64_t val);
30
33
uint32_t agg_count_int32_shared (GENERIC_ADDR_SPACE uint32_t * agg, const int32_t val);
@@ -94,6 +97,50 @@ void agg_max_double_skip_val_shared(GENERIC_ADDR_SPACE int64_t* agg,
94
97
}
95
98
}
96
99
100
+ int32_t atomicSum32SkipVal (GENERIC_ADDR_SPACE int32_t * addr,
101
+ const int32_t val,
102
+ const int32_t skip_val) {
103
+ int32_t old = atomic_xchg_int_32 (addr, 0 );
104
+ int32_t old2 = agg_sum_int32_shared (addr, old == skip_val ? val : (val + old));
105
+ return old == skip_val ? old2 : (old2 + old);
106
+ }
107
+
108
+ int64_t atomicSum64SkipVal (GENERIC_ADDR_SPACE int64_t * addr,
109
+ const int64_t val,
110
+ const int64_t skip_val) {
111
+ int32_t old = atomic_xchg_int_64 (addr, 0 );
112
+ int32_t old2 = agg_sum_shared (addr, old == skip_val ? val : (val + old));
113
+ return old == skip_val ? old2 : (old2 + old);
114
+ }
115
+
116
+ int32_t agg_sum_int32_skip_val_shared (GENERIC_ADDR_SPACE int32_t * agg,
117
+ const int32_t val,
118
+ const int32_t skip_val) {
119
+ if (val != skip_val) {
120
+ const int32_t old = atomicSum32SkipVal (agg, val, skip_val);
121
+ return old;
122
+ }
123
+ return 0 ;
124
+ }
125
+
126
+ int64_t agg_sum_int64_skip_val_shared (GENERIC_ADDR_SPACE int64_t * agg,
127
+ const int64_t val,
128
+ const int64_t skip_val) {
129
+ if (val != skip_val) {
130
+ const int64_t old = atomicSum64SkipVal (agg, val, skip_val);
131
+ return old;
132
+ }
133
+ return 0 ;
134
+ }
135
+
136
+ void agg_max_int32_skip_val_shared (GENERIC_ADDR_SPACE int32_t * agg,
137
+ const int32_t val,
138
+ const int32_t skip_val) {
139
+ if (val != skip_val) {
140
+ agg_max_int32_shared (agg, val);
141
+ }
142
+ }
143
+
97
144
const GENERIC_ADDR_SPACE int64_t * init_shared_mem (
98
145
const GENERIC_ADDR_SPACE int64_t * global_groups_buffer,
99
146
const int32_t groups_buffer_size) {
0 commit comments