@@ -76,46 +76,6 @@ bool SubmitQuery11(queue& q, Database& dbinfo, std::string& nation,
76
76
// start timer
77
77
high_resolution_clock::time_point host_start = high_resolution_clock::now ();
78
78
79
- // /////////////////////////////////////////////////////////////////////////
80
- // // ProducePartSupplier Kernel
81
- auto produce_ps_event = q.submit ([&](handler& h) {
82
- // PARTSUPPLIER table accessors
83
- accessor ps_partkey_accessor (ps_partkey_buf, h, read_only);
84
- accessor ps_suppkey_accessor (ps_suppkey_buf, h, read_only);
85
- accessor ps_availqty_accessor (ps_availqty_buf, h, read_only);
86
- accessor ps_supplycost_accessor (ps_supplycost_buf, h, read_only);
87
-
88
- // kernel to produce the PARTSUPPLIER table
89
- h.single_task <ProducePartSupplier>([=]() [[intel::kernel_args_restrict]] {
90
- [[intel::initiation_interval (1 )]]
91
- for (size_t i = 0 ; i < ps_iters; i++) {
92
- // bulk read of data from global memory
93
- NTuple<kJoinWinSize , PartSupplierRow> data;
94
-
95
- UnrolledLoop<0 , kJoinWinSize >([&](auto j) {
96
- size_t idx = i * kJoinWinSize + j;
97
- bool in_range = idx < ps_rows;
98
-
99
- DBIdentifier partkey = ps_partkey_accessor[idx];
100
- DBIdentifier suppkey = ps_suppkey_accessor[idx];
101
- int availqty = ps_availqty_accessor[idx];
102
- DBDecimal supplycost = ps_supplycost_accessor[idx];
103
-
104
- data.get <j>() =
105
- PartSupplierRow (in_range, partkey, suppkey, availqty, supplycost);
106
- });
107
-
108
- // write to pipe
109
- ProducePartSupplierPipe::write (
110
- PartSupplierRowPipeData (false , true , data));
111
- }
112
-
113
- // tell the downstream kernel we are done producing data
114
- ProducePartSupplierPipe::write (PartSupplierRowPipeData (true , false ));
115
- });
116
- });
117
- // /////////////////////////////////////////////////////////////////////////
118
-
119
79
// /////////////////////////////////////////////////////////////////////////
120
80
// // JoinPartSupplierParts Kernel
121
81
auto join_event = q.submit ([&](handler& h) {
@@ -250,6 +210,47 @@ bool SubmitQuery11(queue& q, Database& dbinfo, std::string& nation,
250
210
});
251
211
// /////////////////////////////////////////////////////////////////////////
252
212
213
+ // This kernel must be submitted last to ensure reliable timings: its command_start is used below as the start time of the whole pipeline
214
+ // /////////////////////////////////////////////////////////////////////////
215
+ // // ProducePartSupplier Kernel
216
+ auto produce_ps_event = q.submit ([&](handler& h) {
217
+ // PARTSUPPLIER table accessors
218
+ accessor ps_partkey_accessor (ps_partkey_buf, h, read_only);
219
+ accessor ps_suppkey_accessor (ps_suppkey_buf, h, read_only);
220
+ accessor ps_availqty_accessor (ps_availqty_buf, h, read_only);
221
+ accessor ps_supplycost_accessor (ps_supplycost_buf, h, read_only);
222
+
223
+ // kernel to produce the PARTSUPPLIER table
224
+ h.single_task <ProducePartSupplier>([=]() [[intel::kernel_args_restrict]] {
225
+ [[intel::initiation_interval (1 )]]
226
+ for (size_t i = 0 ; i < ps_iters; i++) {
227
+ // bulk read of data from global memory
228
+ NTuple<kJoinWinSize , PartSupplierRow> data;
229
+
230
+ UnrolledLoop<0 , kJoinWinSize >([&](auto j) {
231
+ size_t idx = i * kJoinWinSize + j;
232
+ bool in_range = idx < ps_rows;
233
+
234
+ DBIdentifier partkey = ps_partkey_accessor[idx];
235
+ DBIdentifier suppkey = ps_suppkey_accessor[idx];
236
+ int availqty = ps_availqty_accessor[idx];
237
+ DBDecimal supplycost = ps_supplycost_accessor[idx];
238
+
239
+ data.get <j>() =
240
+ PartSupplierRow (in_range, partkey, suppkey, availqty, supplycost);
241
+ });
242
+
243
+ // write to pipe
244
+ ProducePartSupplierPipe::write (
245
+ PartSupplierRowPipeData (false , true , data));
246
+ }
247
+
248
+ // tell the downstream kernel we are done producing data
249
+ ProducePartSupplierPipe::write (PartSupplierRowPipeData (true , false ));
250
+ });
251
+ });
252
+ // /////////////////////////////////////////////////////////////////////////
253
+
253
254
// wait for kernels to finish
254
255
produce_ps_event.wait ();
255
256
join_event.wait ();
@@ -260,9 +261,9 @@ bool SubmitQuery11(queue& q, Database& dbinfo, std::string& nation,
260
261
high_resolution_clock::time_point host_end = high_resolution_clock::now ();
261
262
duration<double , std::milli> diff = host_end - host_start;
262
263
263
- // gather profiling info
264
+ // gather profiling info for the whole pipeline: from produce_ps_event's command_start to consume_sort_event's command_end
264
265
auto start_time =
265
- consume_sort_event
266
+ produce_ps_event
266
267
.get_profiling_info <info::event_profiling::command_start>();
267
268
auto end_time = consume_sort_event
268
269
.get_profiling_info <info::event_profiling::command_end>();
0 commit comments