Skip to content

Commit 9efc800

Browse files
authored
FPGA: Tidy up Component Interfaces Comparison code sample (#2341)
- improve comparibility across code versions - remove unnecessary ready signal from csr-pipes version - re-run clang-format
1 parent a3bede2 commit 9efc800

File tree

5 files changed

+82
-48
lines changed

5 files changed

+82
-48
lines changed

DirectProgramming/C++SYCL_FPGA/Tutorials/Features/hls_flow_interfaces/component_interfaces_comparison/csr-pipes/src/vector_add.cpp

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,38 @@
11
#include <iostream>
22

33
// oneAPI headers
4-
#include <sycl/sycl.hpp>
54
#include <sycl/ext/intel/fpga_extensions.hpp>
5+
#include <sycl/sycl.hpp>
6+
67
#include "exception_handler.hpp"
78

89
// Forward declare the kernel name in the global scope. This is an FPGA best
910
// practice that reduces name mangling in the optimization reports.
10-
class SimpleVAddPipes;
11+
class IDSimpleVAdd;
1112

1213
// Forward declare pipe names to reduce name mangling
1314
class IDPipeA;
1415
class IDPipeB;
1516
class IDPipeC;
1617

17-
constexpr int kVectorSize = 256;
18-
1918
using PipeProps = decltype(sycl::ext::oneapi::experimental::properties(
2019
sycl::ext::intel::experimental::ready_latency<0>));
2120

2221
using InputPipeA =
23-
sycl::ext::intel::experimental::pipe<IDPipeA, int, 0,
24-
PipeProps>;
22+
sycl::ext::intel::experimental::pipe<IDPipeA, int, 0, PipeProps>;
2523
using InputPipeB =
26-
sycl::ext::intel::experimental::pipe<IDPipeB, int, 0,
27-
PipeProps>;
24+
sycl::ext::intel::experimental::pipe<IDPipeB, int, 0, PipeProps>;
2825

29-
using CSRPipeProps = decltype(sycl::ext::oneapi::experimental::properties(
30-
sycl::ext::intel::experimental::protocol_avalon_mm_uses_ready));
26+
using CsrOutProperties = decltype(sycl::ext::oneapi::experimental::properties(
27+
sycl::ext::intel::experimental::protocol<
28+
// Host doesn't care about possibly missing an update, so no need for
29+
// protocol_name::avalon_mm_uses_ready
30+
sycl::ext::intel::experimental::protocol_name::avalon_mm>));
3131

32-
// this csr pipe will only be read from and written to once
3332
using OutputPipeC =
34-
sycl::ext::intel::experimental::pipe<IDPipeC, int, 0,
35-
CSRPipeProps>;
33+
sycl::ext::intel::experimental::pipe<IDPipeC, int, 0, CsrOutProperties>;
3634

37-
struct SimpleVAddKernelPipes {
35+
struct SimpleVAddKernel {
3836
int len;
3937

4038
void operator()() const {
@@ -47,11 +45,15 @@ struct SimpleVAddKernelPipes {
4745
sum_total += sum;
4846
}
4947

50-
// Write to OutputPipeC only once per kernel invocation
48+
// Write to OutputPipeC only once per kernel invocation. Since we requested
49+
// protcol_avalon_mm instead of protocol_avalon_mm_uses_ready, this write is
50+
// effectively non-blocking.
5151
OutputPipeC::write(sum_total);
5252
}
5353
};
5454

55+
constexpr int kVectorSize = 256;
56+
5557
int main() {
5658
try {
5759
// Use compile-time macros to select either:
@@ -69,32 +71,42 @@ int main() {
6971
// create the device queue
7072
sycl::queue q(selector, fpga_tools::exception_handler);
7173

72-
int count = kVectorSize; // pass array size by value
74+
auto device = q.get_device();
75+
76+
std::cout << "Running on device: "
77+
<< device.get_info<sycl::info::device::name>().c_str()
78+
<< std::endl;
79+
80+
// Vector size is a constant here, but it could be a run-time variable too.
81+
int count = kVectorSize;
7382

7483
int expected_sum = 0;
7584

76-
// push data into pipes
85+
// push data into pipes before invoking kernel
7786
int *a = new int[count];
7887
int *b = new int[count];
7988
for (int i = 0; i < count; i++) {
8089
a[i] = i;
8190
b[i] = (count - i);
8291

8392
expected_sum += (a[i] + b[i]);
84-
// When writing to a host pipe in non kernel code,
93+
// When writing to a host pipe in non kernel code,
8594
// you must pass the sycl::queue as the first argument
8695
InputPipeA::write(q, a[i]);
8796
InputPipeB::write(q, b[i]);
8897
}
8998

9099
std::cout << "Add two vectors of size " << count << std::endl;
91100

92-
q.single_task<SimpleVAddPipes>(SimpleVAddKernelPipes{count});
101+
sycl::event e = q.single_task<IDSimpleVAdd>(SimpleVAddKernel{count});
93102

94-
// verify that outputs are correct
103+
// Verify that outputs are correct, after the kernel has finished running.
104+
// Since the write to OutputPipeC is non-blocking, no need to worry about
105+
// deadlock.
106+
e.wait();
95107
bool passed = true;
96108

97-
// only need to read from OutputPipeC once, since the kernel only wrote to it once
109+
// Only read from OutputPipeC once, since the kernel only wrote to it once
98110
int calc = OutputPipeC::read(q);
99111
if (calc != expected_sum) {
100112
std::cout << "result " << calc << ", expected (" << expected_sum << ")"

DirectProgramming/C++SYCL_FPGA/Tutorials/Features/hls_flow_interfaces/component_interfaces_comparison/mm-host/src/vector_add.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ constexpr int kBL3 = 3;
1212

1313
// Forward declare the kernel name in the global scope. This is an FPGA best
1414
// practice that reduces name mangling in the optimization reports.
15-
class SimpleVAdd;
15+
class IDSimpleVAdd;
1616

1717
struct SimpleVAddKernel {
1818
sycl::ext::oneapi::experimental::annotated_arg<
@@ -108,7 +108,7 @@ int main() {
108108

109109
std::cout << "Add two vectors of size " << count << std::endl;
110110

111-
q.single_task<SimpleVAdd>(SimpleVAddKernel{a, b, c, count}).wait();
111+
q.single_task<IDSimpleVAdd>(SimpleVAddKernel{a, b, c, count}).wait();
112112

113113
// verify that VC is correct
114114
bool passed = true;

DirectProgramming/C++SYCL_FPGA/Tutorials/Features/hls_flow_interfaces/component_interfaces_comparison/naive/src/vector_add.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#include <iostream>
22

33
// oneAPI headers
4-
#include <sycl/sycl.hpp>
54
#include <sycl/ext/intel/fpga_extensions.hpp>
5+
#include <sycl/sycl.hpp>
6+
67
#include "exception_handler.hpp"
8+
79
// Forward declare the kernel name in the global scope. This is an FPGA best
810
// practice that reduces name mangling in the optimization reports.
911
class IDSimpleVAdd;
@@ -27,8 +29,7 @@ struct SimpleVAddKernel {
2729
constexpr int kVectorSize = 256;
2830

2931
int main() {
30-
31-
try{
32+
try {
3233
// Use compile-time macros to select either:
3334
// - the FPGA emulator device (CPU emulation of the FPGA)
3435
// - the FPGA device (a real FPGA)
@@ -44,9 +45,16 @@ int main() {
4445
// create the device queue
4546
sycl::queue q(selector, fpga_tools::exception_handler);
4647

47-
int count = kVectorSize; // pass array size by value
48+
auto device = q.get_device();
49+
50+
std::cout << "Running on device: "
51+
<< device.get_info<sycl::info::device::name>().c_str()
52+
<< std::endl;
53+
54+
// Vector size is a constant here, but it could be a run-time variable too.
55+
int count = kVectorSize;
4856

49-
// Create USM shared allocations in the specified buffer_location.
57+
// Create USM shared allocations in the specified buffer_location.
5058
// You can also use host allocations with malloc_host(...) API
5159
int *a = sycl::malloc_shared<int>(count, q);
5260
int *b = sycl::malloc_shared<int>(count, q);
@@ -58,9 +66,10 @@ int main() {
5866

5967
std::cout << "Add two vectors of size " << count << std::endl;
6068

61-
q.single_task<IDSimpleVAdd>(SimpleVAddKernel{a, b, c, count}).wait();
69+
sycl::event e = q.single_task<IDSimpleVAdd>(SimpleVAddKernel{a, b, c, count});
6270

63-
// verify that VC is correct
71+
// Verify that outputs are correct, after the kernel has finished running.
72+
e.wait();
6473
bool passed = true;
6574
for (int i = 0; i < count; i++) {
6675
int expected = a[i] + b[i];

DirectProgramming/C++SYCL_FPGA/Tutorials/Features/hls_flow_interfaces/component_interfaces_comparison/pipes/src/vector_add.cpp

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,31 @@
11
#include <iostream>
22

33
// oneAPI headers
4-
#include <sycl/sycl.hpp>
54
#include <sycl/ext/intel/fpga_extensions.hpp>
5+
#include <sycl/sycl.hpp>
6+
67
#include "exception_handler.hpp"
78

8-
constexpr int kVectorSize = 256;
99
// Forward declare the kernel name in the global scope. This is an FPGA best
1010
// practice that reduces name mangling in the optimization reports.
11-
class IDSimpleVAddPipes;
11+
class IDSimpleVAdd;
12+
13+
// Forward declare pipe names to reduce name mangling
1214
class IDPipeA;
1315
class IDPipeB;
1416
class IDPipeC;
1517

1618
using PipeProps = decltype(sycl::ext::oneapi::experimental::properties(
1719
sycl::ext::intel::experimental::ready_latency<0>));
20+
1821
using InputPipeA =
1922
sycl::ext::intel::experimental::pipe<IDPipeA, int, 0, PipeProps>;
2023
using InputPipeB =
2124
sycl::ext::intel::experimental::pipe<IDPipeB, int, 0, PipeProps>;
2225
using OutputPipeC =
2326
sycl::ext::intel::experimental::pipe<IDPipeC, int, 0, PipeProps>;
2427

25-
struct SimpleVAddKernelPipes {
28+
struct SimpleVAddKernel {
2629
int len;
2730

2831
void operator()() const {
@@ -35,6 +38,8 @@ struct SimpleVAddKernelPipes {
3538
}
3639
};
3740

41+
constexpr int kVectorSize = 256;
42+
3843
int main() {
3944
try {
4045
// Use compile-time macros to select either:
@@ -58,26 +63,28 @@ int main() {
5863
<< device.get_info<sycl::info::device::name>().c_str()
5964
<< std::endl;
6065

61-
int count = kVectorSize; // pass array size by value
66+
// Vector size is a constant here, but it could be a run-time variable too.
67+
int count = kVectorSize;
6268

63-
// push data into pipes before invoking kernel
69+
// Push data into pipes before invoking kernel
6470
int *a = new int[count];
6571
int *b = new int[count];
6672
for (int i = 0; i < count; i++) {
6773
a[i] = i;
6874
b[i] = (count - i);
69-
// When writing to a host pipe in non kernel code,
75+
// When writing to a host pipe in non kernel code,
7076
// you must pass the sycl::queue as the first argument
7177
InputPipeA::write(q, a[i]);
7278
InputPipeB::write(q, b[i]);
7379
}
7480

7581
std::cout << "Add two vectors of size " << count << std::endl;
7682

77-
q.single_task<IDSimpleVAddPipes>(
78-
SimpleVAddKernelPipes{count});
83+
q.single_task<IDSimpleVAdd>(SimpleVAddKernel{count});
7984

80-
// verify that VC is correct
85+
// Verify that outputs are correct. Do not wait for the kernel to complete,
86+
// because the pipe reads are blocking. Therefore, waiting would cause
87+
// deadlock.
8188
bool passed = true;
8289
for (int i = 0; i < count; i++) {
8390
int expected = a[i] + b[i];

DirectProgramming/C++SYCL_FPGA/Tutorials/Features/hls_flow_interfaces/component_interfaces_comparison/streaming-invocation/src/vector_add.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
#include <iostream>
22

33
// oneAPI headers
4-
#include <sycl/sycl.hpp>
54
#include <sycl/ext/intel/fpga_extensions.hpp>
5+
#include <sycl/sycl.hpp>
6+
67
#include "exception_handler.hpp"
78

89
// Forward declare the kernel name in the global scope. This is an FPGA best
@@ -65,9 +66,16 @@ int main() {
6566
// create the device queue
6667
sycl::queue q(selector, fpga_tools::exception_handler);
6768

68-
int count = kVectorSize; // pass array size by value
69+
auto device = q.get_device();
6970

70-
// Create USM shared allocations in the specified buffer_location.
71+
std::cout << "Running on device: "
72+
<< device.get_info<sycl::info::device::name>().c_str()
73+
<< std::endl;
74+
75+
// Vector size is a constant here, but it could be a run-time variable too.
76+
int count = kVectorSize;
77+
78+
// Create USM shared allocations in the specified buffer_location.
7179
// You can also use host allocations with malloc_host(...) API
7280
int *a = sycl::malloc_shared<int>(count, q);
7381
int *b = sycl::malloc_shared<int>(count, q);
@@ -79,9 +87,10 @@ int main() {
7987

8088
std::cout << "Add two vectors of size " << count << std::endl;
8189

82-
q.single_task<IDSimpleVAdd>(SimpleVAddKernel{a, b, c, count}).wait();
90+
sycl::event e = q.single_task<IDSimpleVAdd>(SimpleVAddKernel{a, b, c, count});
8391

84-
// verify that VC is correct
92+
// Verify that outputs are correct, after the kernel has finished running.
93+
e.wait();
8594
bool passed = true;
8695
for (int i = 0; i < count; i++) {
8796
int expected = a[i] + b[i];
@@ -107,9 +116,6 @@ int main() {
107116
"ensure that your system is plugged to an FPGA board that is "
108117
"set up correctly"
109118
<< std::endl;
110-
std::cerr << " If you are targeting the FPGA emulator, compile with "
111-
"-DFPGA_EMULATOR"
112-
<< std::endl;
113119
std::terminate();
114120
}
115121
}

0 commit comments

Comments
 (0)