Skip to content

Commit 8166ebc

Browse files
Bhaskar VishnuVardhan Chebrolu authored and GitHub Enterprise committed
update slave bridge bandwidth for better numbers with small buffers
1 parent 7dfc5b8 commit 8166ebc

File tree

4 files changed

+111
-48
lines changed

4 files changed

+111
-48
lines changed

host/slave_bridge_bandwidth/src/bandwidth.cpp

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,28 +13,53 @@
1313
* License for the specific language governing permissions and limitations
1414
* under the License.
1515
*/
16+
1617
#include <ap_int.h>
1718
#include <iostream>
1819

19-
auto constexpr DATAWIDTH = 512;
20-
using TYPE = ap_uint<DATAWIDTH>;
20+
auto constexpr DATA_WIDTH = 512;
21+
auto constexpr c_widthInBytes = DATA_WIDTH / 8;
22+
auto constexpr c_maxBurstSize = 4 * 1024; // 4KB
23+
auto constexpr c_burstLength = c_maxBurstSize / c_widthInBytes;
24+
25+
using TYPE = ap_uint<DATA_WIDTH>;
2126

2227
extern "C" {
2328
// Copy kernel: reads TYPE (512-bit) words from input0 and writes them to
// output0, repeating the transfer `iter` times.
// In the added (+) lines buf_size is treated as a byte count (the else branch
// divides it by c_widthInBytes before indexing); the removed (-) lines used
// buf_size directly as the number of words to copy.
// Both m_axi ports share the bundle `gmem` in the added lines; the removed
// lines used separate bundles gmem0 / gmem1.
void bandwidth(TYPE* input0, TYPE* output0, int64_t buf_size, int64_t iter) {
24-
#pragma HLS INTERFACE m_axi port = input0 offset = slave bundle = gmem0 max_read_burst_length = \
25-
64 num_read_outstanding = 256
26-
#pragma HLS INTERFACE m_axi port = output0 offset = slave bundle = gmem1 max_write_burst_length = \
27-
64 num_write_outstanding = 256
29+
#pragma HLS INTERFACE m_axi port = input0 offset = slave bundle = gmem max_read_burst_length = \
30+
64 num_read_outstanding = 16
31+
#pragma HLS INTERFACE m_axi port = output0 offset = slave bundle = gmem max_write_burst_length = \
32+
64 num_write_outstanding = 16
2833
#pragma HLS INTERFACE s_axilite port = input0
2934
#pragma HLS INTERFACE s_axilite port = output0
3035
#pragma HLS INTERFACE s_axilite port = buf_size
3136
#pragma HLS INTERFACE s_axilite port = iter
3237
#pragma HLS INTERFACE s_axilite port = return
3338

34-
for (int64_t i = 0; i < iter; i++) {
35-
for (int64_t blockindex = 0; blockindex < buf_size; blockindex++) {
36-
TYPE temp0 = input0[blockindex];
37-
output0[blockindex] = temp0;
39+
TYPE temp = 0;
40+
41+
uint32_t factor = buf_size / c_maxBurstSize;
42+
uint32_t Indx = 0;
43+
uint32_t baseAddr = 0;
44+
// Small-buffer path (buf_size <= 8 KB): run iter * factor bursts of
// c_burstLength words each, where factor is the number of whole 4 KB chunks
// in the buffer. The word offset of each burst is c_burstLength * (itr % factor).
// The offset is recomputed after the burst, so the first two bursts both
// start at word 0.
// NOTE(review): if buf_size < c_maxBurstSize, factor is 0, the loop bound
// iter * factor is 0 and nothing is copied - confirm callers never pass less than 4 KB.
// NOTE(review): PIPELINE II = 1 is applied to both the outer and the inner
// loop - confirm this is the intended HLS schedule.
45+
if (buf_size <= 8 * 1024) {
46+
for (int itr = 0; itr < iter * factor; itr++) {
47+
#pragma HLS PIPELINE II = 1
48+
for (int i = 0; i < c_burstLength; i++) {
49+
#pragma HLS PIPELINE II = 1
50+
temp = input0[baseAddr + i];
51+
output0[baseAddr + i] = temp;
52+
}
53+
Indx = itr % factor;
54+
baseAddr = c_burstLength * Indx;
55+
}
56+
} else {
// Large-buffer path: convert the byte count into a count of TYPE words, then
// copy the whole buffer front to back, iter times.
57+
buf_size = buf_size / c_widthInBytes;
58+
for (int64_t i = 0; i < iter; i++) {
59+
for (int64_t blockindex = 0; blockindex < buf_size; blockindex++) {
60+
temp = input0[blockindex];
61+
output0[blockindex] = temp;
62+
}
3863
}
3964
}
4065
}

host/slave_bridge_bandwidth/src/host.cpp

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ int main(int argc, char* argv[]) {
6161
exit(EXIT_FAILURE);
6262
}
6363

64-
double concurrent_max[2] = {0};
65-
double read_max[2] = {0};
66-
double write_max[2] = {0};
64+
double concurrent_max = 0;
65+
double read_max = 0;
66+
double write_max = 0;
6767

6868
for (size_t i = 4 * 1024; i <= 256 * 1024 * 1024; i *= 2) {
6969
size_t iter = 1024;
@@ -74,8 +74,6 @@ int main(int argc, char* argv[]) {
7474
if (bufsize > 8 * 1024) break;
7575
}
7676

77-
cl_ulong num_blocks = bufsize / 64;
78-
7977
/* Input buffer */
8078
unsigned char* input_host = ((unsigned char*)malloc(bufsize));
8179
if (input_host == NULL) {
@@ -107,11 +105,11 @@ int main(int argc, char* argv[]) {
107105

108106
OCL_CHECK(err, err = krnl.setArg(0, *(buffer[0])));
109107
OCL_CHECK(err, err = krnl.setArg(1, *(buffer[1])));
110-
OCL_CHECK(err, err = krnl.setArg(2, num_blocks));
108+
OCL_CHECK(err, err = krnl.setArg(2, bufsize));
111109
OCL_CHECK(err, err = krnl.setArg(3, iter));
112110

113111
double dbytes = bufsize;
114-
double dmbytes = dbytes / (((double)1024) * ((double)1024));
112+
std::string size_str = xcl::convert_size(bufsize);
115113

116114
/* Write input buffer */
117115
/* Map input buffer for PCIe write */
@@ -160,16 +158,15 @@ int main(int argc, char* argv[]) {
160158
double bpersec = (dbytes / dsduration);
161159
double gbpersec = (2 * bpersec) / ((double)1024 * 1024 * 1024); // For Concurrent Read and Write
162160

163-
std::cout << "Concurrent Read and Write Throughput = " << gbpersec << " (GB/sec) for buffer size " << dmbytes
164-
<< " MB\n";
161+
std::cout << "Concurrent Read and Write Throughput = " << gbpersec << " (GB/sec) for buffer size " << size_str
162+
<< std::endl;
165163

166-
if (gbpersec > concurrent_max[0]) {
167-
concurrent_max[0] = gbpersec;
168-
concurrent_max[1] = dmbytes;
164+
if (gbpersec > concurrent_max) {
165+
concurrent_max = gbpersec;
169166
}
170167

171168
OCL_CHECK(err, err = krnl_read.setArg(0, *(buffer[0])));
172-
OCL_CHECK(err, err = krnl_read.setArg(1, num_blocks));
169+
OCL_CHECK(err, err = krnl_read.setArg(1, bufsize));
173170
OCL_CHECK(err, err = krnl_read.setArg(2, iter));
174171

175172
/* Execute Kernel */
@@ -186,15 +183,14 @@ int main(int argc, char* argv[]) {
186183
bpersec = (dbytes / dsduration);
187184
gbpersec = bpersec / ((double)1024 * 1024 * 1024);
188185

189-
std::cout << "Read Throughput = " << gbpersec << " (GB/sec) for buffer size " << dmbytes << " MB\n";
186+
std::cout << "Read Throughput = " << gbpersec << " (GB/sec) for buffer size " << size_str << std::endl;
190187

191-
if (gbpersec > read_max[0]) {
192-
read_max[0] = gbpersec;
193-
read_max[1] = dmbytes;
188+
if (gbpersec > read_max) {
189+
read_max = gbpersec;
194190
}
195191

196192
OCL_CHECK(err, err = krnl_write.setArg(0, *(buffer[1])));
197-
OCL_CHECK(err, err = krnl_write.setArg(1, num_blocks));
193+
OCL_CHECK(err, err = krnl_write.setArg(1, bufsize));
198194
OCL_CHECK(err, err = krnl_write.setArg(2, iter));
199195

200196
/* Execute Kernel */
@@ -211,22 +207,20 @@ int main(int argc, char* argv[]) {
211207
bpersec = (dbytes / dsduration);
212208
gbpersec = bpersec / ((double)1024 * 1024 * 1024);
213209

214-
std::cout << "Write Throughput = " << gbpersec << " (GB/sec) for buffer size " << dmbytes << " MB\n\n";
210+
std::cout << "Write Throughput = " << gbpersec << " (GB/sec) for buffer size " << size_str << "\n\n";
215211

216-
if (gbpersec > write_max[0]) {
217-
write_max[0] = gbpersec;
218-
write_max[1] = dmbytes;
212+
if (gbpersec > write_max) {
213+
write_max = gbpersec;
219214
}
220215

221216
delete (buffer[0]);
222217
delete (buffer[1]);
223218
}
224219

225220
std::cout << "Maximum bandwidth achieved :\n";
226-
std::cout << "Concurrent Read and Write Throughput = " << concurrent_max[0] << " (GB/sec) for buffer size "
227-
<< concurrent_max[1] << " MB\n";
228-
std::cout << "Read Throughput = " << read_max[0] << " (GB/sec) for buffer size " << read_max[1] << " MB\n";
229-
std::cout << "Write Throughput = " << write_max[0] << " (GB/sec) for buffer size " << write_max[1] << " MB\n\n";
221+
std::cout << "Concurrent Read and Write Throughput = " << concurrent_max << " (GB/sec) \n";
222+
std::cout << "Read Throughput = " << read_max << " (GB/sec) \n";
223+
std::cout << "Write Throughput = " << write_max << " (GB/sec) \n\n";
230224
std::cout << "TEST PASSED\n";
231225
return EXIT_SUCCESS;
232226
}

host/slave_bridge_bandwidth/src/read_bandwidth.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,43 @@
1616
#include <ap_int.h>
1717
#include <iostream>
1818

19-
auto constexpr DATAWIDTH = 512;
20-
using TYPE = ap_uint<DATAWIDTH>;
19+
auto constexpr DATA_WIDTH = 512;
20+
auto constexpr c_widthInBytes = DATA_WIDTH / 8;
21+
auto constexpr c_maxBurstSize = 4 * 1024; // 4KB
22+
auto constexpr c_burstLength = c_maxBurstSize / c_widthInBytes;
23+
24+
using TYPE = ap_uint<DATA_WIDTH>;
2125

2226
extern "C" {
2327
void read_bandwidth(TYPE* input0, int64_t buf_size, int64_t iter) {
2428
#pragma HLS INTERFACE m_axi port = input0 offset = slave bundle = gmem max_read_burst_length = \
25-
64 num_read_outstanding = 256
29+
64 num_read_outstanding = 16
2630
#pragma HLS INTERFACE s_axilite port = input0
2731
#pragma HLS INTERFACE s_axilite port = buf_size
2832
#pragma HLS INTERFACE s_axilite port = iter
2933
#pragma HLS INTERFACE s_axilite port = return
3034

3135
TYPE temp = 0;
32-
for (int64_t i = 0; i < iter; i++) {
33-
for (int64_t blockindex = 0; blockindex < buf_size; blockindex++) {
34-
temp |= input0[blockindex];
36+
uint32_t factor = buf_size / c_maxBurstSize;
37+
uint32_t Indx = 0;
38+
uint32_t baseAddr = 0;
39+
40+
if (buf_size <= 8 * 1024) {
41+
for (int itr = 0; itr < iter * factor; itr++) {
42+
#pragma HLS PIPELINE II = 1
43+
for (int i = 0; i < c_burstLength; i++) {
44+
#pragma HLS PIPELINE II = 1
45+
temp |= input0[baseAddr + i];
46+
}
47+
Indx = itr % factor;
48+
baseAddr = c_burstLength * Indx;
49+
}
50+
} else {
51+
buf_size = buf_size / c_widthInBytes;
52+
for (int64_t i = 0; i < iter; i++) {
53+
for (int64_t blockindex = 0; blockindex < buf_size; blockindex++) {
54+
temp |= input0[blockindex];
55+
}
3556
}
3657
}
3758
input0[0] = temp;

host/slave_bridge_bandwidth/src/write_bandwidth.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,44 @@
1616
#include <ap_int.h>
1717
#include <iostream>
1818

19-
auto constexpr DATAWIDTH = 512;
20-
using TYPE = ap_uint<DATAWIDTH>;
19+
auto constexpr DATA_WIDTH = 512;
20+
auto constexpr c_widthInBytes = DATA_WIDTH / 8;
21+
auto constexpr c_maxBurstSize = 4 * 1024; // 4KB
22+
auto constexpr c_burstLength = c_maxBurstSize / c_widthInBytes;
23+
24+
using TYPE = ap_uint<DATA_WIDTH>;
2125

2226
extern "C" {
2327
// Write-only kernel: stores the constant 1 into TYPE (512-bit) words of
// output0, repeating the transfer `iter` times.
// In the added (+) lines buf_size is treated as a byte count (the else branch
// divides it by c_widthInBytes before indexing); the removed (-) lines used
// buf_size directly as the number of words to write.
// NOTE(review): the added local `temp` is not referenced anywhere else in the
// code shown for this function.
void write_bandwidth(TYPE* output0, int64_t buf_size, int64_t iter) {
2428
#pragma HLS INTERFACE m_axi port = output0 offset = slave bundle = gmem max_write_burst_length = \
25-
64 num_write_outstanding = 256
29+
64 num_write_outstanding = 16
2630
#pragma HLS INTERFACE s_axilite port = output0
2731
#pragma HLS INTERFACE s_axilite port = buf_size
2832
#pragma HLS INTERFACE s_axilite port = iter
2933
#pragma HLS INTERFACE s_axilite port = return
3034

31-
for (int64_t i = 0; i < iter; i++) {
32-
for (int64_t blockindex = 0; blockindex < buf_size; blockindex++) {
33-
output0[blockindex] = 1;
35+
TYPE temp = 0;
36+
37+
uint32_t factor = buf_size / c_maxBurstSize;
38+
uint32_t Indx = 0;
39+
uint32_t baseAddr = 0;
40+
// Small-buffer path (buf_size <= 8 KB): run iter * factor bursts of
// c_burstLength words each, where factor is the number of whole 4 KB chunks
// in the buffer. The word offset of each burst is c_burstLength * (itr % factor).
// The offset is recomputed after the burst, so the first two bursts both
// start at word 0.
// NOTE(review): if buf_size < c_maxBurstSize, factor is 0, the loop bound
// iter * factor is 0 and nothing is written - confirm callers never pass less than 4 KB.
41+
if (buf_size <= 8 * 1024) {
42+
for (int itr = 0; itr < iter * factor; itr++) {
43+
#pragma HLS PIPELINE II = 1
44+
for (int i = 0; i < c_burstLength; i++) {
45+
#pragma HLS PIPELINE II = 1
46+
output0[baseAddr + i] = 1;
47+
}
48+
Indx = itr % factor;
49+
baseAddr = c_burstLength * Indx;
50+
}
51+
} else {
// Large-buffer path: convert the byte count into a count of TYPE words, then
// write the whole buffer front to back, iter times.
52+
buf_size = buf_size / c_widthInBytes;
53+
for (int64_t i = 0; i < iter; i++) {
54+
for (int64_t blockindex = 0; blockindex < buf_size; blockindex++) {
55+
output0[blockindex] = 1;
56+
}
3457
}
3558
}
3659
}

0 commit comments

Comments
 (0)