Skip to content

Commit 880314f

Browse files
Bhaskar VishnuVardhan Chebrolu authored and GitHub Enterprise committed
p2p_bandwidth updated to run synchronous flow by default
2 parents 849fc16 + ae22dd1 commit 880314f

File tree

5 files changed

+65
-13
lines changed

5 files changed

+65
-13
lines changed

host/p2p_bandwidth/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ RESULT_STRING = TEST PASSED
5656
VPP := v++
5757

5858
include $(ABS_COMMON_REPO)/common/includes/opencl/opencl.mk
59+
include config.mk
60+
5961
CXXFLAGS += $(opencl_CXXFLAGS) -Wall -O0 -g -std=c++11
6062
LDFLAGS += $(opencl_LDFLAGS)
6163

@@ -68,7 +70,6 @@ HOST_SRCS += $(ABS_COMMON_REPO)/common/includes/xcl2/xcl2.cpp $(ABS_COMMON_REPO)
6870
# Host compiler global settings
6971
CXXFLAGS += -fmessage-length=0
7072
LDFLAGS += -lrt -lstdc++
71-
LDFLAGS += -laio
7273

7374
ifneq ($(HOST_ARCH), x86)
7475
LDFLAGS += --sysroot=$(SYSROOT)

host/p2p_bandwidth/config.mk

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ifeq ($(ENABLE_ASYNC_FLOW), yes)
2+
LDFLAGS += -laio
3+
CXXFLAGS += -DASYNC_READ
4+
endif

host/p2p_bandwidth/description.json

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,6 @@
6464
"REPO_DIR/common/includes/cmdparser",
6565
"REPO_DIR/common/includes/logger"
6666
]
67-
},
68-
"linker" : {
69-
"options" : ["-laio"
70-
]
7167
}
7268
},
7369
"containers": [
@@ -81,6 +77,7 @@
8177
"name": "copy_kernel"
8278
}
8379
],
80+
"config_make": "config.mk",
8481
"contributors": [
8582
{
8683
"url": "http://www.xilinx.com",

host/p2p_bandwidth/details.rst

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,62 @@ In this Example we highlight the Synchronous and Asynchronous data
1010
transfer between SSD and FPGA and capture the performance numbers for
1111
p2p transfer, Kernel and XDMA.
1212

13-
Following is the real log reported while running the design on samsung
14-
platform:
13+
By default, the synchronous flow is run, and the following is the real log
14+
reported while running the design on a Samsung platform:
1515

1616
::
1717

18-
./host copy_kernel.xclbin -w /dev/nvme0n1
18+
./p2p_bandwidth -x copy_kernel.xclbin -i /dev/nvme0n1
1919
INFO: Successfully opened NVME SSD /dev/nvme0n1
2020
INFO: Preparing 1048576KB test data in 8 pipelines
2121
############################################################
2222
Synchronous P2P
2323
############################################################
2424
INFO: Kick off test
2525
HOST -> FPGA(host BO) -> FPGA(p2p BO) -> SSD
26-
overall 556211ms 100.00% 1841.03MB/s
27-
p2p 359223ms 64.58% 2850.60MB/s
28-
kernel 199578ms 35.88% 5130.83MB/s
29-
XDMA 313203ms 56.31% 3269.45MB/s
26+
overall 556211ms 100.00% 1861.03MB/s
27+
p2p 359223ms 57.58% 3250.60MB/s
28+
kernel 199578ms 207.88% 871.83MB/s
29+
XDMA 313203ms 99.11% 1866.45MB/s
3030
INFO: Evaluating test result
3131
INFO: Test passed
32+
33+
If we need to run the asynchronous flow, build the host with
34+
ENABLE_ASYNC_FLOW=yes :
35+
36+
.. code:: bash
37+
38+
make host ENABLE_ASYNC_FLOW=yes
39+
40+
The following is the real log reported while running the design
41+
on a Samsung platform for the asynchronous flow:
42+
43+
::
44+
45+
./p2p_bandwidth -x copy_kernel.xclbin -i /dev/nvme0n1
46+
INFO: Successfully opened NVME SSD /dev/nvme0n1
47+
INFO: Preparing 1048576KB test data in 8 pipelines
48+
############################################################
49+
Synchronous P2P
50+
############################################################
51+
INFO: Kick off test
52+
HOST -> FPGA(host BO) -> FPGA(p2p BO) -> SSD
53+
overall 551211ms 100.00% 1862.23MB/s
54+
p2p 357223ms 58.58% 3200.30MB/s
55+
kernel 192578ms 211.88% 879.89MB/s
56+
XDMA 315203ms 99.31% 1867.35MB/s
57+
INFO: Evaluating test result
58+
INFO: Test passed
59+
############################################################
60+
Asynchronous P2P
61+
############################################################
62+
INFO: Kick off test
63+
HOST -> FPGA(host BO) -> FPGA(p2p BO) -> SSD
64+
overall 556211ms 100.00% 1548.03MB/s
65+
p2p 359223ms 61.58% 3371.60MB/s
66+
kernel 199578ms 198.88% 697.83MB/s
67+
XDMA 313203ms 98.23% 1806.45MB/s
68+
INFO: Evaluating test result
69+
INFO: Test passed
70+
71+

host/p2p_bandwidth/src/host.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,13 @@
3737
#include <fcntl.h>
3838
#include <iomanip>
3939
#include <iostream>
40-
#include <libaio.h>
4140
#include <string.h>
4241
#include <unistd.h>
4342

43+
#ifdef ASYNC_READ
44+
#include <libaio.h>
45+
#endif
46+
4447
#include "xcl2.hpp"
4548

4649
// Comment out the macro below to test only the P2P I/O bandwidth between the FPGA and the SSD.
@@ -270,6 +273,7 @@ void exec_write_test() {
270273
clWaitForEvents(1, &chunks[idx]->p2pEvt);
271274
}
272275

276+
#ifdef ASYNC_READ
273277
void exec_async_read_test(Chunk **chunks) {
274278
io_context_t myctx;
275279
memset(&myctx, 0, sizeof(myctx));
@@ -319,6 +323,7 @@ void exec_async_read_test(Chunk **chunks) {
319323
clWaitForEvents(1, &chunks[idx]->p2pEvt);
320324
#endif
321325
}
326+
#endif
322327

323328
void exec_read_test() {
324329
cout << "SSD -> FPGA(p2p BO) -> FPGA(host BO) -> HOST" << endl;
@@ -464,9 +469,12 @@ int main(int argc, char **argv) {
464469
cl_program program = clCreateProgramWithBinary(
465470
context, 1, &device, &binary_size, &binary_data, NULL, &err);
466471

472+
#ifdef ASYNC_READ
467473
io_context_t ctx;
468474
memset(&ctx, 0, sizeof(ctx));
469475
io_queue_init(128, &ctx);
476+
#endif
477+
470478
// Setting up OpenCL runtime environment.
471479
cl_kernel kernel = clCreateKernel(program, "copy", &err);
472480

@@ -537,6 +545,7 @@ int main(int argc, char **argv) {
537545
for (int idx = 0; idx < num_chunks; idx++)
538546
delete chunks[idx];
539547

548+
#ifdef ASYNC_READ
540549
if (!isWrite) {
541550
// Running Asynchronous P2P
542551
std::cout
@@ -590,6 +599,7 @@ int main(int argc, char **argv) {
590599
for (int idx = 0; idx < num_chunks; idx++)
591600
delete chunks[idx];
592601
}
602+
#endif
593603

594604
clReleaseKernel(kernel);
595605
clReleaseContext(context);

0 commit comments

Comments
 (0)