Skip to content

Commit 0b799b1

Browse files
authored
Wiredancer F2 Support (#5916)
* Add double-buffered terminal rendering to wd frank mon * Add double-buffered terminal rendering to fd frank mon * Make colorized output column-safe in monitor formatting functions * Add missing sha512_pre module from 902fb24 * Renaming top_f1 to top_wd * Add auto-generated aws-fpga hdk project files for cl_wiredancer Source: https://github.com/aws/aws-fpga Version: v2.1.1 License: Amazon Software License * Add wiredancer design for f2 platform * Update README with f2 instance support
1 parent ea30f21 commit 0b799b1

File tree

17 files changed

+1536
-126
lines changed

17 files changed

+1536
-126
lines changed

src/app/frank/fd_frank_mon.bin.c

Lines changed: 157 additions & 86 deletions
Large diffs are not rendered by default.

src/app/frank/wd_frank_f1_mon.c

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,9 @@ int pretty_num(char* st, uint64_t cnt, char* suffix)
330330
return sel;
331331
}
332332

333+
#define MAX_LINES 128
334+
#define MAX_LINE_WIDTH 512
335+
333336
void* mon_thread(void* arg)
334337
{
335338
uint64_t cnts[2][64];
@@ -338,6 +341,9 @@ void* mon_thread(void* arg)
338341

339342
memset(cnts, 0, sizeof(cnts));
340343

344+
char buffer_prev[MAX_LINES][MAX_LINE_WIDTH] = {0};
345+
char buffer_curr[MAX_LINES][MAX_LINE_WIDTH] = {0};
346+
341347
int first = 1;
342348
uint32_t from[2] = {0, 0};
343349
uint32_t to[2] = {0, 0};
@@ -356,6 +362,11 @@ void* mon_thread(void* arg)
356362

357363
if (!first)
358364
ascii_move_to(from, to);
365+
366+
/* switch to alternate buffer, clear, hide cursor */
367+
if (first) {
368+
printf("\033[?1049h\033[2J\033[?25l");
369+
}
359370

360371
from[0] = 0;
361372

@@ -491,11 +502,11 @@ void* mon_thread(void* arg)
491502
}
492503

493504
// draw ascii art
494-
for (uint32_t li = 0; li < 1024; li ++)
505+
for (uint32_t li = 0; li < MAX_LINES; li ++)
495506
{
496507
if (ascii_chart[li][0] == 0)
497508
break;
498-
char out_st[512*64];
509+
char* out_st = buffer_curr[li];
499510
int out_pos = 0;
500511
for (uint32_t ci = 0;; ci ++)
501512
{
@@ -529,9 +540,9 @@ void* mon_thread(void* arg)
529540
if (anm_data[anm_i][0] == 0)
530541
break;
531542
if (1
532-
& (anm_data[anm_i][0] + (anm_data[anm_i][2] * anm_data[anm_i][7]) == (int)li)
533-
& (anm_data[anm_i][1] + (anm_data[anm_i][3] * anm_data[anm_i][7]) == (int)ci)
534-
& (cnts[1][anm_data[anm_i][6]] != 0)
543+
&& (anm_data[anm_i][0] + (anm_data[anm_i][2] * anm_data[anm_i][7]) == (int)li)
544+
&& (anm_data[anm_i][1] + (anm_data[anm_i][3] * anm_data[anm_i][7]) == (int)ci)
545+
&& (cnts[1][anm_data[anm_i][6]] != 0)
535546
)
536547
{
537548
out_pos += ascii_color(out_st+out_pos, (uint32_t)anm_data[anm_i][5]);
@@ -547,8 +558,13 @@ void* mon_thread(void* arg)
547558
else
548559
out_pos += ascii_color(out_st+out_pos, 0);
549560
}
550-
printf ("%s\n", out_st);
551-
from[0] ++;
561+
out_st[out_pos] = '\0';
562+
/* only print changed lines – clear to EOL first */
563+
if (strcmp(buffer_prev[li], buffer_curr[li]) != 0)
564+
{
565+
printf("\033[%d;1H\033[K%s", li + 1, buffer_curr[li]);
566+
strcpy(buffer_prev[li], buffer_curr[li]);
567+
}
552568
}
553569

554570
// update animation states
@@ -574,5 +590,9 @@ void* mon_thread(void* arg)
574590
fflush(stdout);
575591
}
576592
state->stopped = 1;
593+
594+
/* show cursor, restore normal screen buffer */
595+
printf("\033[?25h\033[?1049l");
596+
577597
return 0;
578598
}

src/wiredancer/README.md

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ This directory includes all resources to build and use Wiredancer functionalitie
33

44
## Supported Platforms ##
55
* AWS-F1 series
6+
* AWS-F2 series
67

78
## Available Functions ##
89
* SigVerify
@@ -18,51 +19,55 @@ WD adopts an asynchronous API. In fact WD uses the same Tango mcache mechanism
1819
- `wd_free_pci(wd_wksp_t*)`
1920
- Frees PCIe resources.
2021

22+
# Suggestion from our friends at ABK: Running WD Demo using Pulumi #
2123

24+
To quickly run the Wiredancer Demo without manually building and running everything from scratch, use ABK Lab's Pulumi project. It will provision an AWS F2 instance, install all required dependencies, and launch the demo application.
2225

26+
You can either provide your own AGFI ID or use the one included in the project.
2327

28+
[svmkit-examples-wiredancer](https://github.com/abklabs/svmkit-examples-wiredancer)
2429

25-
26-
27-
28-
29-
30-
31-
32-
33-
34-
# #
3530
# Building WD #
3631

37-
## AWS-F1 Series ##
38-
39-
+ To build for AWS-F1 series EC2 instances, you need an EC2 build machine. Detailes to provision such instance are provided in [AWS-FPGA github page](https://github.com/aws/aws-fpga).
32+
+ To build for AWS-F1 or AWS-F2 series EC2 instances, you need an EC2 build machine. Details on provisioning such an instance are provided on the [AWS-FPGA GitHub page](https://github.com/aws/aws-fpga).
4033

4134
+ Inside the build machine, clone AWS-FPGA git repo
4235
- `git clone https://github.com/aws/aws-fpga`
36+
- for AWS-F2 series, you also need to check out the `f2` branch
37+
38+
## AWS-F1 Series ##
4339

4440
- Follow the repo's instructions to build `<AWS-FPGA>/hdk/cl/examples/cl_dram_dma`
41+
+ Copy all files from `<FD>/src/wiredancer/platforms/f1/design` directory into `<AWS-FPGA>/hdk/cl/examples/cl_dram_dma/design` replacing existing files.
42+
+ Copy all files from `<FD>/src/wiredancer/rtl` directory into `<AWS-FPGA>/hdk/cl/examples/cl_dram_dma/design` replacing existing files.
43+
+ Rebuild `<AWS-FPGA>/hdk/cl/examples/cl_dram_dma` with the same instructions from AWS repo as before.
4544

46-
+ Copy all files from `<FD>/wiredancer/platforms/f1/build` directory into `<AWS-FPGA>/hdk/cl/examples/cl_dram_dma` replacing existing files.
45+
## AWS-F2 Series ##
4746

48-
+ Rebuild `<AWS-FPGA>/hdk/cl/examples/cl_dram_dma` with the same instructions from AWS repo as before.
47+
- Follow the repo's instructions to create a new cl example `<AWS-FPGA>/hdk/cl/examples/cl_wiredancer`
48+
+ Copy all files from `<FD>/src/wiredancer/platforms/f2/design` directory into `<AWS-FPGA>/hdk/cl/examples/cl_wiredancer/design` replacing existing files.
49+
+ Copy all files from `<FD>/src/wiredancer/rtl` directory into `<AWS-FPGA>/hdk/cl/examples/cl_wiredancer/design` replacing existing files.
50+
+ Rebuild `<AWS-FPGA>/hdk/cl/examples/cl_wiredancer` with the same instructions from AWS repo as before.
4951

52+
## Generate Bitstream ##
53+
54+
+ Follow the repo's instructions to generate the bitstream from the dcp file and get an AGFI for the cl example you just built.
5055

51-
# #
5256
# Running WD #
5357

54-
## AWS-F1 Series ##
58+
## AWS-F1 and AWS-F2 Series ##
5559

56-
+ To run FD with WD support, you need an EC2 F1 machine.
60+
+ To run FD with WD support, you need an EC2 F1 or F2 machine.
5761

5862
+ Inside the machine, clone AWS-FPGA git repo
5963
- `git clone https://github.com/aws/aws-fpga`
64+
- for AWS-F2 series, you also need to check out the `f2` branch
6065

6166
+ Install the SDK inside the repo:
6267
- `source $AWS-FPGA/sdk_setup.sh`
6368

6469
+ Load WD image on FPGA slot-0
65-
- `sudo fpga-load-local-image -S 0 -I agfi-01051ff14d1bba4e0`
70+
- `sudo fpga-load-local-image -S 0 -I <AGFI-ID>`
6671

6772
+ Make FD with WD support
6873
- `./deps.sh`
@@ -77,8 +82,12 @@ WD adopts an asynchronous API. In fact WD uses the same Tango mcache mechanism
7782
- `sudo build/linux/gcc/x86_64/bin/fd_shmem_cfg query`
7883

7984
+ Configure app-frank
85+
- copy over the ./misc/solana_pcap file to the machine (e.g. `/tmp/solana.pcap`)
8086
- `sudo ./build/linux/gcc/x86_64/bin/fd_frank_init_demo frank 1-6 ./build/linux/gcc/x86_64 /tmp/solana.pcap 0 0 1 0`
8187

88+
+ Enable PCIe bus mastering
89+
- `sudo setpci -s 34:00.0 command=06`
90+
8291
+ Run app-frank
8392
- `sudo ./build/linux/gcc/x86_64/bin/fd_frank_run frank "1-6"`
8493

@@ -88,12 +97,6 @@ WD adopts an asynchronous API. In fact WD uses the same Tango mcache mechanism
8897
+ Run fd-monitor
8998
- `sudo taskset -c 7 build/linux/gcc/x86_64/bin/fd_frank_mon frank --duration 10e12 --dt-min 1e7 --dt-max 1e7`
9099

91-
92-
93-
94-
95-
96-
# #
97100
# WD-SigVerify #
98101

99102
SigVerify is the verification process of [ED25519](https://en.wikipedia.org/wiki/EdDSA). This is a computationally intensive operation. In order to match SigVerify's throughput with the rest of the FD system, many high performance cores are required. However, WD.SigVerify uses hardware acceleration to achieve 1Mps throughput using only one FPGA. The table below shows the throughput of a single core running FD.SigVerify on various architectures, and the number of cores required to reach a throughput of one million per second.

src/wiredancer/platform/f1/design/cl_dram_dma.sv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ showahead_fifo #(
491491
// AAAAAAA AAAAAAAPPPPPPPPPP PPPPPPPPPP
492492

493493
`ifndef TOP_NAME
494-
`define TOP_NAME top_f1
494+
`define TOP_NAME top_wd
495495
`endif
496496

497497
`TOP_NAME #(

src/wiredancer/platform/f1/scripts/synth_cl_dram_dma.tcl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ read_verilog -sv $CL_DIR/design/ed25519_sigverify_ecc.sv
103103
read_verilog -sv $CL_DIR/design/ed25519_sigverify_0.sv
104104
read_verilog -sv $CL_DIR/design/ed25519_sigverify_1.sv
105105
read_verilog -sv $CL_DIR/design/ed25519_sigverify_2.sv
106-
read_verilog -sv $CL_DIR/design/top_f1.sv
106+
read_verilog -sv $CL_DIR/design/top_wd.sv
107107

108108

109109
puts "AWS FPGA: Reading IP blocks";
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# =============================================================================
2+
# Amazon FPGA Hardware Development Kit
3+
#
4+
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5+
#
6+
# Licensed under the Amazon Software License (the "License"). You may not use
7+
# this file except in compliance with the License. A copy of the License is
8+
# located at
9+
#
10+
# http://aws.amazon.com/asl/
11+
#
12+
# or in the "license" file accompanying this file. This file is distributed on
13+
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
14+
# implied. See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
# =============================================================================
17+
18+
set_property MAX_FANOUT 50 [get_nets -of_objects [get_pins CL_PCIM_MSTR/CL_TST_PCI/sync_rst_n_reg/Q]]
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# =============================================================================
2+
# Amazon FPGA Hardware Development Kit
3+
#
4+
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5+
#
6+
# Licensed under the Amazon Software License (the "License"). You may not use
7+
# this file except in compliance with the License. A copy of the License is
8+
# located at
9+
#
10+
# http://aws.amazon.com/asl/
11+
#
12+
# or in the "license" file accompanying this file. This file is distributed on
13+
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
14+
# implied. See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
# =============================================================================
17+
18+
19+
#################################################################################
20+
### Generated Clocks
21+
#################################################################################
22+
# Alias of Shell interface clock
23+
set clk_main_a0 [get_clocks -of_objects [get_ports clk_main_a0]]
24+
25+
26+
#################################################################################
27+
### Clock Groups
28+
#################################################################################
29+
# false path inside sh_ddr
30+
set_false_path -from [get_pins -of_objects \
31+
[get_cells -hierarchical -filter { NAME =~ *ram_reg*}] -filter {REF_PIN_NAME == CLK}] \
32+
-to [get_cells -hierarchical -filter { NAME =~ *rd_do_reg[*]}]
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# =============================================================================
2+
# Amazon FPGA Hardware Development Kit
3+
#
4+
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5+
#
6+
# Licensed under the Amazon Software License (the "License"). You may not use
7+
# this file except in compliance with the License. A copy of the License is
8+
# located at
9+
#
10+
# http://aws.amazon.com/asl/
11+
#
12+
# or in the "license" file accompanying this file. This file is distributed on
13+
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or
14+
# implied. See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
# =============================================================================
17+
18+
# Level 1 Wiredancer floorplan for Small Shell
19+
20+
21+
###############################################################################
22+
# Child Pblock in SLR2
23+
###############################################################################
24+
########################################
25+
# Pblock
26+
########################################
27+
create_pblock pblock_CL_SLR2
28+
29+
# Complete CRs in SLR2
30+
# Match the parent pblock width to avoid DRC errors
31+
resize_pblock pblock_CL_SLR2 -add {CLOCKREGION_X0Y8:CLOCKREGION_X5Y11}
32+
33+
set_property parent pblock_CL [get_pblocks pblock_CL_SLR2]
34+
35+
###############################################################################
36+
# Child Pblock in SLR1
37+
###############################################################################
38+
########################################
39+
# Pblock
40+
########################################
41+
create_pblock pblock_CL_SLR1
42+
43+
# Complete CRs in SLR1
44+
resize_pblock pblock_CL_SLR1 -add {CLOCKREGION_X0Y4:CLOCKREGION_X3Y7}
45+
resize_pblock pblock_CL_SLR1 -add {CLOCKREGION_X5Y4:CLOCKREGION_X5Y7}
46+
resize_pblock pblock_CL_SLR1 -add {CLOCKREGION_X4Y6:CLOCKREGION_X4Y7}
47+
resize_pblock pblock_CL_SLR1 -add {CLOCKREGION_X4Y4:CLOCKREGION_X4Y4}
48+
49+
# Partial CRs
50+
resize_pblock pblock_CL_SLR1 -add {SLICE_X122Y300:SLICE_X145Y359 \
51+
DSP48E2_X16Y114:DSP48E2_X19Y137 \
52+
RAMB18_X9Y120:RAMB18_X9Y143 \
53+
RAMB36_X9Y60:RAMB36_X9Y71 \
54+
URAM288_X2Y80:URAM288_X2Y95}
55+
resize_pblock pblock_CL_SLR1 -add {RAMB18_X8Y120:RAMB18_X8Y143 \
56+
RAMB36_X8Y60:RAMB36_X8Y71}
57+
58+
set_property SNAPPING_MODE ON [get_pblocks pblock_CL_SLR1]
59+
60+
set_property parent pblock_CL [get_pblocks pblock_CL_SLR1]
61+
62+
###############################################################################
63+
# Child Pblock in SLR0
64+
###############################################################################
65+
########################################
66+
# Pblock
67+
########################################
68+
create_pblock pblock_CL_SLR0
69+
70+
# Complete CRs
71+
resize_pblock pblock_CL_SLR0 -add {CLOCKREGION_X0Y0:CLOCKREGION_X3Y3}
72+
resize_pblock pblock_CL_SLR0 -add {CLOCKREGION_X4Y0:CLOCKREGION_X7Y0}
73+
resize_pblock pblock_CL_SLR0 -add {CLOCKREGION_X4Y3:CLOCKREGION_X4Y3}
74+
resize_pblock pblock_CL_SLR0 -add {CLOCKREGION_X6Y1:CLOCKREGION_X6Y1}
75+
resize_pblock pblock_CL_SLR0 -add {CLOCKREGION_X5Y1:CLOCKREGION_X5Y3}
76+
77+
# Partial CRs
78+
resize_pblock pblock_CL_SLR0 -add {SLICE_X120Y120:SLICE_X145Y179 \
79+
DSP48E2_X16Y42:DSP48E2_X19Y65 \
80+
RAMB18_X8Y48:RAMB18_X9Y71 \
81+
RAMB36_X8Y24:RAMB36_X9Y35 \
82+
URAM288_X2Y32:URAM288_X2Y47}
83+
84+
resize_pblock pblock_CL_SLR0 -add {SLICE_X118Y60:SLICE_X145Y119 \
85+
DSP48E2_X16Y18:DSP48E2_X19Y41 \
86+
RAMB18_X8Y24:RAMB18_X9Y47 \
87+
RAMB36_X8Y12:RAMB36_X9Y23 \
88+
URAM288_X2Y16:URAM288_X2Y31}
89+
90+
resize_pblock pblock_CL_SLR0 -add {SLICE_X206Y60:SLICE_X219Y119 \
91+
DSP48E2_X30Y18:DSP48E2_X30Y41 \
92+
RAMB18_X12Y24:RAMB18_X12Y47 \
93+
RAMB36_X12Y12:RAMB36_X12Y23}
94+
95+
resize_pblock pblock_CL_SLR0 -add {SLICE_X221Y60:SLICE_X232Y119 \
96+
DSP48E2_X31Y18:DSP48E2_X31Y41 \
97+
RAMB18_X13Y24:RAMB18_X13Y47 \
98+
RAMB36_X13Y12:RAMB36_X13Y23 \
99+
BUFG_GT_X1Y24:BUFG_GT_X1Y47 \
100+
BUFG_GT_SYNC_X1Y15:BUFG_GT_SYNC_X1Y29 \
101+
GTYE4_COMMON_X1Y1:GTYE4_COMMON_X1Y1 \
102+
GTYE4_CHANNEL_X1Y4:GTYE4_CHANNEL_X1Y7}
103+
104+
105+
set_property parent pblock_CL [get_pblocks pblock_CL_SLR0]
106+
107+
108+
########################################
109+
# Wiredancer
110+
########################################
111+
112+
# PCIS logic: appears in DMA/PCIM bridging in cl_wiredancer
113+
add_cells_to_pblock [get_pblocks pblock_CL_SLR2] [get_cells -hierarchical -filter {NAME =~ WRAPPER/CL/cl_wiredancer/st_in_*}]
114+
add_cells_to_pblock [get_pblocks pblock_CL_SLR2] [get_cells -hierarchical -filter {NAME =~ WRAPPER/CL/cl_wiredancer/dma_*}]
115+
add_cells_to_pblock [get_pblocks pblock_CL_SLR2] [get_cells -hierarchical -filter {NAME =~ WRAPPER/CL/cl_wiredancer/pcim_*}]
116+
117+
# PCIM logic: PCIM bridge master interface
118+
add_cells_to_pblock [get_pblocks pblock_CL_SLR1] [get_cells -hierarchical -filter {NAME =~ WRAPPER/CL/cl_wiredancer/cl_sh_pcim*}]
119+
120+
# OCL AXI-lite logic: control path state machine
121+
add_cells_to_pblock [get_pblocks pblock_CL_SLR1] [get_cells -hierarchical -filter {NAME =~ WRAPPER/CL/cl_wiredancer/st_ocl*}]
122+
add_cells_to_pblock [get_pblocks pblock_CL_SLR1] [get_cells -hierarchical -filter {NAME =~ WRAPPER/CL/cl_wiredancer/avmm_fh_*}]
123+
124+
125+
# Monitoring/status (vdip, vled, bresp_status)
126+
add_cells_to_pblock [get_pblocks pblock_CL_SLR0] [get_cells -hierarchical -filter {NAME =~ WRAPPER/CL/cl_wiredancer/vdip_*}]
127+
add_cells_to_pblock [get_pblocks pblock_CL_SLR0] [get_cells -hierarchical -filter {NAME =~ WRAPPER/CL/cl_wiredancer/bresp_status}]
128+
129+
# Previously all `top_inst` modules were forced into SLR1. This can lead to
130+
# placement failures due to resource pressure. Allow Vivado to distribute the
131+
# logic across SLRs by not constraining these modules to SLR1.
132+
#add_cells_to_pblock [get_pblocks pblock_CL_SLR1] [get_cells -hierarchical -filter {NAME =~ WRAPPER/CL/top_inst*}]

0 commit comments

Comments
 (0)