diff --git a/book/api/metrics-generated.md b/book/api/metrics-generated.md
index ddbcdffdc9..01177aec61 100644
--- a/book/api/metrics-generated.md
+++ b/book/api/metrics-generated.md
@@ -1217,3 +1217,16 @@
| ibeth_tx_bytes_total | counter | Total number of bytes transmitted (including Ethernet header). |
+
+## Dpdk Tile
+
+
+
+| Metric | Type | Description |
+|--------|------|-------------|
+| dpdk_rx_pkt_cnt | counter | Packet receive count. |
+| dpdk_rx_bytes_total | counter | Total number of bytes received (including Ethernet header). |
+| dpdk_tx_pkt_cnt | counter | Number of packet transmit jobs marked as completed by the kernel. |
+| dpdk_tx_bytes_total | counter | Total number of bytes transmitted (including Ethernet header). |
+
+
diff --git a/src/app/firedancer/config/default.toml b/src/app/firedancer/config/default.toml
index 80bdbe5051..7000beacab 100644
--- a/src/app/firedancer/config/default.toml
+++ b/src/app/firedancer/config/default.toml
@@ -1025,6 +1025,7 @@ user = ""
# just fine.
native_bond = false
+ # This section is only active if [net.provider] is set to "socket".
[net.socket]
# Sets the socket receive buffer size via SO_RCVBUF.
# Raises net.core.rmem_max accordingly
@@ -1034,6 +1035,16 @@ user = ""
# Raises net.core.wmem_max accordingly
send_buffer_size = 134217728
+ # Configure embedded DPDK fast network stack (experimental).
+ # This section is only active if [net.provider] is set to "dpdk".
+ [net.dpdk]
+ # Which interface to take over with DPDK. The PCIe device
+ # backing this interface is taken over using vfio-pci and thus
+ # removed from Linux networking. Typically, this is a virtual
+ # function (VF) of the main NIC at [net.interface].
+ interface = ""
+
+
# Tiles are described in detail in the layout section above. While the
# layout configuration determines how many of each tile to place on
# which CPU core to create a functioning system, below is the individual
diff --git a/src/disco/metrics/generate/types.py b/src/disco/metrics/generate/types.py
index 20bc666cb6..68c2bc1279 100644
--- a/src/disco/metrics/generate/types.py
+++ b/src/disco/metrics/generate/types.py
@@ -44,6 +44,7 @@ class Tile(Enum):
SNAPLS = 38
TOWER = 39
IBETH = 40
+ DPDK = 41
class MetricType(Enum):
COUNTER = 0
diff --git a/src/disco/metrics/generated/fd_metrics_all.c b/src/disco/metrics/generated/fd_metrics_all.c
index 57f08bfc34..50b18d15f7 100644
--- a/src/disco/metrics/generated/fd_metrics_all.c
+++ b/src/disco/metrics/generated/fd_metrics_all.c
@@ -74,6 +74,7 @@ const char * FD_METRICS_TILE_KIND_NAMES[FD_METRICS_TILE_KIND_CNT] = {
"snapls",
"tower",
"ibeth",
+ "dpdk",
};
const ulong FD_METRICS_TILE_KIND_SIZES[FD_METRICS_TILE_KIND_CNT] = {
@@ -115,6 +116,7 @@ const ulong FD_METRICS_TILE_KIND_SIZES[FD_METRICS_TILE_KIND_CNT] = {
FD_METRICS_SNAPLS_TOTAL,
FD_METRICS_TOWER_TOTAL,
FD_METRICS_IBETH_TOTAL,
+ FD_METRICS_DPDK_TOTAL,
};
const fd_metrics_meta_t * FD_METRICS_TILE_KIND_METRICS[FD_METRICS_TILE_KIND_CNT] = {
FD_METRICS_NET,
@@ -155,4 +157,5 @@ const fd_metrics_meta_t * FD_METRICS_TILE_KIND_METRICS[FD_METRICS_TILE_KIND_CNT]
FD_METRICS_SNAPLS,
FD_METRICS_TOWER,
FD_METRICS_IBETH,
+ FD_METRICS_DPDK,
};
diff --git a/src/disco/metrics/generated/fd_metrics_all.h b/src/disco/metrics/generated/fd_metrics_all.h
index 2b48465437..5e18cf23f0 100644
--- a/src/disco/metrics/generated/fd_metrics_all.h
+++ b/src/disco/metrics/generated/fd_metrics_all.h
@@ -43,6 +43,7 @@
#include "fd_metrics_tower.h"
#include "fd_metrics_gui.h"
#include "fd_metrics_ibeth.h"
+#include "fd_metrics_dpdk.h"
/* Start of LINK OUT metrics */
#define FD_METRICS_COUNTER_LINK_SLOW_COUNT_OFF (0UL)
@@ -179,7 +180,7 @@ extern const fd_metrics_meta_t FD_METRICS_ALL_LINK_OUT[FD_METRICS_ALL_LINK_OUT_T
#define FD_METRICS_TOTAL_SZ (8UL*254UL)
-#define FD_METRICS_TILE_KIND_CNT 38
+#define FD_METRICS_TILE_KIND_CNT 39
extern const char * FD_METRICS_TILE_KIND_NAMES[FD_METRICS_TILE_KIND_CNT];
extern const ulong FD_METRICS_TILE_KIND_SIZES[FD_METRICS_TILE_KIND_CNT];
extern const fd_metrics_meta_t * FD_METRICS_TILE_KIND_METRICS[FD_METRICS_TILE_KIND_CNT];
diff --git a/src/disco/metrics/generated/fd_metrics_dpdk.c b/src/disco/metrics/generated/fd_metrics_dpdk.c
new file mode 100644
index 0000000000..bb23076bf2
--- /dev/null
+++ b/src/disco/metrics/generated/fd_metrics_dpdk.c
@@ -0,0 +1,9 @@
+/* THIS FILE IS GENERATED BY gen_metrics.py. DO NOT HAND EDIT. */
+#include "fd_metrics_dpdk.h"
+
+const fd_metrics_meta_t FD_METRICS_DPDK[FD_METRICS_DPDK_TOTAL] = {
+ DECLARE_METRIC( DPDK_RX_PKT_CNT, COUNTER ),
+ DECLARE_METRIC( DPDK_RX_BYTES_TOTAL, COUNTER ),
+ DECLARE_METRIC( DPDK_TX_PKT_CNT, COUNTER ),
+ DECLARE_METRIC( DPDK_TX_BYTES_TOTAL, COUNTER ),
+};
diff --git a/src/disco/metrics/generated/fd_metrics_dpdk.h b/src/disco/metrics/generated/fd_metrics_dpdk.h
new file mode 100644
index 0000000000..52cdf63306
--- /dev/null
+++ b/src/disco/metrics/generated/fd_metrics_dpdk.h
@@ -0,0 +1,36 @@
+#ifndef HEADER_fd_src_disco_metrics_generated_fd_metrics_dpdk_h
+#define HEADER_fd_src_disco_metrics_generated_fd_metrics_dpdk_h
+
+/* THIS FILE IS GENERATED BY gen_metrics.py. DO NOT HAND EDIT. */
+
+#include "../fd_metrics_base.h"
+#include "fd_metrics_enums.h"
+
+#define FD_METRICS_COUNTER_DPDK_RX_PKT_CNT_OFF (16UL)
+#define FD_METRICS_COUNTER_DPDK_RX_PKT_CNT_NAME "dpdk_rx_pkt_cnt"
+#define FD_METRICS_COUNTER_DPDK_RX_PKT_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
+#define FD_METRICS_COUNTER_DPDK_RX_PKT_CNT_DESC "Packet receive count."
+#define FD_METRICS_COUNTER_DPDK_RX_PKT_CNT_CVT (FD_METRICS_CONVERTER_NONE)
+
+#define FD_METRICS_COUNTER_DPDK_RX_BYTES_TOTAL_OFF (17UL)
+#define FD_METRICS_COUNTER_DPDK_RX_BYTES_TOTAL_NAME "dpdk_rx_bytes_total"
+#define FD_METRICS_COUNTER_DPDK_RX_BYTES_TOTAL_TYPE (FD_METRICS_TYPE_COUNTER)
+#define FD_METRICS_COUNTER_DPDK_RX_BYTES_TOTAL_DESC "Total number of bytes received (including Ethernet header)."
+#define FD_METRICS_COUNTER_DPDK_RX_BYTES_TOTAL_CVT (FD_METRICS_CONVERTER_NONE)
+
+#define FD_METRICS_COUNTER_DPDK_TX_PKT_CNT_OFF (18UL)
+#define FD_METRICS_COUNTER_DPDK_TX_PKT_CNT_NAME "dpdk_tx_pkt_cnt"
+#define FD_METRICS_COUNTER_DPDK_TX_PKT_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
+#define FD_METRICS_COUNTER_DPDK_TX_PKT_CNT_DESC "Number of packet transmit jobs marked as completed by the kernel."
+#define FD_METRICS_COUNTER_DPDK_TX_PKT_CNT_CVT (FD_METRICS_CONVERTER_NONE)
+
+#define FD_METRICS_COUNTER_DPDK_TX_BYTES_TOTAL_OFF (19UL)
+#define FD_METRICS_COUNTER_DPDK_TX_BYTES_TOTAL_NAME "dpdk_tx_bytes_total"
+#define FD_METRICS_COUNTER_DPDK_TX_BYTES_TOTAL_TYPE (FD_METRICS_TYPE_COUNTER)
+#define FD_METRICS_COUNTER_DPDK_TX_BYTES_TOTAL_DESC "Total number of bytes transmitted (including Ethernet header)."
+#define FD_METRICS_COUNTER_DPDK_TX_BYTES_TOTAL_CVT (FD_METRICS_CONVERTER_NONE)
+
+#define FD_METRICS_DPDK_TOTAL (4UL)
+extern const fd_metrics_meta_t FD_METRICS_DPDK[FD_METRICS_DPDK_TOTAL];
+
+#endif /* HEADER_fd_src_disco_metrics_generated_fd_metrics_dpdk_h */
diff --git a/src/disco/metrics/metrics.xml b/src/disco/metrics/metrics.xml
index d2027ecd70..45888b2319 100644
--- a/src/disco/metrics/metrics.xml
+++ b/src/disco/metrics/metrics.xml
@@ -1163,4 +1163,11 @@ metric introduced.
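+
+<tile name="dpdk">
+    <counter name="RxPktCnt" summary="Packet receive count." />
+    <counter name="RxBytesTotal" summary="Total number of bytes received (including Ethernet header)." />
+    <counter name="TxPktCnt" summary="Number of packet transmit jobs marked as completed by the kernel." />
+    <counter name="TxBytesTotal" summary="Total number of bytes transmitted (including Ethernet header)." />
+</tile>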
diff --git a/src/disco/net/dpdk/fd_dpdk_tile.c b/src/disco/net/dpdk/fd_dpdk_tile.c
new file mode 100644
index 0000000000..dbc1bb16a7
--- /dev/null
+++ b/src/disco/net/dpdk/fd_dpdk_tile.c
@@ -0,0 +1,167 @@
+/* The dpdk tile translates Ethernet frames between DPDK PMDs and
+ fd_tango. */
+
+#include "../../metrics/fd_metrics.h"
+#include "../../topo/fd_topo.h"
+
+#include <rte_ethdev.h>
+
+#define PKT_BURST_MAX (32UL)
+
+#define MEMPOOL_CACHE_SIZE 256
+
+/* fd_dpdk_tile_t is private tile state.  It is the context type
+   handed to the stem callbacks in this file (after_credit,
+   metrics_write, during_housekeeping). */
+
+struct fd_dpdk_tile {
+ ushort port_id; /* DPDK port polled by after_credit (rte_eth_rx_burst) */
+ ushort queue_id; /* RX queue index on port_id polled by after_credit */
+
+ /* Tile-local counters, copied out by metrics_write */
+ struct {
+ ulong rx_pkt_cnt; /* incremented by rx_burst_fwd, once per packet */
+ ulong rx_bytes_total; /* incremented by rx_burst_fwd by each mbuf's data_len */
+ ulong tx_pkt_cnt; /* not updated anywhere in this file yet */
+ ulong tx_bytes_total; /* not updated anywhere in this file yet */
+ } metrics;
+};
+
+typedef struct fd_dpdk_tile fd_dpdk_tile_t;
+
+/* scratch_align returns the alignment of fd_dpdk_tile_t.  Registered
+   as the scratch_align callback of fd_tile_dpdk. */
+
+static ulong
+scratch_align( void ) {
+ return alignof(fd_dpdk_tile_t);
+}
+
+/* scratch_footprint returns the size of a layout holding exactly one
+   fd_dpdk_tile_t, finalized against scratch_align().  tile is ignored.
+   Registered as the scratch_footprint callback of fd_tile_dpdk. */
+
+static ulong
+scratch_footprint( fd_topo_tile_t const * tile ) {
+ (void)tile;
+ ulong l = FD_LAYOUT_INIT;
+ l = FD_LAYOUT_APPEND( l, alignof(fd_dpdk_tile_t), sizeof(fd_dpdk_tile_t) );
+ return FD_LAYOUT_FINI( l, scratch_align() );
+}
+
+/* privileged_init brings up the DPDK Ethernet port served by this tile.
+
+   In order:
+   - zeroes the tile context and records the (port, queue) pair that
+     after_credit later polls
+   - creates a packet mbuf pool on top of externally provided memory
+   - configures the port with one RX queue and one TX queue
+   - sets up both queues from the driver's default queue config
+   - starts the port
+
+   Any failure is fatal (FD_LOG_ERR).
+
+   NOTE(review): umem and umem_sz are not declared anywhere in the
+   visible source.  They have to describe the buffer region handed to
+   the mbuf pool and must be provided before this compiles -- confirm
+   where they are supposed to come from.
+
+   NOTE(review): the context is assumed to live at the start of the
+   tile's scratch object (tile->tile_obj_id), matching what
+   scratch_footprint lays out -- confirm stem_run derives ctx the same
+   way. */
+
+FD_FN_UNUSED static void
+privileged_init( fd_topo_t *      topo,
+                 fd_topo_tile_t * tile ) {
+  ushort const port_id  = 0; /* FIXME hardcoded port */
+  ushort const queue_id = 0; /* single RX/TX queue pair */
+
+  /* Initialize tile state.  Without this, after_credit would poll
+     whatever port/queue the scratch memory happened to contain and
+     metrics_write would publish garbage counters. */
+  fd_dpdk_tile_t * ctx = fd_topo_obj_laddr( topo, tile->tile_obj_id );
+  fd_memset( ctx, 0, sizeof(fd_dpdk_tile_t) );
+  ctx->port_id  = port_id;
+  ctx->queue_id = queue_id;
+
+  uint pool_depth = 4096U;
+
+  /* Deliberately not static: the initializer reads umem/umem_sz, which
+     are not guaranteed to be constant expressions.  The descriptor is
+     only passed to the pool creation call below. */
+  struct rte_pktmbuf_extmem const ext_mem[1] = {{
+    .buf_ptr  = umem,
+    .buf_iova = RTE_BAD_IOVA, /* unused */
+    .buf_len  = umem_sz,
+    .elt_size = 2048UL
+  }};
+
+  struct rte_mempool * pool = rte_pktmbuf_pool_create_extbuf(
+      /* name           */ "pkts",
+      /* n              */ pool_depth,
+      /* cache_size     */ MEMPOOL_CACHE_SIZE,
+      /* priv_size      */ 0,
+      /* data_room_size */ 2048UL,
+      /* socket_id      */ (int)rte_socket_id(),
+      /* ext_mem        */ ext_mem,
+      /* ext_num        */ 1U
+  );
+  if( FD_UNLIKELY( !pool ) ) FD_LOG_ERR(( "rte_pktmbuf_pool_create_extbuf failed" ));
+
+  /* dev_info supplies the driver's default RX/TX queue configs */
+  struct rte_eth_dev_info dev_info;
+  int info_ret = rte_eth_dev_info_get( port_id, &dev_info );
+  if( FD_UNLIKELY( info_ret<0 ) ) FD_LOG_ERR(( "rte_eth_dev_info_get(port_id=%u) failed (%d)", (uint)port_id, info_ret ));
+
+  /* One RX queue, one TX queue, no TX multi-queue mode */
+  struct rte_eth_conf eth_conf = {
+    .txmode = {
+      .mq_mode = RTE_ETH_MQ_TX_NONE
+    }
+  };
+  int conf_ret = rte_eth_dev_configure( port_id, 1, 1, &eth_conf );
+  if( FD_UNLIKELY( conf_ret<0 ) ) FD_LOG_ERR(( "rte_eth_dev_configure failed (%d)", conf_ret ));
+
+  /* Queue setup is given the NUMA node reported for the device */
+  int numa_id = rte_eth_dev_socket_id( port_id );
+
+  ushort rx_desc_max = 2048;
+  struct rte_eth_rxconf rx_conf = dev_info.default_rxconf;
+  int rxq_setup_ret = rte_eth_rx_queue_setup( port_id, queue_id, rx_desc_max, (uint)numa_id, &rx_conf, pool );
+  if( FD_UNLIKELY( rxq_setup_ret<0 ) ) FD_LOG_ERR(( "rte_eth_rx_queue_setup failed (%d)", rxq_setup_ret ));
+
+  ushort tx_desc_max = 2048;
+  struct rte_eth_txconf tx_conf = dev_info.default_txconf;
+  int txq_setup_ret = rte_eth_tx_queue_setup( port_id, queue_id, tx_desc_max, (uint)numa_id, &tx_conf );
+  if( FD_UNLIKELY( txq_setup_ret<0 ) ) FD_LOG_ERR(( "rte_eth_tx_queue_setup failed (%d)", txq_setup_ret ));
+
+  int start_ret = rte_eth_dev_start( port_id );
+  if( FD_UNLIKELY( start_ret<0 ) ) FD_LOG_ERR(( "rte_eth_dev_start failed (%d)", start_ret ));
+}
+
+/* unprivileged_init is currently a no-op; topo and tile are ignored.
+   Registered as the unprivileged_init callback of fd_tile_dpdk. */
+
+FD_FN_UNUSED static void
+unprivileged_init( fd_topo_t * topo,
+ fd_topo_tile_t * tile ) {
+ (void)topo; (void)tile;
+}
+
+/* during_housekeeping is wired up as STEM_CALLBACK_DURING_HOUSEKEEPING
+   below.  It is currently a no-op; ctx is ignored. */
+
+static void
+during_housekeeping( fd_dpdk_tile_t * ctx ) {
+ (void)ctx;
+}
+
+/* metrics_write is wired up as STEM_CALLBACK_METRICS_WRITE below.  It
+   copies the four tile-local counters in ctx->metrics to the
+   corresponding DPDK tile metrics via FD_MCNT_SET. */
+
+static void
+metrics_write( fd_dpdk_tile_t * ctx ) {
+ FD_MCNT_SET( DPDK, RX_PKT_CNT, ctx->metrics.rx_pkt_cnt );
+ FD_MCNT_SET( DPDK, RX_BYTES_TOTAL, ctx->metrics.rx_bytes_total );
+ FD_MCNT_SET( DPDK, TX_PKT_CNT, ctx->metrics.tx_pkt_cnt );
+ FD_MCNT_SET( DPDK, TX_BYTES_TOTAL, ctx->metrics.tx_bytes_total );
+}
+
+/* rx_burst_fwd forwards a batch of newly received packets to downstream
+   tiles.  Assumes that packet frames are available in shm and exposed
+   to downstream tiles already.  Publishes fragment metadatas to
+   descriptor rings (if possible), or returns frames back to
+   rte_mempool.
+
+   The above is the intended behavior.  For now (see FIXME below) every
+   packet is only counted and then released with rte_pktmbuf_free;
+   nothing is published downstream.
+
+   ctx      tile state; metrics.rx_pkt_cnt grows by pkt_cnt and
+            metrics.rx_bytes_total by each packet's data_len
+   pkt      array of pkt_cnt mbuf pointers; every entry is freed
+   pkt_cnt  number of entries in pkt (zero is fine) */
+
+static void
+rx_burst_fwd( fd_dpdk_tile_t *   ctx,
+              struct rte_mbuf ** pkt,
+              ulong              pkt_cnt ) {
+  /* FIXME actually handle packets */
+  ctx->metrics.rx_pkt_cnt += pkt_cnt;
+  for( ulong i=0UL; i<pkt_cnt; i++ ) {
+    ctx->metrics.rx_bytes_total += pkt[ i ]->data_len;
+    rte_pktmbuf_free( pkt[ i ] );
+  }
+}
+
+/* after_credit is executed every run loop iteration.
+   Polls RX queue ctx->queue_id of port ctx->port_id for up to
+   PKT_BURST_MAX packets and passes any received packets to
+   rx_burst_fwd.  Sets *charge_busy to 1 if at least one packet was
+   received; otherwise leaves it untouched.  stem and poll_in are
+   unused.
+
+   NOTE(review): TX completions are not checked here yet; only the RX
+   side is polled. */
+
+static void
+after_credit( fd_dpdk_tile_t * ctx,
+ fd_stem_context_t * stem,
+ int * poll_in,
+ int * charge_busy ) {
+ (void)stem; (void)poll_in;
+
+ struct rte_mbuf * rx_pkts[ PKT_BURST_MAX ];
+ ulong rx_cnt = rte_eth_rx_burst( ctx->port_id, ctx->queue_id, rx_pkts, PKT_BURST_MAX );
+ if( FD_LIKELY( rx_cnt ) ) {
+ rx_burst_fwd( ctx, rx_pkts, rx_cnt );
+ *charge_busy = 1;
+ }
+}
+
+#define STEM_CALLBACK_CONTEXT_TYPE fd_dpdk_tile_t
+#define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_dpdk_tile_t)
+#define STEM_CALLBACK_AFTER_CREDIT after_credit
+#define STEM_CALLBACK_METRICS_WRITE metrics_write
+#define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
+#define STEM_BURST 1UL /* ignored */
+#define STEM_LAZY 130000UL /* 130us */
+#include "../../stem/fd_stem.c"
+
+/* fd_tile_dpdk is the run descriptor of the "dpdk" tile.  It ties the
+   tile name to the scratch sizing and init callbacks defined in this
+   file and to stem_run (presumably provided by the fd_stem.c include
+   above, configured through the STEM_* macros). */
+
+fd_topo_run_tile_t fd_tile_dpdk = {
+ .name = "dpdk",
+ .scratch_align = scratch_align,
+ .scratch_footprint = scratch_footprint,
+ .privileged_init = privileged_init,
+ .unprivileged_init = unprivileged_init,
+ .run = stem_run
+};