diff --git a/book/api/cli.md b/book/api/cli.md
index 24c7d9cdfd..043aa33afd 100644
--- a/book/api/cli.md
+++ b/book/api/cli.md
@@ -70,7 +70,6 @@ following stages to each configure command:
device.
- `ethtool-gro` Disables generic receive offload (GRO) on the network
device.
- - `ethtool-loopback` Disables UDP segmentation on the loopback device.
| Arguments | Description |
|-------------------|-------------|
@@ -102,7 +101,6 @@ and configure the number of combined channels on the network device.
| `root` | increase `/proc/sys/vm/nr_hugepages` and mount hugetlbfs filesystems. Only applies for the `hugetlbfs` stage |
| `root` | increase network device channels with `ethtool --set-channels`. Only applies for the `ethtool-channels` stage |
| `root` | disable network device generic-receive-offload (gro) with `ethtool --offload IFACE generic-receive-offload off`. Only applies for the `ethtool-gro` stage |
-| `root` | disable network device tx-udp-segmentation with `ethtool --offload lo tx-udp-segmentation off`. Only applies for the `ethtool-loopback` stage |
| `CAP_SYS_ADMIN` | set kernel parameters in `/proc/sys`. Only applies for the `sysctl` stage |
:::
diff --git a/book/api/metrics-generated.md b/book/api/metrics-generated.md
index 451830dd95..3d6e96e4ac 100644
--- a/book/api/metrics-generated.md
+++ b/book/api/metrics-generated.md
@@ -49,7 +49,8 @@
| Metric | Type | Description |
|--------|------|-------------|
-| net_rx_pkt_cnt | counter | Packet receive count. |
+| net_rx_pkt_cnt
{pkt_kind="ip4_udp"} | counter | Packet receive count (ignoring tunnels) (IPv4 UDP packet (no options)) |
+| net_rx_pkt_cnt
{pkt_kind="ip4_opt_udp"} | counter | Packet receive count (ignoring tunnels) (IPv4 UDP packet (with options)) |
| net_rx_bytes_total | counter | Total number of bytes received (including Ethernet header). |
| net_rx_undersz_cnt | counter | Number of incoming packets dropped due to being too small. |
| net_rx_fill_blocked_cnt | counter | Number of incoming packets dropped due to fill ring being full. |
@@ -59,8 +60,8 @@
| net_tx_submit_cnt | counter | Number of packet transmit jobs submitted. |
| net_tx_complete_cnt | counter | Number of packet transmit jobs marked as completed by the kernel. |
| net_tx_bytes_total | counter | Total number of bytes transmitted (including Ethernet header). |
-| net_tx_route_fail_cnt | counter | Number of packet transmit jobs dropped due to route failure. |
-| net_tx_neighbor_fail_cnt | counter | Number of packet transmit jobs dropped due to unresolved neighbor. |
+| net_tx_corrupt_cnt | counter | Number of packet transmit jobs dropped due to malformed content. |
+| net_tx_fallback_cnt | counter | Number of packet transmit jobs handled via sockets fallback instead of XDP. |
| net_tx_full_fail_cnt | counter | Number of packet transmit jobs dropped due to XDP TX ring full or missing completions. |
| net_tx_busy_cnt | gauge | Number of transmit buffers currently busy. |
| net_tx_idle_cnt | gauge | Number of transmit buffers currently idle. |
@@ -76,7 +77,6 @@
| net_rx_gre_invalid_cnt | counter | Number of invalid GRE packets received |
| net_rx_gre_ignored_cnt | counter | Number of received but ignored GRE packets |
| net_tx_gre_cnt | counter | Number of GRE packet transmit jobs submitted |
-| net_tx_gre_route_fail_cnt | counter | Number of GRE packets transmit jobs dropped due to route failure |
@@ -699,10 +699,6 @@
| netlnk_interface_count | gauge | Number of network interfaces |
| netlnk_route_count
{route_table="local"} | gauge | Number of IPv4 routes (Local) |
| netlnk_route_count
{route_table="main"} | gauge | Number of IPv4 routes (Main) |
-| netlnk_neigh_probe_sent | counter | Number of neighbor solicit requests sent to kernel |
-| netlnk_neigh_probe_fails | counter | Number of neighbor solicit requests that failed to send (kernel too slow) |
-| netlnk_neigh_probe_rate_limit_host | counter | Number of neighbor solicit that exceeded the per-host rate limit |
-| netlnk_neigh_probe_rate_limit_global | counter | Number of neighbor solicit that exceeded the global rate limit |
diff --git a/book/guide/initializing.md b/book/guide/initializing.md
index 7453ff3ffe..b55060a8d3 100644
--- a/book/guide/initializing.md
+++ b/book/guide/initializing.md
@@ -11,8 +11,6 @@ so Firedancer can run correctly. It does the following:
device.
* **ethtool-gro** Disable generic-receive-offload (GRO) on the network
device.
-* **ethtool-loopback** Disable tx-udp-segmentation on the loopback
-device.
The `hugetlbfs` configuration must be performed every time the system
is rebooted, to remount the `hugetlbfs` filesystems, as do `sysctl`,
@@ -30,7 +28,7 @@ where `mode` is one of:
- `fini` Unconfigure (reverse) the stage if it is reversible.
`stage` can be one or more of `hugetlbfs`, `sysctl`, `hyperthreads`,
-`ethtool-channels`, `ethtool-gro`, `ethtool-loopback`, and `snapshots`
+`ethtool-channels`, `ethtool-gro`, and `snapshots`
and these stages are described below. You can also use the stage `all`
which will configure everything.
@@ -193,22 +191,6 @@ Firedancer. It has no dependencies on any other stage.
Changing device settings with `ethtool-gro` requires root privileges, and
cannot be performed with capabilities.
-## ethtool-loopback
-XDP is incompatible with localhost UDP traffic using a feature called
-`tx-udp-segmentation`. This feature must be disabled when connecting Agave
-clients to Firedancer over loopback, or when using Frankendancer.
-
-The command run by the stage is `ethtool --offload lo tx-udp-segmentation
-off`. We can check that it worked:
-
-<<< @/snippets/ethtool-loopback.ansi
-
-The stage only needs to be run once after boot but before running
-Firedancer. It has no dependencies on any other stage.
-
-Changing device settings with `ethtool-loopback` requires root privileges,
-and cannot be performed with capabilities.
-
## snapshots
When starting up, validators must load a snapshot to catch up to the
current state of the blockchain. Snapshots are downloaded from other
diff --git a/book/guide/internals/net_tile.md b/book/guide/internals/net_tile.md
index cf6cec0a1e..6b4265b3fa 100644
--- a/book/guide/internals/net_tile.md
+++ b/book/guide/internals/net_tile.md
@@ -426,28 +426,6 @@ completion ring.
The net tile moves completed frames back to the free ring.
-## Loopback
-
-The first net tile (`net:0`) sets up XDP on the loopback device, for
-two main reasons:
-
-* For testing and development.
-* The Agave code sends local traffic to itself to as part of routine
- operation (e.g., when it's the leader it sends votes to its own TPU
- socket).
-
-The Linux kernel routes outgoing packets addressed to IP addresses
-owned by the system via loopback. (See `ip route show table local`)
-The net tile partially matches this behavior. For better performance
-and simplicity, a second XDP socket is used.
-
-Alternatively, the net tile could have sent such traffic out to the
-public gateway, in hopes that the traffic gets mirrored back.
-
-But for now, Firedancer also binds XDP to loopback. This is a small performance hit for other traffic, but otherwise won't interfere.
-
-The loopback device only supports XDP in SKB mode.
-
## Development
### Network Namespace
diff --git a/book/guide/internals/netlink.md b/book/guide/internals/netlink.md
index 081a2f5485..999edb3e03 100644
--- a/book/guide/internals/netlink.md
+++ b/book/guide/internals/netlink.md
@@ -71,33 +71,12 @@ inputs.
Neighbor table updates are forwarded ot the netlink tile. This path
has limited throughput (few ~100K updates per second).
-- `[untrusted traffic] --> [net tile] --> [app tile]`
- `--> [net tile] --> [netlink tile] --> [neighbor discovery]`
- App tiles will blindly respond to the source IP found in untrusted
- packets. This source IP can be spoofed. Neighbor solicitation might
- be required in order to find out the MAC address of that IP. On IPv4,
- these are ARP requests broadcasted to the local network.
-
- Net tiles cannot solicit neighbors directly, so they notify the
- netlink tile that neighbor solicitation is needed. (Potentially at
- line rate if network configuration is part of a huge subnet)
-
- The netlink tile will deduplicate these requests and forward them to
- the kernel.
-
- This path is the only direct 'untrusted traffic' -> 'netlink tile'
- data flow, so the internal neighbor solicit message format is kept
- as simple as possible for security.
-
### Neighbor discovery (ARP)
A concurrent open addressed hash table is used to store ARP entries
(henceforth called "neighbor table"). This table attempts to
continuously stay in sync with the kernel.
-The netlink tile requests neighbor solicitations via the netlink
-equivalent of `ip neigh add dev DEVICE IP use`.
-
### Routing
The Firedancer network stack supports very simple routing tables as
diff --git a/book/snippets/commands/configure-check.ansi b/book/snippets/commands/configure-check.ansi
index 7178a77aa0..ecde9b432b 100644
--- a/book/snippets/commands/configure-check.ansi
+++ b/book/snippets/commands/configure-check.ansi
@@ -3,5 +3,4 @@ $ fdctl configure check all
[93mWARNING[0m sysctl ... not configured ... kernel parameter `/proc/sys/vm/max_map_count` is too low (got 65536 but expected at least 1000000)
[93mWARNING[0m ethtool-channels ... not configured ... device `ens3f0` does not have right number of channels (got 1 but expected 2)
[93mWARNING[0m ethtool-gro ... not configured ... device `ens3f0` has generic-receive-offload enabled. Should be disabled
-[93mWARNING[0m ethtool-loopback ... not configured ... device `lo` has tx-udp-segmentation enabled. Should be disabled
[31mERR [0m failed to configure some stages
diff --git a/book/snippets/commands/configure-init.ansi b/book/snippets/commands/configure-init.ansi
index e9e2757bc9..02dd29056c 100644
--- a/book/snippets/commands/configure-init.ansi
+++ b/book/snippets/commands/configure-init.ansi
@@ -23,6 +23,3 @@ $ fdctl configure init all
[32mNOTICE [0m ethtool-gro ... unconfigured ... device `ens3f0` has generic-receive-offload enabled. Should be disabled
[32mNOTICE [0m ethtool-gro ... configuring
[32mNOTICE [0m ethtool-gro ... RUN: `ethtool --offload ens3f0 generic-receive-offload off`
-[32mNOTICE [0m ethtool-loopback ... unconfigured ... device `lo` has tx-udp-segmentation enabled. Should be disabled
-[32mNOTICE [0m ethtool-loopback ... configuring
-[32mNOTICE [0m ethtool-loopback ... RUN: `ethtool --offload lo tx-udp-segmentation off`
diff --git a/book/snippets/configure.ansi b/book/snippets/configure.ansi
index f51b79f2ff..8f75488b2a 100644
--- a/book/snippets/configure.ansi
+++ b/book/snippets/configure.ansi
@@ -7,4 +7,3 @@
[32mNOTICE [0m sysctl ... already valid
[32mNOTICE [0m ethtool-channels ... already valid
[32mNOTICE [0m ethtool-gro ... already valid
-[32mNOTICE [0m ethtool-loopback ... already valid
diff --git a/book/snippets/ethtool-loopback.ansi b/book/snippets/ethtool-loopback.ansi
deleted file mode 100644
index 60be67ddf3..0000000000
--- a/book/snippets/ethtool-loopback.ansi
+++ /dev/null
@@ -1,7 +0,0 @@
-$ sudo fdctl configure init ethtool-loopback
-[32mNOTICE [0m ethtool-loopback ... unconfigured ... device `lo` has tx-udp-segmentation enabled. Should be disabled
-[32mNOTICE [0m ethtool-loopback ... configuring
-[32mNOTICE [0m ethtool-loopback ... RUN: `ethtool --offload lo tx-udp-segmentation off`
-
-$ ethtool --show-offload lo | grep tx-udp-segmentation
-tx-udp-segmentation: off
diff --git a/contrib/test/test_firedancer_leader.sh b/contrib/test/test_firedancer_leader.sh
index 3e9771ac58..18fe43bbbe 100755
--- a/contrib/test/test_firedancer_leader.sh
+++ b/contrib/test/test_firedancer_leader.sh
@@ -85,7 +85,6 @@ echo "
sudo $FD_DIR/$OBJDIR/bin/firedancer-dev configure init kill --config $(readlink -f firedancer-dev.toml)
sudo $FD_DIR/$OBJDIR/bin/firedancer-dev configure init hugetlbfs --config $(readlink -f firedancer-dev.toml)
sudo $FD_DIR/$OBJDIR/bin/firedancer-dev configure init ethtool-channels --config $(readlink -f firedancer-dev.toml)
-sudo $FD_DIR/$OBJDIR/bin/firedancer-dev configure init ethtool-gro ethtool-loopback --config $(readlink -f firedancer-dev.toml)
sudo $FD_DIR/$OBJDIR/bin/firedancer-dev configure init keys --config $(readlink -f firedancer-dev.toml)
sudo gdb -iex="set debuginfod enabled on" -ex=r --args $FD_DIR/$OBJDIR/bin/firedancer-dev dev --no-configure --log-path $(readlink -f firedancer-dev.log) --config $(readlink -f firedancer-dev.toml)
diff --git a/src/app/fdctl/main.c b/src/app/fdctl/main.c
index 153787422b..b3c98764c8 100644
--- a/src/app/fdctl/main.c
+++ b/src/app/fdctl/main.c
@@ -38,7 +38,6 @@ configure_stage_t * STAGES[] = {
&fd_cfg_stage_hyperthreads,
&fd_cfg_stage_ethtool_channels,
&fd_cfg_stage_ethtool_gro,
- &fd_cfg_stage_ethtool_loopback,
NULL,
};
diff --git a/src/app/fdctl/topology.c b/src/app/fdctl/topology.c
index dfaaeaba28..b2201288da 100644
--- a/src/app/fdctl/topology.c
+++ b/src/app/fdctl/topology.c
@@ -378,16 +378,19 @@ fd_topo_initialize( config_t * config ) {
}
FD_TEST( fd_pod_insertf_ulong( topo->props, poh_shred_obj->id, "poh_shred" ) );
- FOR(net_tile_cnt) fd_topos_net_tile_finish( topo, i );
+ fd_topo_net_rx_t rx_rules = {0};
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_SHRED, "net_shred", config->tiles.shred.shred_listen_port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_TPU_QUIC, "net_quic" , config->tiles.quic.quic_transaction_listen_port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_TPU_UDP, "net_quic" , config->tiles.quic.regular_transaction_listen_port );
+
+ fd_topos_net_tile_finish( topo );
for( ulong i=0UL; itile_cnt; i++ ) {
fd_topo_tile_t * tile = &topo->tiles[ i ];
if( FD_UNLIKELY( !strcmp( tile->name, "net" ) || !strcmp( tile->name, "sock" ) ) ) {
- tile->net.shred_listen_port = config->tiles.shred.shred_listen_port;
- tile->net.quic_transaction_listen_port = config->tiles.quic.quic_transaction_listen_port;
- tile->net.legacy_transaction_listen_port = config->tiles.quic.regular_transaction_listen_port;
+ tile->net.rx_rules = rx_rules;
} else if( FD_UNLIKELY( !strcmp( tile->name, "netlnk" ) ) ) {
diff --git a/src/app/fddev/main.h b/src/app/fddev/main.h
index 3ed7980a94..8dac2580ec 100644
--- a/src/app/fddev/main.h
+++ b/src/app/fddev/main.h
@@ -48,7 +48,6 @@ configure_stage_t * STAGES[] = {
&fd_cfg_stage_hyperthreads,
&fd_cfg_stage_ethtool_channels,
&fd_cfg_stage_ethtool_gro,
- &fd_cfg_stage_ethtool_loopback,
&fd_cfg_stage_keys,
&fd_cfg_stage_genesis,
&fd_cfg_stage_blockstore,
diff --git a/src/app/firedancer-dev/commands/backtest.c b/src/app/firedancer-dev/commands/backtest.c
index 18fe8f56ac..10f4e90306 100644
--- a/src/app/firedancer-dev/commands/backtest.c
+++ b/src/app/firedancer-dev/commands/backtest.c
@@ -323,7 +323,7 @@ backtest_topo( config_t * config ) {
for( ulong i=0UL; itile_cnt; i++ ) {
fd_topo_tile_t * tile = &topo->tiles[ i ];
- if( !fd_topo_configure_tile( tile, config ) ) {
+ if( !fd_topo_configure_tile( tile, config, NULL ) ) {
FD_LOG_ERR(( "unknown tile name %lu `%s`", i, tile->name ));
}
diff --git a/src/app/firedancer-dev/commands/gossip.c b/src/app/firedancer-dev/commands/gossip.c
index 20c4470e35..e65c0076a3 100644
--- a/src/app/firedancer-dev/commands/gossip.c
+++ b/src/app/firedancer-dev/commands/gossip.c
@@ -42,7 +42,7 @@ gossip_topo( config_t * config ) {
if( net_tile_id==ULONG_MAX ) net_tile_id = fd_topo_find_tile( topo, "sock", 0UL );
if( FD_UNLIKELY( net_tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "net tile not found" ));
fd_topo_tile_t * net_tile = &topo->tiles[ net_tile_id ];
- net_tile->net.gossip_listen_port = config->gossip.port;
+ fd_topo_net_rx_rule_push( &net_tile->net.rx_rules, DST_PROTO_GOSSIP, "net_gossip", config->gossip.port );
fd_topob_wksp( topo, "gossip" );
fd_topo_tile_t * gossip_tile = fd_topob_tile( topo, "gossip", "gossip", "metric_in", 0UL, 0, 0 );
@@ -86,7 +86,7 @@ gossip_topo( config_t * config ) {
FD_TEST( fd_pod_insertf_ulong( topo->props, poh_shred_obj->id, "poh_shred" ) );
fd_topob_tile_uses( topo, gossip_tile, poh_shred_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
- fd_topos_net_tile_finish( topo, 0UL );
+ fd_topos_net_tile_finish( topo );
fd_topob_auto_layout( topo, 0 );
topo->agave_affinity_cnt = 0;
fd_topob_finish( topo, CALLBACKS );
diff --git a/src/app/firedancer-dev/commands/repair.c b/src/app/firedancer-dev/commands/repair.c
index b6fa38d361..66cf534815 100644
--- a/src/app/firedancer-dev/commands/repair.c
+++ b/src/app/firedancer-dev/commands/repair.c
@@ -58,9 +58,8 @@ static void
repair_topo( config_t * config ) {
resolve_gossip_entrypoints( config );
- ulong net_tile_cnt = config->layout.net_tile_count;
- ulong shred_tile_cnt = config->layout.shred_tile_count;
- ulong quic_tile_cnt = config->layout.quic_tile_count;
+ ulong net_tile_cnt = config->layout.net_tile_count;
+ ulong shred_tile_cnt = config->layout.shred_tile_count;
fd_topo_t * topo = { fd_topob_new( &config->topo, config->name ) };
topo->max_page_size = fd_cstr_to_shmem_page_sz( config->hugetlbfs.max_page_size );
@@ -71,7 +70,6 @@ repair_topo( config_t * config ) {
fd_topob_wksp( topo, "net_shred" );
fd_topob_wksp( topo, "net_gossip" );
fd_topob_wksp( topo, "net_repair" );
- fd_topob_wksp( topo, "net_quic" );
fd_topob_wksp( topo, "shred_repair" );
fd_topob_wksp( topo, "stake_out" );
@@ -93,8 +91,6 @@ repair_topo( config_t * config ) {
fd_topob_wksp( topo, "sign_repair" );
fd_topob_wksp( topo, "repair_repla" );
- fd_topob_wksp( topo, "gossip_send" );
- fd_topob_wksp( topo, "send_txns" );
fd_topob_wksp( topo, "shred" );
fd_topob_wksp( topo, "sign" );
@@ -111,7 +107,6 @@ repair_topo( config_t * config ) {
ulong pending_fec_shreds_depth = fd_ulong_min( fd_ulong_pow2_up( config->tiles.shred.max_pending_shred_sets * FD_REEDSOL_DATA_SHREDS_MAX ), USHORT_MAX + 1 /* dcache max */ );
/* topo, link_name, wksp_name, depth, mtu, burst */
- FOR(quic_tile_cnt) fd_topob_link( topo, "quic_net", "net_quic", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );
FOR(shred_tile_cnt) fd_topob_link( topo, "shred_net", "net_shred", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );
/**/ fd_topob_link( topo, "stake_out", "stake_out", 128UL, 40UL + 40200UL * 40UL, 1UL );
@@ -127,7 +122,6 @@ repair_topo( config_t * config ) {
/**/ fd_topob_link( topo, "crds_shred", "crds_shred", 128UL, 8UL + 40200UL * 38UL, 1UL );
/**/ fd_topob_link( topo, "gossip_repai", "gossip_repai", 128UL, 40200UL * 38UL, 1UL );
- /**/ fd_topob_link( topo, "gossip_send", "gossip_send", 128UL, 40200UL * 38UL, 1UL );
/**/ fd_topob_link( topo, "gossip_net", "net_gossip", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );
@@ -140,8 +134,6 @@ repair_topo( config_t * config ) {
/**/ fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_reasm_fec_t), 1UL );
/**/ fd_topob_link( topo, "poh_shred", "poh_shred", 16384UL, USHORT_MAX, 1UL );
- /**/ fd_topob_link( topo, "send_txns", "send_txns", 128UL, FD_TXN_MTU, 1UL );
-
FD_TEST( sizeof(fd_snapshot_manifest_t)<=(5UL*(1UL<<30UL)) );
/**/ fd_topob_link( topo, "snap_out", "snap_out", 2UL, 5UL*(1UL<<30UL), 1UL );
@@ -176,7 +168,6 @@ repair_topo( config_t * config ) {
FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_gossip", i, config->net.ingress_buffer_size );
FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_repair", i, config->net.ingress_buffer_size );
- FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_quic", i, config->net.ingress_buffer_size );
FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_shred", i, config->net.ingress_buffer_size );
/* topo, tile_name, tile_wksp, metrics_wksp, cpu_idx, is_agave, uses_keyswitch */
@@ -254,10 +245,6 @@ repair_topo( config_t * config ) {
/* topo, tile_name, tile_kind_id, fseq_wksp, link_name, link_kind_id, reliable, polled */
for( ulong j=0UL; jtiles.shred.shred_listen_port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_GOSSIP, "net_gossip", config->gossip.port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_REPAIR, "net_repair", config->tiles.repair.repair_intake_listen_port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_REPAIR, "net_repair", config->tiles.repair.repair_serve_listen_port );
- FOR(net_tile_cnt) fd_topos_net_tile_finish( topo, i );
+ fd_topos_net_tile_finish( topo );
for( ulong i=0UL; itile_cnt; i++ ) {
fd_topo_tile_t * tile = &topo->tiles[ i ];
- if( !fd_topo_configure_tile( tile, config ) ) {
+ if( !fd_topo_configure_tile( tile, config, &rx_rules ) ) {
FD_LOG_ERR(( "unknown tile name %lu `%s`", i, tile->name ));
}
}
diff --git a/src/app/firedancer-dev/commands/sim.c b/src/app/firedancer-dev/commands/sim.c
index 2cd19d2196..52cc0b6289 100644
--- a/src/app/firedancer-dev/commands/sim.c
+++ b/src/app/firedancer-dev/commands/sim.c
@@ -174,7 +174,7 @@ sim_topo( config_t * config ) {
} else {
FD_LOG_NOTICE(( "Found archive file from config: %s", tile->archiver.rocksdb_path ));
}
- } else if( !fd_topo_configure_tile( tile, config ) ) {
+ } else if( !fd_topo_configure_tile( tile, config, NULL ) ) {
FD_LOG_ERR(( "unknown tile name %lu `%s`", i, tile->name ));
}
diff --git a/src/app/firedancer-dev/commands/snapshot_load.c b/src/app/firedancer-dev/commands/snapshot_load.c
index 165025db22..b6c7cdb47b 100644
--- a/src/app/firedancer-dev/commands/snapshot_load.c
+++ b/src/app/firedancer-dev/commands/snapshot_load.c
@@ -102,7 +102,7 @@ snapshot_load_topo( config_t * config,
for( ulong i=0UL; itile_cnt; i++ ) {
fd_topo_tile_t * tile = &topo->tiles[ i ];
- if( !fd_topo_configure_tile( tile, config ) ) {
+ if( !fd_topo_configure_tile( tile, config, NULL ) ) {
FD_LOG_ERR(( "unknown tile name %lu `%s`", i, tile->name ));
}
}
diff --git a/src/app/firedancer-dev/main.c b/src/app/firedancer-dev/main.c
index aeb3308996..7ce3e6d5b2 100644
--- a/src/app/firedancer-dev/main.c
+++ b/src/app/firedancer-dev/main.c
@@ -58,7 +58,6 @@ configure_stage_t * STAGES[] = {
&fd_cfg_stage_hyperthreads,
&fd_cfg_stage_ethtool_channels,
&fd_cfg_stage_ethtool_gro,
- &fd_cfg_stage_ethtool_loopback,
&fd_cfg_stage_keys,
&fd_cfg_stage_genesis,
&fd_cfg_stage_snapshots,
diff --git a/src/app/firedancer/main.c b/src/app/firedancer/main.c
index 937bb331d5..a52dd7cf18 100644
--- a/src/app/firedancer/main.c
+++ b/src/app/firedancer/main.c
@@ -53,7 +53,6 @@ configure_stage_t * STAGES[] = {
&fd_cfg_stage_hyperthreads,
&fd_cfg_stage_ethtool_channels,
&fd_cfg_stage_ethtool_gro,
- &fd_cfg_stage_ethtool_loopback,
&fd_cfg_stage_snapshots,
NULL,
};
diff --git a/src/app/firedancer/topology.c b/src/app/firedancer/topology.c
index ffddd7ac1b..dc954fc143 100644
--- a/src/app/firedancer/topology.c
+++ b/src/app/firedancer/topology.c
@@ -825,9 +825,18 @@ fd_topo_initialize( config_t * config ) {
FOR(net_tile_cnt) fd_topos_net_tile_finish( topo, i );
+ fd_topo_net_rx_t rx_rules = {0};
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_TPU_QUIC, "net_quic", config->tiles.quic.quic_transaction_listen_port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_TPU_QUIC, "net_quic", config->tiles.quic.quic_transaction_listen_port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_SHRED, "net_shred", config->tiles.shred.shred_listen_port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_GOSSIP, "net_gossip", config->gossip.port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_REPAIR, "net_repair", config->tiles.repair.repair_intake_listen_port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_REPAIR, "net_repair", config->tiles.repair.repair_serve_listen_port );
+ fd_topo_net_rx_rule_push( &rx_rules, DST_PROTO_SEND, "net_send", config->tiles.send.send_src_port );
+
for( ulong i=0UL; itile_cnt; i++ ) {
fd_topo_tile_t * tile = &topo->tiles[ i ];
- if( !fd_topo_configure_tile( tile, config ) ) {
+ if( !fd_topo_configure_tile( tile, config, &rx_rules ) ) {
FD_LOG_ERR(( "unknown tile name %lu `%s`", i, tile->name ));
}
}
@@ -858,17 +867,12 @@ fd_topo_initialize( config_t * config ) {
}
int
-fd_topo_configure_tile( fd_topo_tile_t * tile,
- fd_config_t * config ) {
- if( FD_UNLIKELY( !strcmp( tile->name, "net" ) || !strcmp( tile->name, "sock" ) ) ) {
-
- tile->net.shred_listen_port = config->tiles.shred.shred_listen_port;
- tile->net.quic_transaction_listen_port = config->tiles.quic.quic_transaction_listen_port;
- tile->net.legacy_transaction_listen_port = config->tiles.quic.regular_transaction_listen_port;
- tile->net.gossip_listen_port = config->gossip.port;
- tile->net.repair_intake_listen_port = config->tiles.repair.repair_intake_listen_port;
- tile->net.repair_serve_listen_port = config->tiles.repair.repair_serve_listen_port;
- tile->net.send_src_port = config->tiles.send.send_src_port;
+fd_topo_configure_tile( fd_topo_tile_t * tile,
+ fd_config_t * config,
+ fd_topo_net_rx_t const * rx_rules ) {
+ if( FD_UNLIKELY( rx_rules && (!strcmp( tile->name, "net" ) || !strcmp( tile->name, "sock" )) ) ) {
+
+ tile->net.rx_rules = *rx_rules;
} else if( FD_UNLIKELY( !strcmp( tile->name, "netlnk" ) ) ) {
diff --git a/src/app/firedancer/topology.h b/src/app/firedancer/topology.h
index 0aaf044353..4211d81083 100644
--- a/src/app/firedancer/topology.h
+++ b/src/app/firedancer/topology.h
@@ -49,8 +49,9 @@ setup_topo_txncache( fd_topo_t * topo,
ulong max_txn_per_slot );
int
-fd_topo_configure_tile( fd_topo_tile_t * tile,
- fd_config_t * config );
+fd_topo_configure_tile( fd_topo_tile_t * tile,
+ fd_config_t * config,
+ fd_topo_net_rx_t const * rx_rules );
FD_PROTOTYPES_END
diff --git a/src/app/shared/Local.mk b/src/app/shared/Local.mk
index 2d0ca1f294..a35c93e585 100644
--- a/src/app/shared/Local.mk
+++ b/src/app/shared/Local.mk
@@ -23,7 +23,6 @@ $(call add-objs,commands/version,fdctl_shared)
$(call add-objs,commands/configure/configure,fdctl_shared)
$(call add-objs,commands/configure/ethtool-channels,fdctl_shared)
$(call add-objs,commands/configure/ethtool-gro,fdctl_shared)
-$(call add-objs,commands/configure/ethtool-loopback,fdctl_shared)
$(call add-objs,commands/configure/hugetlbfs,fdctl_shared)
$(call add-objs,commands/configure/hyperthreads,fdctl_shared)
$(call add-objs,commands/configure/sysctl,fdctl_shared)
diff --git a/src/app/shared/commands/configure/configure.h b/src/app/shared/commands/configure/configure.h
index 5c3e82482e..c7075937e0 100644
--- a/src/app/shared/commands/configure/configure.h
+++ b/src/app/shared/commands/configure/configure.h
@@ -70,7 +70,6 @@ extern configure_stage_t fd_cfg_stage_sysctl;
extern configure_stage_t fd_cfg_stage_hyperthreads;
extern configure_stage_t fd_cfg_stage_ethtool_channels;
extern configure_stage_t fd_cfg_stage_ethtool_gro;
-extern configure_stage_t fd_cfg_stage_ethtool_loopback;
extern configure_stage_t fd_cfg_stage_snapshots;
extern configure_stage_t * STAGES[];
diff --git a/src/app/shared/commands/configure/ethtool-gro.c b/src/app/shared/commands/configure/ethtool-gro.c
index 3acd69bef1..bf9821e7e9 100644
--- a/src/app/shared/commands/configure/ethtool-gro.c
+++ b/src/app/shared/commands/configure/ethtool-gro.c
@@ -1,6 +1,6 @@
-/* This stage disables the "Generic Receive Offload" ethtool feature on the
- main and loopback interfaces. If left enabled, GRO will mangle UDP
- packets in a way that causes AF_XDP packets to get corrupted.
+/* This stage disables the "Generic Receive Offload" ethtool feature on
+ the main interface. If left enabled, GRO will mangle UDP packets in
+ a way that causes AF_XDP packets to get corrupted.
TLDR GRO and AF_XDP are incompatible. */
@@ -126,7 +126,6 @@ init( config_t const * config ) {
} else {
init_device( config->net.interface );
}
- init_device( "lo" );
}
static configure_result_t
diff --git a/src/app/shared/commands/configure/ethtool-loopback.c b/src/app/shared/commands/configure/ethtool-loopback.c
deleted file mode 100644
index 6be9c90ca8..0000000000
--- a/src/app/shared/commands/configure/ethtool-loopback.c
+++ /dev/null
@@ -1,211 +0,0 @@
-/* This stage disables the "tx-udp-segmentation" offload on the loopback
- interface. If left enabled, AF_XDP will drop loopback UDP packets sent
- by processes that enable TX segmentation via SOL_UDP/UDP_SEGMENT sockopt
- or cmsg.
-
- TLDR tx-udp-segmentation and AF_XDP are incompatible. */
-
-#include "configure.h"
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#define NAME "ethtool-loopback"
-#define MAX_FEATURES (1024)
-
-#define UDPSEG_FEATURE "tx-udp-segmentation"
-static char const udpseg_feature[] = UDPSEG_FEATURE;
-
-#define ETHTOOL_CMD_SZ( base_t, data_t, data_len ) ( sizeof(base_t) + (sizeof(data_t)*(data_len)) )
-
-static int
-enabled( config_t const * config ) {
-
- /* if we're running in a network namespace, we configure ethtool on
- the virtual device as part of netns setup, not here */
- if( config->development.netns.enabled ) return 0;
-
- /* only enable if network stack is XDP */
- if( 0!=strcmp( config->net.provider, "xdp" ) ) return 0;
-
- return 1;
-}
-
-static void
-init_perm( fd_cap_chk_t * chk,
- config_t const * config FD_PARAM_UNUSED ) {
- fd_cap_chk_root( chk, NAME, "disable loopback " UDPSEG_FEATURE " with `ethtool --offload lo " UDPSEG_FEATURE " off`" );
-}
-
-/* ethtool_ioctl wraps ioctl(sock,SIOCETHTOOL,"lo",*) */
-
-static int
-ethtool_ioctl( int sock,
- void * data ) {
- struct ifreq ifr = {0};
- strcpy( ifr.ifr_name, "lo" );
- ifr.ifr_data = data;
- return ioctl( sock, SIOCETHTOOL, &ifr );
-}
-
-/* find_feature_index finds the index of an ethtool feature. */
-
-static int
-find_feature_index( int sock,
- char const * feature ) {
-
- union {
- struct ethtool_sset_info r;
- uchar _[ ETHTOOL_CMD_SZ( struct ethtool_sset_info, uint, 1 ) ];
- } set_info = { .r = {
- .cmd = ETHTOOL_GSSET_INFO,
- .sset_mask = fd_ulong_mask_bit( ETH_SS_FEATURES )
- } };
- if( FD_UNLIKELY( ethtool_ioctl( sock, &set_info ) ) ) {
- FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_GSSET_INFO) failed (%i-%s)",
- errno, fd_io_strerror( errno ) ));
- }
- fd_msan_unpoison( set_info.r.data, sizeof(uint) );
- uint const feature_cnt = fd_uint_min( set_info.r.data[0], MAX_FEATURES );
-
- static union {
- struct ethtool_gstrings r;
- uchar _[ ETHTOOL_CMD_SZ( struct ethtool_gstrings, uchar, MAX_FEATURES*ETH_GSTRING_LEN ) ];
- } get_strings;
- get_strings.r = (struct ethtool_gstrings) {
- .cmd = ETHTOOL_GSTRINGS,
- .string_set = ETH_SS_FEATURES,
- .len = feature_cnt
- };
- if( FD_UNLIKELY( ethtool_ioctl( sock, &get_strings ) ) ) {
- FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_GSTRINGS) failed (%i-%s)",
- errno, fd_io_strerror( errno ) ));
- }
- fd_msan_unpoison( get_strings.r.data, ETH_GSTRING_LEN*feature_cnt );
-
- for( uint j=0UL; j0 && index0 && indextopo, &config->topo.workspaces[ shred_wksp_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
/* Cast to shred context structure */
- fd_shred_ctx_t const * shred_ctx = fd_topo_obj_laddr( &config->topo, shred_tile->tile_obj_id );
+ fd_shred_ctx_hdr_t const * shred_ctx = fd_topo_obj_laddr( &config->topo, shred_tile->tile_obj_id );
if( FD_UNLIKELY( !shred_ctx ) ) {
fd_topo_leave_workspaces( &config->topo );
FD_LOG_ERR(( "Failed to access shred tile object" ));
diff --git a/src/app/shared/commands/run/run.c b/src/app/shared/commands/run/run.c
index 45da4152af..cda61e97d8 100644
--- a/src/app/shared/commands/run/run.c
+++ b/src/app/shared/commands/run/run.c
@@ -692,7 +692,6 @@ initialize_stacks( config_t const * config ) {
extern configure_stage_t fd_cfg_stage_hugetlbfs;
extern configure_stage_t fd_cfg_stage_ethtool_channels;
extern configure_stage_t fd_cfg_stage_ethtool_gro;
-extern configure_stage_t fd_cfg_stage_ethtool_loopback;
extern configure_stage_t fd_cfg_stage_sysctl;
extern configure_stage_t fd_cfg_stage_hyperthreads;
@@ -714,11 +713,6 @@ fdctl_check_configure( config_t const * config ) {
if( FD_UNLIKELY( check.result!=CONFIGURE_OK ) )
FD_LOG_ERR(( "Network %s. You can run `fdctl configure init ethtool-gro` to disable generic-receive-offload "
"as required.", check.message ));
-
- check = fd_cfg_stage_ethtool_loopback.check( config );
- if( FD_UNLIKELY( check.result!=CONFIGURE_OK ) )
- FD_LOG_ERR(( "Network %s. You can run `fdctl configure init ethtool-loopback` to disable tx-udp-segmentation "
- "on the loopback device.", check.message ));
}
check = fd_cfg_stage_sysctl.check( config );
@@ -766,7 +760,6 @@ fdctl_setup_netns( config_t * config,
if( 0==strcmp( config->net.provider, "xdp" ) ) {
fd_cfg_stage_ethtool_channels.init( config );
fd_cfg_stage_ethtool_gro .init( config );
- fd_cfg_stage_ethtool_loopback.init( config );
}
if( FD_UNLIKELY( original_netns && -1==fd_net_util_netns_restore( original_netns_ ) ) )
diff --git a/src/app/shared_dev/commands/bench/fd_benchs.c b/src/app/shared_dev/commands/bench/fd_benchs.c
index ac52342ca5..fd4b17b9df 100644
--- a/src/app/shared_dev/commands/bench/fd_benchs.c
+++ b/src/app/shared_dev/commands/bench/fd_benchs.c
@@ -127,7 +127,12 @@ service_quic( fd_benchs_ctx_t * ctx ) {
if( getsockopt( ctx->poll_fd[j].fd, SOL_SOCKET, SO_ERROR, (void *)&error, &errlen ) == -1 ) {
FD_LOG_ERR(( "Unknown error on socket" ));
- } else {
+ }
+ if( error==ECONNREFUSED ) {
+ FD_LOG_WARNING(( "Connection refused ... retrying" ));
+ fd_log_sleep( (long)100e6 );
+ return;
+ } else if( error ) {
FD_LOG_ERR(( "Error on socket: %d %s", error, strerror( error ) ));
}
}
diff --git a/src/app/shared_dev/commands/pktgen/pktgen.c b/src/app/shared_dev/commands/pktgen/pktgen.c
index 347a9a5fd2..cfdd455023 100644
--- a/src/app/shared_dev/commands/pktgen/pktgen.c
+++ b/src/app/shared_dev/commands/pktgen/pktgen.c
@@ -69,7 +69,7 @@ pktgen_topo( config_t * config ) {
fd_topos_net_rx_link( topo, "net_quic", 0UL, config->net.ingress_buffer_size );
fd_topob_tile_in( topo, "pktgen", 0UL, "metric_in", "net_quic", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
- fd_topos_net_tile_finish( topo, 0UL );
+ fd_topos_net_tile_finish( topo );
if( FD_UNLIKELY( is_auto_affinity ) ) fd_topob_auto_layout( topo, 0 );
topo->agave_affinity_cnt = 0;
fd_topob_finish( topo, CALLBACKS );
@@ -197,7 +197,7 @@ pktgen_cmd_fn( args_t * args FD_PARAM_UNUSED,
fd_topo_tile_t * metric_tile = &topo->tiles[ fd_topo_find_tile( topo, "metric", 0UL ) ];
ushort const listen_port = 9000;
- net_tile->net.legacy_transaction_listen_port = listen_port;
+ fd_topo_net_rx_rule_push( &net_tile->net.rx_rules, DST_PROTO_TPU_UDP, "net_quic", listen_port );
if( FD_UNLIKELY( !fd_cstr_to_ip4_addr( config->tiles.metric.prometheus_listen_address, &metric_tile->metric.prometheus_listen_addr ) ) )
FD_LOG_ERR(( "failed to parse prometheus listen address `%s`", config->tiles.metric.prometheus_listen_address ));
diff --git a/src/app/shared_dev/commands/udpecho/udpecho.c b/src/app/shared_dev/commands/udpecho/udpecho.c
index f8a74d06eb..7c5d608e69 100644
--- a/src/app/shared_dev/commands/udpecho/udpecho.c
+++ b/src/app/shared_dev/commands/udpecho/udpecho.c
@@ -62,7 +62,7 @@ udpecho_topo( config_t * config ) {
fd_topos_net_rx_link( topo, "net_quic", 0UL, config->net.ingress_buffer_size );
fd_topob_tile_in( topo, "l4swap", 0UL, "metric_in", "net_quic", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
- fd_topos_net_tile_finish( topo, 0UL );
+ fd_topos_net_tile_finish( topo );
if( FD_UNLIKELY( is_auto_affinity ) ) fd_topob_auto_layout( topo, 0 );
topo->agave_affinity_cnt = 0;
fd_topob_finish( topo, CALLBACKS );
@@ -88,7 +88,7 @@ udpecho_cmd_fn( args_t * args,
fd_topo_tile_t * net_tile = &topo->tiles[ fd_topo_find_tile( topo, "net", 0UL ) ];
fd_topo_tile_t * metric_tile = &topo->tiles[ fd_topo_find_tile( topo, "metric", 0UL ) ];
- net_tile->net.legacy_transaction_listen_port = args->udpecho.listen_port;
+ fd_topo_net_rx_rule_push( &net_tile->net.rx_rules, DST_PROTO_TPU_UDP, "net_quic", args->udpecho.listen_port );
if( FD_UNLIKELY( !fd_cstr_to_ip4_addr( config->tiles.metric.prometheus_listen_address, &metric_tile->metric.prometheus_listen_addr ) ) )
FD_LOG_ERR(( "failed to parse prometheus listen address `%s`", config->tiles.metric.prometheus_listen_address ));
@@ -98,7 +98,6 @@ udpecho_cmd_fn( args_t * args,
configure_stage( &fd_cfg_stage_hugetlbfs, CONFIGURE_CMD_INIT, config );
configure_stage( &fd_cfg_stage_ethtool_channels, CONFIGURE_CMD_INIT, config );
configure_stage( &fd_cfg_stage_ethtool_gro, CONFIGURE_CMD_INIT, config );
- configure_stage( &fd_cfg_stage_ethtool_loopback, CONFIGURE_CMD_INIT, config );
fdctl_check_configure( config );
/* FIXME this allocates lots of memory unnecessarily */
diff --git a/src/disco/metrics/generated/fd_metrics_enums.h b/src/disco/metrics/generated/fd_metrics_enums.h
index 7f8aeb5236..b59ea62969 100644
--- a/src/disco/metrics/generated/fd_metrics_enums.h
+++ b/src/disco/metrics/generated/fd_metrics_enums.h
@@ -19,6 +19,13 @@
#define FD_METRICS_ENUM_TILE_REGIME_V_PROCESSING_POSTFRAG_IDX 7
#define FD_METRICS_ENUM_TILE_REGIME_V_PROCESSING_POSTFRAG_NAME "processing_postfrag"
+#define FD_METRICS_ENUM_PKT_KIND_NAME "pkt_kind"
+#define FD_METRICS_ENUM_PKT_KIND_CNT (2UL)
+#define FD_METRICS_ENUM_PKT_KIND_V_IP4_UDP_IDX 0
+#define FD_METRICS_ENUM_PKT_KIND_V_IP4_UDP_NAME "ip4_udp"
+#define FD_METRICS_ENUM_PKT_KIND_V_IP4_OPT_UDP_IDX 1
+#define FD_METRICS_ENUM_PKT_KIND_V_IP4_OPT_UDP_NAME "ip4_opt_udp"
+
#define FD_METRICS_ENUM_SOCK_ERR_NAME "sock_err"
#define FD_METRICS_ENUM_SOCK_ERR_CNT (6UL)
#define FD_METRICS_ENUM_SOCK_ERR_V_NO_ERROR_IDX 0
diff --git a/src/disco/metrics/generated/fd_metrics_net.c b/src/disco/metrics/generated/fd_metrics_net.c
index 9a1115d3d0..9be747d739 100644
--- a/src/disco/metrics/generated/fd_metrics_net.c
+++ b/src/disco/metrics/generated/fd_metrics_net.c
@@ -2,7 +2,8 @@
#include "fd_metrics_net.h"
const fd_metrics_meta_t FD_METRICS_NET[FD_METRICS_NET_TOTAL] = {
- DECLARE_METRIC( NET_RX_PKT_CNT, COUNTER ),
+ DECLARE_METRIC_ENUM( NET_RX_PKT_CNT, COUNTER, PKT_KIND, IP4_UDP ),
+ DECLARE_METRIC_ENUM( NET_RX_PKT_CNT, COUNTER, PKT_KIND, IP4_OPT_UDP ),
DECLARE_METRIC( NET_RX_BYTES_TOTAL, COUNTER ),
DECLARE_METRIC( NET_RX_UNDERSZ_CNT, COUNTER ),
DECLARE_METRIC( NET_RX_FILL_BLOCKED_CNT, COUNTER ),
@@ -12,8 +13,8 @@ const fd_metrics_meta_t FD_METRICS_NET[FD_METRICS_NET_TOTAL] = {
DECLARE_METRIC( NET_TX_SUBMIT_CNT, COUNTER ),
DECLARE_METRIC( NET_TX_COMPLETE_CNT, COUNTER ),
DECLARE_METRIC( NET_TX_BYTES_TOTAL, COUNTER ),
- DECLARE_METRIC( NET_TX_ROUTE_FAIL_CNT, COUNTER ),
- DECLARE_METRIC( NET_TX_NEIGHBOR_FAIL_CNT, COUNTER ),
+ DECLARE_METRIC( NET_TX_CORRUPT_CNT, COUNTER ),
+ DECLARE_METRIC( NET_TX_FALLBACK_CNT, COUNTER ),
DECLARE_METRIC( NET_TX_FULL_FAIL_CNT, COUNTER ),
DECLARE_METRIC( NET_TX_BUSY_CNT, GAUGE ),
DECLARE_METRIC( NET_TX_IDLE_CNT, GAUGE ),
@@ -29,5 +30,4 @@ const fd_metrics_meta_t FD_METRICS_NET[FD_METRICS_NET_TOTAL] = {
DECLARE_METRIC( NET_RX_GRE_INVALID_CNT, COUNTER ),
DECLARE_METRIC( NET_RX_GRE_IGNORED_CNT, COUNTER ),
DECLARE_METRIC( NET_TX_GRE_CNT, COUNTER ),
- DECLARE_METRIC( NET_TX_GRE_ROUTE_FAIL_CNT, COUNTER ),
};
diff --git a/src/disco/metrics/generated/fd_metrics_net.h b/src/disco/metrics/generated/fd_metrics_net.h
index b1bad7cca2..50f6e6ef79 100644
--- a/src/disco/metrics/generated/fd_metrics_net.h
+++ b/src/disco/metrics/generated/fd_metrics_net.h
@@ -6,170 +6,168 @@
#define FD_METRICS_COUNTER_NET_RX_PKT_CNT_OFF (16UL)
#define FD_METRICS_COUNTER_NET_RX_PKT_CNT_NAME "net_rx_pkt_cnt"
#define FD_METRICS_COUNTER_NET_RX_PKT_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
-#define FD_METRICS_COUNTER_NET_RX_PKT_CNT_DESC "Packet receive count."
+#define FD_METRICS_COUNTER_NET_RX_PKT_CNT_DESC "Packet receive count (ignoring tunnels)"
#define FD_METRICS_COUNTER_NET_RX_PKT_CNT_CVT (FD_METRICS_CONVERTER_NONE)
+#define FD_METRICS_COUNTER_NET_RX_PKT_CNT_CNT (2UL)
-#define FD_METRICS_COUNTER_NET_RX_BYTES_TOTAL_OFF (17UL)
+#define FD_METRICS_COUNTER_NET_RX_PKT_CNT_IP4_UDP_OFF (16UL)
+#define FD_METRICS_COUNTER_NET_RX_PKT_CNT_IP4_OPT_UDP_OFF (17UL)
+
+#define FD_METRICS_COUNTER_NET_RX_BYTES_TOTAL_OFF (18UL)
#define FD_METRICS_COUNTER_NET_RX_BYTES_TOTAL_NAME "net_rx_bytes_total"
#define FD_METRICS_COUNTER_NET_RX_BYTES_TOTAL_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_RX_BYTES_TOTAL_DESC "Total number of bytes received (including Ethernet header)."
#define FD_METRICS_COUNTER_NET_RX_BYTES_TOTAL_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_RX_UNDERSZ_CNT_OFF (18UL)
+#define FD_METRICS_COUNTER_NET_RX_UNDERSZ_CNT_OFF (19UL)
#define FD_METRICS_COUNTER_NET_RX_UNDERSZ_CNT_NAME "net_rx_undersz_cnt"
#define FD_METRICS_COUNTER_NET_RX_UNDERSZ_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_RX_UNDERSZ_CNT_DESC "Number of incoming packets dropped due to being too small."
#define FD_METRICS_COUNTER_NET_RX_UNDERSZ_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_RX_FILL_BLOCKED_CNT_OFF (19UL)
+#define FD_METRICS_COUNTER_NET_RX_FILL_BLOCKED_CNT_OFF (20UL)
#define FD_METRICS_COUNTER_NET_RX_FILL_BLOCKED_CNT_NAME "net_rx_fill_blocked_cnt"
#define FD_METRICS_COUNTER_NET_RX_FILL_BLOCKED_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_RX_FILL_BLOCKED_CNT_DESC "Number of incoming packets dropped due to fill ring being full."
#define FD_METRICS_COUNTER_NET_RX_FILL_BLOCKED_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_RX_BACKPRESSURE_CNT_OFF (20UL)
+#define FD_METRICS_COUNTER_NET_RX_BACKPRESSURE_CNT_OFF (21UL)
#define FD_METRICS_COUNTER_NET_RX_BACKPRESSURE_CNT_NAME "net_rx_backpressure_cnt"
#define FD_METRICS_COUNTER_NET_RX_BACKPRESSURE_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_RX_BACKPRESSURE_CNT_DESC "Number of incoming packets dropped due to backpressure."
#define FD_METRICS_COUNTER_NET_RX_BACKPRESSURE_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_GAUGE_NET_RX_BUSY_CNT_OFF (21UL)
+#define FD_METRICS_GAUGE_NET_RX_BUSY_CNT_OFF (22UL)
#define FD_METRICS_GAUGE_NET_RX_BUSY_CNT_NAME "net_rx_busy_cnt"
#define FD_METRICS_GAUGE_NET_RX_BUSY_CNT_TYPE (FD_METRICS_TYPE_GAUGE)
#define FD_METRICS_GAUGE_NET_RX_BUSY_CNT_DESC "Number of receive buffers currently busy."
#define FD_METRICS_GAUGE_NET_RX_BUSY_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_GAUGE_NET_RX_IDLE_CNT_OFF (22UL)
+#define FD_METRICS_GAUGE_NET_RX_IDLE_CNT_OFF (23UL)
#define FD_METRICS_GAUGE_NET_RX_IDLE_CNT_NAME "net_rx_idle_cnt"
#define FD_METRICS_GAUGE_NET_RX_IDLE_CNT_TYPE (FD_METRICS_TYPE_GAUGE)
#define FD_METRICS_GAUGE_NET_RX_IDLE_CNT_DESC "Number of receive buffers currently idle."
#define FD_METRICS_GAUGE_NET_RX_IDLE_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_TX_SUBMIT_CNT_OFF (23UL)
+#define FD_METRICS_COUNTER_NET_TX_SUBMIT_CNT_OFF (24UL)
#define FD_METRICS_COUNTER_NET_TX_SUBMIT_CNT_NAME "net_tx_submit_cnt"
#define FD_METRICS_COUNTER_NET_TX_SUBMIT_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_TX_SUBMIT_CNT_DESC "Number of packet transmit jobs submitted."
#define FD_METRICS_COUNTER_NET_TX_SUBMIT_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_TX_COMPLETE_CNT_OFF (24UL)
+#define FD_METRICS_COUNTER_NET_TX_COMPLETE_CNT_OFF (25UL)
#define FD_METRICS_COUNTER_NET_TX_COMPLETE_CNT_NAME "net_tx_complete_cnt"
#define FD_METRICS_COUNTER_NET_TX_COMPLETE_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_TX_COMPLETE_CNT_DESC "Number of packet transmit jobs marked as completed by the kernel."
#define FD_METRICS_COUNTER_NET_TX_COMPLETE_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_TX_BYTES_TOTAL_OFF (25UL)
+#define FD_METRICS_COUNTER_NET_TX_BYTES_TOTAL_OFF (26UL)
#define FD_METRICS_COUNTER_NET_TX_BYTES_TOTAL_NAME "net_tx_bytes_total"
#define FD_METRICS_COUNTER_NET_TX_BYTES_TOTAL_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_TX_BYTES_TOTAL_DESC "Total number of bytes transmitted (including Ethernet header)."
#define FD_METRICS_COUNTER_NET_TX_BYTES_TOTAL_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_TX_ROUTE_FAIL_CNT_OFF (26UL)
-#define FD_METRICS_COUNTER_NET_TX_ROUTE_FAIL_CNT_NAME "net_tx_route_fail_cnt"
-#define FD_METRICS_COUNTER_NET_TX_ROUTE_FAIL_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
-#define FD_METRICS_COUNTER_NET_TX_ROUTE_FAIL_CNT_DESC "Number of packet transmit jobs dropped due to route failure."
-#define FD_METRICS_COUNTER_NET_TX_ROUTE_FAIL_CNT_CVT (FD_METRICS_CONVERTER_NONE)
+#define FD_METRICS_COUNTER_NET_TX_CORRUPT_CNT_OFF (27UL)
+#define FD_METRICS_COUNTER_NET_TX_CORRUPT_CNT_NAME "net_tx_corrupt_cnt"
+#define FD_METRICS_COUNTER_NET_TX_CORRUPT_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
+#define FD_METRICS_COUNTER_NET_TX_CORRUPT_CNT_DESC "Number of packet transmit jobs dropped due to malformed content."
+#define FD_METRICS_COUNTER_NET_TX_CORRUPT_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_TX_NEIGHBOR_FAIL_CNT_OFF (27UL)
-#define FD_METRICS_COUNTER_NET_TX_NEIGHBOR_FAIL_CNT_NAME "net_tx_neighbor_fail_cnt"
-#define FD_METRICS_COUNTER_NET_TX_NEIGHBOR_FAIL_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
-#define FD_METRICS_COUNTER_NET_TX_NEIGHBOR_FAIL_CNT_DESC "Number of packet transmit jobs dropped due to unresolved neighbor."
-#define FD_METRICS_COUNTER_NET_TX_NEIGHBOR_FAIL_CNT_CVT (FD_METRICS_CONVERTER_NONE)
+#define FD_METRICS_COUNTER_NET_TX_FALLBACK_CNT_OFF (28UL)
+#define FD_METRICS_COUNTER_NET_TX_FALLBACK_CNT_NAME "net_tx_fallback_cnt"
+#define FD_METRICS_COUNTER_NET_TX_FALLBACK_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
+#define FD_METRICS_COUNTER_NET_TX_FALLBACK_CNT_DESC "Number of packet transmit jobs handled via sockets fallback instead of XDP."
+#define FD_METRICS_COUNTER_NET_TX_FALLBACK_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_TX_FULL_FAIL_CNT_OFF (28UL)
+#define FD_METRICS_COUNTER_NET_TX_FULL_FAIL_CNT_OFF (29UL)
#define FD_METRICS_COUNTER_NET_TX_FULL_FAIL_CNT_NAME "net_tx_full_fail_cnt"
#define FD_METRICS_COUNTER_NET_TX_FULL_FAIL_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_TX_FULL_FAIL_CNT_DESC "Number of packet transmit jobs dropped due to XDP TX ring full or missing completions."
#define FD_METRICS_COUNTER_NET_TX_FULL_FAIL_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_GAUGE_NET_TX_BUSY_CNT_OFF (29UL)
+#define FD_METRICS_GAUGE_NET_TX_BUSY_CNT_OFF (30UL)
#define FD_METRICS_GAUGE_NET_TX_BUSY_CNT_NAME "net_tx_busy_cnt"
#define FD_METRICS_GAUGE_NET_TX_BUSY_CNT_TYPE (FD_METRICS_TYPE_GAUGE)
#define FD_METRICS_GAUGE_NET_TX_BUSY_CNT_DESC "Number of transmit buffers currently busy."
#define FD_METRICS_GAUGE_NET_TX_BUSY_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_GAUGE_NET_TX_IDLE_CNT_OFF (30UL)
+#define FD_METRICS_GAUGE_NET_TX_IDLE_CNT_OFF (31UL)
#define FD_METRICS_GAUGE_NET_TX_IDLE_CNT_NAME "net_tx_idle_cnt"
#define FD_METRICS_GAUGE_NET_TX_IDLE_CNT_TYPE (FD_METRICS_TYPE_GAUGE)
#define FD_METRICS_GAUGE_NET_TX_IDLE_CNT_DESC "Number of transmit buffers currently idle."
#define FD_METRICS_GAUGE_NET_TX_IDLE_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_XSK_TX_WAKEUP_CNT_OFF (31UL)
+#define FD_METRICS_COUNTER_NET_XSK_TX_WAKEUP_CNT_OFF (32UL)
#define FD_METRICS_COUNTER_NET_XSK_TX_WAKEUP_CNT_NAME "net_xsk_tx_wakeup_cnt"
#define FD_METRICS_COUNTER_NET_XSK_TX_WAKEUP_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_XSK_TX_WAKEUP_CNT_DESC "Number of XSK sendto syscalls dispatched."
#define FD_METRICS_COUNTER_NET_XSK_TX_WAKEUP_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_XSK_RX_WAKEUP_CNT_OFF (32UL)
+#define FD_METRICS_COUNTER_NET_XSK_RX_WAKEUP_CNT_OFF (33UL)
#define FD_METRICS_COUNTER_NET_XSK_RX_WAKEUP_CNT_NAME "net_xsk_rx_wakeup_cnt"
#define FD_METRICS_COUNTER_NET_XSK_RX_WAKEUP_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_XSK_RX_WAKEUP_CNT_DESC "Number of XSK recvmsg syscalls dispatched."
#define FD_METRICS_COUNTER_NET_XSK_RX_WAKEUP_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_XDP_RX_DROPPED_OTHER_OFF (33UL)
+#define FD_METRICS_COUNTER_NET_XDP_RX_DROPPED_OTHER_OFF (34UL)
#define FD_METRICS_COUNTER_NET_XDP_RX_DROPPED_OTHER_NAME "net_xdp_rx_dropped_other"
#define FD_METRICS_COUNTER_NET_XDP_RX_DROPPED_OTHER_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_XDP_RX_DROPPED_OTHER_DESC "xdp_statistics_v0.rx_dropped: Dropped for other reasons"
#define FD_METRICS_COUNTER_NET_XDP_RX_DROPPED_OTHER_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_XDP_RX_INVALID_DESCS_OFF (34UL)
+#define FD_METRICS_COUNTER_NET_XDP_RX_INVALID_DESCS_OFF (35UL)
#define FD_METRICS_COUNTER_NET_XDP_RX_INVALID_DESCS_NAME "net_xdp_rx_invalid_descs"
#define FD_METRICS_COUNTER_NET_XDP_RX_INVALID_DESCS_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_XDP_RX_INVALID_DESCS_DESC "xdp_statistics_v0.rx_invalid_descs: Dropped due to invalid descriptor"
#define FD_METRICS_COUNTER_NET_XDP_RX_INVALID_DESCS_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_XDP_TX_INVALID_DESCS_OFF (35UL)
+#define FD_METRICS_COUNTER_NET_XDP_TX_INVALID_DESCS_OFF (36UL)
#define FD_METRICS_COUNTER_NET_XDP_TX_INVALID_DESCS_NAME "net_xdp_tx_invalid_descs"
#define FD_METRICS_COUNTER_NET_XDP_TX_INVALID_DESCS_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_XDP_TX_INVALID_DESCS_DESC "xdp_statistics_v0.tx_invalid_descs: Dropped due to invalid descriptor"
#define FD_METRICS_COUNTER_NET_XDP_TX_INVALID_DESCS_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_XDP_RX_RING_FULL_OFF (36UL)
+#define FD_METRICS_COUNTER_NET_XDP_RX_RING_FULL_OFF (37UL)
#define FD_METRICS_COUNTER_NET_XDP_RX_RING_FULL_NAME "net_xdp_rx_ring_full"
#define FD_METRICS_COUNTER_NET_XDP_RX_RING_FULL_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_XDP_RX_RING_FULL_DESC "xdp_statistics_v1.rx_ring_full: Dropped due to rx ring being full"
#define FD_METRICS_COUNTER_NET_XDP_RX_RING_FULL_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_XDP_RX_FILL_RING_EMPTY_DESCS_OFF (37UL)
+#define FD_METRICS_COUNTER_NET_XDP_RX_FILL_RING_EMPTY_DESCS_OFF (38UL)
#define FD_METRICS_COUNTER_NET_XDP_RX_FILL_RING_EMPTY_DESCS_NAME "net_xdp_rx_fill_ring_empty_descs"
#define FD_METRICS_COUNTER_NET_XDP_RX_FILL_RING_EMPTY_DESCS_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_XDP_RX_FILL_RING_EMPTY_DESCS_DESC "xdp_statistics_v1.rx_fill_ring_empty_descs: Failed to retrieve item from fill ring"
#define FD_METRICS_COUNTER_NET_XDP_RX_FILL_RING_EMPTY_DESCS_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_XDP_TX_RING_EMPTY_DESCS_OFF (38UL)
+#define FD_METRICS_COUNTER_NET_XDP_TX_RING_EMPTY_DESCS_OFF (39UL)
#define FD_METRICS_COUNTER_NET_XDP_TX_RING_EMPTY_DESCS_NAME "net_xdp_tx_ring_empty_descs"
#define FD_METRICS_COUNTER_NET_XDP_TX_RING_EMPTY_DESCS_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_XDP_TX_RING_EMPTY_DESCS_DESC "xdp_statistics_v1.tx_ring_empty_descs: Failed to retrieve item from tx ring"
#define FD_METRICS_COUNTER_NET_XDP_TX_RING_EMPTY_DESCS_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_RX_GRE_CNT_OFF (39UL)
+#define FD_METRICS_COUNTER_NET_RX_GRE_CNT_OFF (40UL)
#define FD_METRICS_COUNTER_NET_RX_GRE_CNT_NAME "net_rx_gre_cnt"
#define FD_METRICS_COUNTER_NET_RX_GRE_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_RX_GRE_CNT_DESC "Number of valid GRE packets received"
#define FD_METRICS_COUNTER_NET_RX_GRE_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_RX_GRE_INVALID_CNT_OFF (40UL)
+#define FD_METRICS_COUNTER_NET_RX_GRE_INVALID_CNT_OFF (41UL)
#define FD_METRICS_COUNTER_NET_RX_GRE_INVALID_CNT_NAME "net_rx_gre_invalid_cnt"
#define FD_METRICS_COUNTER_NET_RX_GRE_INVALID_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_RX_GRE_INVALID_CNT_DESC "Number of invalid GRE packets received"
#define FD_METRICS_COUNTER_NET_RX_GRE_INVALID_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_RX_GRE_IGNORED_CNT_OFF (41UL)
+#define FD_METRICS_COUNTER_NET_RX_GRE_IGNORED_CNT_OFF (42UL)
#define FD_METRICS_COUNTER_NET_RX_GRE_IGNORED_CNT_NAME "net_rx_gre_ignored_cnt"
#define FD_METRICS_COUNTER_NET_RX_GRE_IGNORED_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_RX_GRE_IGNORED_CNT_DESC "Number of received but ignored GRE packets"
#define FD_METRICS_COUNTER_NET_RX_GRE_IGNORED_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_TX_GRE_CNT_OFF (42UL)
+#define FD_METRICS_COUNTER_NET_TX_GRE_CNT_OFF (43UL)
#define FD_METRICS_COUNTER_NET_TX_GRE_CNT_NAME "net_tx_gre_cnt"
#define FD_METRICS_COUNTER_NET_TX_GRE_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
#define FD_METRICS_COUNTER_NET_TX_GRE_CNT_DESC "Number of GRE packet transmit jobs submitted"
#define FD_METRICS_COUNTER_NET_TX_GRE_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-#define FD_METRICS_COUNTER_NET_TX_GRE_ROUTE_FAIL_CNT_OFF (43UL)
-#define FD_METRICS_COUNTER_NET_TX_GRE_ROUTE_FAIL_CNT_NAME "net_tx_gre_route_fail_cnt"
-#define FD_METRICS_COUNTER_NET_TX_GRE_ROUTE_FAIL_CNT_TYPE (FD_METRICS_TYPE_COUNTER)
-#define FD_METRICS_COUNTER_NET_TX_GRE_ROUTE_FAIL_CNT_DESC "Number of GRE packets transmit jobs dropped due to route failure"
-#define FD_METRICS_COUNTER_NET_TX_GRE_ROUTE_FAIL_CNT_CVT (FD_METRICS_CONVERTER_NONE)
-
#define FD_METRICS_NET_TOTAL (28UL)
extern const fd_metrics_meta_t FD_METRICS_NET[FD_METRICS_NET_TOTAL];
diff --git a/src/disco/metrics/generated/fd_metrics_netlnk.c b/src/disco/metrics/generated/fd_metrics_netlnk.c
index 315e4bfbd7..b187412f31 100644
--- a/src/disco/metrics/generated/fd_metrics_netlnk.c
+++ b/src/disco/metrics/generated/fd_metrics_netlnk.c
@@ -11,8 +11,4 @@ const fd_metrics_meta_t FD_METRICS_NETLNK[FD_METRICS_NETLNK_TOTAL] = {
DECLARE_METRIC( NETLNK_INTERFACE_COUNT, GAUGE ),
DECLARE_METRIC_ENUM( NETLNK_ROUTE_COUNT, GAUGE, ROUTE_TABLE, LOCAL ),
DECLARE_METRIC_ENUM( NETLNK_ROUTE_COUNT, GAUGE, ROUTE_TABLE, MAIN ),
- DECLARE_METRIC( NETLNK_NEIGH_PROBE_SENT, COUNTER ),
- DECLARE_METRIC( NETLNK_NEIGH_PROBE_FAILS, COUNTER ),
- DECLARE_METRIC( NETLNK_NEIGH_PROBE_RATE_LIMIT_HOST, COUNTER ),
- DECLARE_METRIC( NETLNK_NEIGH_PROBE_RATE_LIMIT_GLOBAL, COUNTER ),
};
diff --git a/src/disco/metrics/generated/fd_metrics_netlnk.h b/src/disco/metrics/generated/fd_metrics_netlnk.h
index dd884acdb0..6c5d60d074 100644
--- a/src/disco/metrics/generated/fd_metrics_netlnk.h
+++ b/src/disco/metrics/generated/fd_metrics_netlnk.h
@@ -48,29 +48,5 @@
#define FD_METRICS_GAUGE_NETLNK_ROUTE_COUNT_LOCAL_OFF (23UL)
#define FD_METRICS_GAUGE_NETLNK_ROUTE_COUNT_MAIN_OFF (24UL)
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_SENT_OFF (25UL)
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_SENT_NAME "netlnk_neigh_probe_sent"
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_SENT_TYPE (FD_METRICS_TYPE_COUNTER)
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_SENT_DESC "Number of neighbor solicit requests sent to kernel"
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_SENT_CVT (FD_METRICS_CONVERTER_NONE)
-
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_FAILS_OFF (26UL)
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_FAILS_NAME "netlnk_neigh_probe_fails"
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_FAILS_TYPE (FD_METRICS_TYPE_COUNTER)
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_FAILS_DESC "Number of neighbor solicit requests that failed to send (kernel too slow)"
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_FAILS_CVT (FD_METRICS_CONVERTER_NONE)
-
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_HOST_OFF (27UL)
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_HOST_NAME "netlnk_neigh_probe_rate_limit_host"
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_HOST_TYPE (FD_METRICS_TYPE_COUNTER)
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_HOST_DESC "Number of neighbor solicit that exceeded the per-host rate limit"
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_HOST_CVT (FD_METRICS_CONVERTER_NONE)
-
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_GLOBAL_OFF (28UL)
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_GLOBAL_NAME "netlnk_neigh_probe_rate_limit_global"
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_GLOBAL_TYPE (FD_METRICS_TYPE_COUNTER)
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_GLOBAL_DESC "Number of neighbor solicit that exceeded the global rate limit"
-#define FD_METRICS_COUNTER_NETLNK_NEIGH_PROBE_RATE_LIMIT_GLOBAL_CVT (FD_METRICS_CONVERTER_NONE)
-
-#define FD_METRICS_NETLNK_TOTAL (13UL)
+#define FD_METRICS_NETLNK_TOTAL (9UL)
extern const fd_metrics_meta_t FD_METRICS_NETLNK[FD_METRICS_NETLNK_TOTAL];
diff --git a/src/disco/metrics/metrics.xml b/src/disco/metrics/metrics.xml
index 0de555fb6a..aa4cf9f59a 100644
--- a/src/disco/metrics/metrics.xml
+++ b/src/disco/metrics/metrics.xml
@@ -51,8 +51,13 @@ metric introduced.
+
+
+
+
+
-
+
@@ -63,8 +68,8 @@ metric introduced.
-
-
+
+
@@ -85,7 +90,6 @@ metric introduced.
-
@@ -893,10 +897,6 @@ metric introduced.
-
-
-
-
diff --git a/src/disco/net/Local.mk b/src/disco/net/Local.mk
index 0299677e5e..463cc1a639 100644
--- a/src/disco/net/Local.mk
+++ b/src/disco/net/Local.mk
@@ -2,3 +2,6 @@ ifdef FD_HAS_ALLOCA
$(call add-hdrs,fd_net_tile.h)
$(call add-objs,fd_net_tile_topo,fd_disco)
endif
+$(call add-hdrs,fd_find_16x16.h)
+$(call make-unit-test,test_find_16x16,test_find_16x16,fd_util)
+$(call run-unit-test,test_find_16x16)
diff --git a/src/disco/net/fd_find_16x16.h b/src/disco/net/fd_find_16x16.h
new file mode 100644
index 0000000000..f127a7ffda
--- /dev/null
+++ b/src/disco/net/fd_find_16x16.h
@@ -0,0 +1,47 @@
+#ifndef HEADER_fd_src_disco_net_fd_find_16x16_h
+#define HEADER_fd_src_disco_net_fd_find_16x16_h
+
+/* fd_find_16x16() provides an API to find an element in ushort[ 16 ].
+
+ If multiple elements match, returns the one at the lowest index.
+ If no element matched, returns 16. */
+
+#include "../../util/fd_util_base.h"
+
+#if FD_HAS_AVX
+
+#include "../../util/simd/fd_avx.h"
+
+static inline uint
+fd_find_16x16_avx( wu_t const ymm,
+ ushort x ) {
+ wc_t cmp_res = wh_eq( ymm, wh_bcast( x ) );
+ uint mask = (uint)_mm256_movemask_epi8( cmp_res );
+#if defined(__LZNCT__)
+ int lane_idx = _lzcnt_u32( mask ); /* lane_idx==32 if mask==0 */
+#else
+ int lane_idx = fd_uint_find_lsb_w_default( mask, 32 );
+#endif
+ return ((uint)lane_idx)>>1;
+}
+
+#endif
+
+static inline uint
+fd_find_16x16_generic( ushort const ele[ 16 ],
+ ushort x ) {
+ /* Generates surprisingly bad code on GCC 15 and Clang 20 */
+ uint i;
+ for( i=0; i<16; i++ ) {
+ if( ele[ i ]==x ) break;
+ }
+ return i;
+}
+
+#if FD_HAS_AVX
+static inline uint fd_find_16x16( ushort const ele[ 16 ], ushort x ) { return fd_find_16x16_avx( wu_ldu( ele ),x ); }
+#else
+#define fd_find_16x16 fd_find_16x16_generic
+#endif
+
+#endif /* HEADER_fd_src_disco_net_fd_find_16x16_h */
diff --git a/src/disco/net/fd_net_common.h b/src/disco/net/fd_net_common.h
deleted file mode 100644
index 91dc63b129..0000000000
--- a/src/disco/net/fd_net_common.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef HEADER_fd_src_disco_net_fd_net_common_h
-#define HEADER_fd_src_disco_net_fd_net_common_h
-
-/* fd_net_common.h contains common definitions across net tile implementations. */
-
-/* REPAIR_PING_SZ is the sz of a ping packet for the repair protocol.
- Because pings are routed to the same port as shreds without any
- discriminant encoding, we have to use the packet sz to interpret the
- payload. Note that any valid shred must be either FD_SHRED_MAX_SZ
- or FD_SHRED_MIN_SZ ie. will never be FD_REPAIR_PING_SZ.*/
-
-#define REPAIR_PING_SZ (174UL)
-
-#endif /* HEADER_fd_src_disco_net_fd_net_common_h */
diff --git a/src/disco/net/fd_net_router.h b/src/disco/net/fd_net_router.h
new file mode 100644
index 0000000000..fd00631536
--- /dev/null
+++ b/src/disco/net/fd_net_router.h
@@ -0,0 +1,101 @@
+#ifndef HEADER_fd_src_disco_net_fd_net_router_h
+#define HEADER_fd_src_disco_net_fd_net_router_h
+
+/* fd_net_router.h provides an internal API for userland routing. */
+
+#include "../../waltz/mib/fd_netdev_tbl.h"
+#include "../../waltz/ip/fd_fib4.h"
+#include "../../waltz/neigh/fd_neigh4_map.h"
+
+#include
+
+struct fd_net_router {
+ /* Route and neighbor tables */
+ fd_fib4_t const * fib_local;
+ fd_fib4_t const * fib_main;
+ fd_neigh4_hmap_t neigh4[1];
+ fd_netdev_tbl_join_t netdev_tbl;
+
+ uint if_idx;
+ uint bind_address;
+ uint default_address;
+};
+
+typedef struct fd_net_router fd_net_router_t;
+
+struct fd_net_next_hop {
+ uchar mac_addrs[12]; /* First 12 bytes of Ethernet header */
+ uint src_ip;
+
+ uint gre_src_ip;
+ uint gre_dst_ip;
+};
+
+typedef struct fd_net_next_hop fd_next_hop_t;
+
+/* FD_NET_HOP_* give the result types of a route lookup. */
+
+#define FD_NET_HOP_RAW 0
+#define FD_NET_HOP_GRE 1
+#define FD_NET_HOP_FALLBACK 2
+
+/* fd_net_tx_route routes an outgoing packet based on its destination IP
+ address. Returns an action FD_NET_HOP_*.
+
+ Saves out routing instructions to net_ctx->tx_op, including:
+ - XSK index
+ - source IP address
+ - source and dest MAC addresses
+ - GRE tunnelling info */
+
+static FD_FN_UNUSED uint
+fd_net_tx_route( fd_net_router_t const * router,
+ fd_next_hop_t * out,
+ uint dst_ip ) {
+ /* Route lookup */
+
+ fd_fib4_hop_t hop[2] = {0};
+ fd_fib4_lookup( router->fib_local, hop+0, dst_ip, 0UL );
+ fd_fib4_lookup( router->fib_main, hop+1, dst_ip, 0UL );
+ fd_fib4_hop_t const * next_hop = fd_fib4_hop_or( hop+0, hop+1 );
+
+ uint rtype = next_hop->rtype;
+ uint if_idx = next_hop->if_idx;
+ uint ip4_src = next_hop->ip4_src;
+
+ if( FD_UNLIKELY( rtype!=FD_FIB4_RTYPE_UNICAST ) ) return FD_NET_HOP_FALLBACK;
+ if( FD_UNLIKELY( if_idx>router->netdev_tbl.hdr->dev_cnt ) ) return FD_NET_HOP_FALLBACK;
+ fd_netdev_t const * netdev = &router->netdev_tbl.dev_tbl[ if_idx ];
+
+ ip4_src = fd_uint_if( !!router->bind_address, router->bind_address, ip4_src );
+ out->src_ip = ip4_src;
+
+ if( netdev->dev_type==ARPHRD_IPGRE ) {
+ /* Packet targets a GRE tunnel */
+ if( netdev->gre_src_ip ) out->gre_src_ip = netdev->gre_src_ip;
+ out->gre_dst_ip = netdev->gre_dst_ip;
+ return FD_NET_HOP_GRE;
+ }
+
+ if( FD_UNLIKELY( if_idx!=router->if_idx ) ) return FD_NET_HOP_FALLBACK;
+
+ /* Neighbor resolve */
+ uint neigh_ip = next_hop->ip4_gw;
+ if( !neigh_ip ) neigh_ip = dst_ip;
+
+ fd_neigh4_hmap_query_t neigh_query[1];
+ int neigh_res = fd_neigh4_hmap_query_try( router->neigh4, &neigh_ip, NULL, neigh_query, 0 );
+ if( FD_UNLIKELY( neigh_res!=FD_MAP_SUCCESS ) ) return FD_NET_HOP_FALLBACK;
+ fd_neigh4_entry_t const * neigh = fd_neigh4_hmap_query_ele_const( neigh_query );
+ if( FD_UNLIKELY( neigh->state != FD_NEIGH4_STATE_ACTIVE ) ) return FD_NET_HOP_FALLBACK;
+ ip4_src = fd_uint_if( !ip4_src, router->default_address, ip4_src );
+ out->src_ip = ip4_src;
+ memcpy( out->mac_addrs+0, neigh->mac_addr, 6 );
+ memcpy( out->mac_addrs+6, netdev->mac_addr, 6 );
+
+ if( FD_UNLIKELY( fd_neigh4_hmap_query_test( neigh_query ) ) ) return FD_NET_HOP_FALLBACK;
+
+ return FD_NET_HOP_RAW;
+}
+
+#endif /* HEADER_fd_src_disco_net_xdp_fd_xdp_route_h */
diff --git a/src/disco/net/fd_net_tile.h b/src/disco/net/fd_net_tile.h
index 2519f7ae68..dc3b1d157f 100644
--- a/src/disco/net/fd_net_tile.h
+++ b/src/disco/net/fd_net_tile.h
@@ -101,11 +101,10 @@ fd_topos_tile_in_net( fd_topo_t * topo,
int polled );
/* This should be called *after* all app<->net tile links have been
- created. Should be called once per net tile. */
+ created, and all flow steering rules have been set up. */
void
-fd_topos_net_tile_finish( fd_topo_t * topo,
- ulong net_kind_id );
+fd_topos_net_tile_finish( fd_topo_t * topo );
FD_PROTOTYPES_END
diff --git a/src/disco/net/fd_net_tile_topo.c b/src/disco/net/fd_net_tile_topo.c
index 1c198ca0eb..b544d3af89 100644
--- a/src/disco/net/fd_net_tile_topo.c
+++ b/src/disco/net/fd_net_tile_topo.c
@@ -15,9 +15,6 @@ setup_xdp_tile( fd_topo_t * topo,
ulong const * tile_to_cpu,
fd_config_net_t const * net_cfg ) {
fd_topo_tile_t * tile = fd_topob_tile( topo, "net", "net", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
- fd_topob_link( topo, "net_netlnk", "net_netlnk", 128UL, 0UL, 0UL );
- fd_topob_tile_in( topo, "netlnk", 0UL, "metric_in", "net_netlnk", i, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
- fd_topob_tile_out( topo, "net", i, "net_netlnk", i );
fd_netlink_topo_join( topo, netlink_tile, tile );
fd_topo_obj_t * umem_obj = fd_topob_obj( topo, "dcache", "net_umem" );
@@ -45,10 +42,6 @@ setup_xdp_tile( fd_topo_t * topo,
/* Allocate free ring */
tile->xdp.free_ring_depth = tile->xdp.xdp_tx_queue_size;
- if( i==0 ) {
- /* Allocate additional frames for loopback */
- tile->xdp.free_ring_depth += 16384UL;
- }
}
static void
@@ -77,6 +70,7 @@ fd_topos_net_tiles( fd_topo_t * topo,
/* Create workspaces */
+ fd_pod_insert_cstr( topo->props, "net.provider", net_cfg->provider );
if( 0==strcmp( net_cfg->provider, "xdp" ) ) {
/* net: private working memory of the net tiles */
@@ -85,14 +79,20 @@ fd_topos_net_tiles( fd_topo_t * topo,
fd_topob_wksp( topo, "netlnk" );
/* netbase: shared network config (config plane) */
fd_topob_wksp( topo, "netbase" );
- /* net_netlnk: net->netlnk ARP requests */
- fd_topob_wksp( topo, "net_netlnk" );
+ /* sock: private working memory of the fallback sock tile */
+ fd_topob_wksp( topo, "sock" );
+ /* net_sock: net->sock link for TX fallback */
+ fd_topob_wksp( topo, "net_sock" );
fd_topo_tile_t * netlink_tile = fd_topob_tile( topo, "netlnk", "netlnk", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
fd_netlink_topo_create( netlink_tile, topo, netlnk_max_routes, netlnk_max_peer_routes, netlnk_max_neighbors, net_cfg->interface );
+ setup_sock_tile( topo, tile_to_cpu, net_cfg );
for( ulong i=0UL; iprovider, "socket" ) ) {
@@ -181,24 +181,14 @@ fd_topos_tile_in_net( fd_topo_t * topo,
}
}
-void
-fd_topos_net_tile_finish( fd_topo_t * topo,
- ulong net_kind_id ) {
- if( !topo_is_xdp( topo ) ) return;
-
- fd_topo_tile_t * net_tile = &topo->tiles[ fd_topo_find_tile( topo, "net", net_kind_id ) ];
-
+static void
+fd_topos_xdp_tile_finish( fd_topo_t * topo,
+ fd_topo_tile_t * net_tile ) {
ulong rx_depth = net_tile->xdp.xdp_rx_queue_size;
ulong tx_depth = net_tile->xdp.xdp_tx_queue_size;
rx_depth += (rx_depth/2UL);
tx_depth += (tx_depth/2UL);
- if( net_kind_id==0 ) {
- /* Double it for loopback XSK */
- rx_depth *= 2UL;
- tx_depth *= 2UL;
- }
-
ulong cum_frame_cnt = rx_depth + tx_depth;
/* Count up the depth of all RX mcaches */
@@ -213,7 +203,7 @@ fd_topos_net_tile_finish( fd_topo_t * topo,
/* Create a dcache object */
- ulong umem_obj_id = fd_pod_queryf_ulong( topo->props, ULONG_MAX, "net.%lu.umem", net_kind_id );
+ ulong umem_obj_id = fd_pod_queryf_ulong( topo->props, ULONG_MAX, "net.%lu.umem", net_tile->kind_id );
FD_TEST( umem_obj_id!=ULONG_MAX );
FD_TEST( net_tile->net.umem_dcache_obj_id > 0 );
@@ -221,3 +211,16 @@ fd_topos_net_tile_finish( fd_topo_t * topo,
fd_pod_insertf_ulong( topo->props, 2UL, "obj.%lu.burst", umem_obj_id ); /* 4096 byte padding */
fd_pod_insertf_ulong( topo->props, 2048UL, "obj.%lu.mtu", umem_obj_id );
}
+
+void
+fd_topos_net_tile_finish( fd_topo_t * topo ) {
+ for( ulong i=0UL; i<(topo->tile_cnt); i++ ) {
+ if( 0==strcmp( topo->tiles[ i ].name, "net" ) ) {
+ fd_topos_xdp_tile_finish( topo, &topo->tiles[ i ] );
+ }
+ }
+
+ /* All net providers except "socket" use the sock tile as a fallback.
+ This means that the sock tile should steer all packets it receives
+ back to the high-performance tile. */
+}
diff --git a/src/disco/net/sock/fd_sock_tile.c b/src/disco/net/sock/fd_sock_tile.c
index 65cfc53b17..6ed1d1d4a4 100644
--- a/src/disco/net/sock/fd_sock_tile.c
+++ b/src/disco/net/sock/fd_sock_tile.c
@@ -1,6 +1,5 @@
#define _GNU_SOURCE /* dup3 */
#include "fd_sock_tile_private.h"
-#include "../fd_net_common.h"
#include "../../topo/fd_topo.h"
#include "../../../util/net/fd_eth.h"
#include "../../../util/net/fd_ip4.h"
@@ -29,11 +28,6 @@
Must be aligned by alignof(struct cmsghdr) */
#define FD_SOCK_CMSG_MAX (64UL)
-/* Value of the sock_idx for Firedancer repair intake.
- Used to determine whether repair packets should go to shred vs repair tile.
- This value is validated at startup. */
-#define REPAIR_SHRED_SOCKET_ID (4U)
-
static ulong
populate_allowed_seccomp( fd_topo_t const * topo,
fd_topo_tile_t const * tile,
@@ -185,49 +179,18 @@ privileged_init( fd_topo_t * topo,
descriptors starting at sock_fd_min. */
int sock_fd_min = RX_SOCK_FD_MIN;
- ushort udp_port_candidates[] = {
- (ushort)tile->sock.net.legacy_transaction_listen_port,
- (ushort)tile->sock.net.quic_transaction_listen_port,
- (ushort)tile->sock.net.shred_listen_port,
- (ushort)tile->sock.net.gossip_listen_port,
- (ushort)tile->sock.net.repair_intake_listen_port,
- (ushort)tile->sock.net.repair_serve_listen_port,
- (ushort)tile->sock.net.send_src_port
- };
- static char const * udp_port_links[] = {
- "net_quic", /* legacy_transaction_listen_port */
- "net_quic", /* quic_transaction_listen_port */
- "net_shred", /* shred_listen_port (turbine) */
- "net_gossip", /* gossip_listen_port */
- "net_shred", /* shred_listen_port (repair) */
- "net_repair", /* repair_serve_listen_port */
- "net_send" /* send_src_port */
- };
- static uchar const udp_port_protos[] = {
- DST_PROTO_TPU_UDP, /* legacy_transaction_listen_port */
- DST_PROTO_TPU_QUIC, /* quic_transaction_listen_port */
- DST_PROTO_SHRED, /* shred_listen_port (turbine) */
- DST_PROTO_GOSSIP, /* gossip_listen_port */
- DST_PROTO_REPAIR, /* shred_listen_port (repair) */
- DST_PROTO_REPAIR /* repair_serve_listen_port */
- };
- for( uint candidate_idx=0U; candidate_idx<6; candidate_idx++ ) {
- if( !udp_port_candidates[ candidate_idx ] ) continue;
- uint sock_idx = ctx->sock_cnt;
- if( candidate_idx>FD_SOCK_TILE_MAX_SOCKETS ) FD_LOG_ERR(( "too many sockets" ));
- ushort port = (ushort)udp_port_candidates[ candidate_idx ];
-
- /* Validate value of REPAIR_SHRED_SOCKET_ID */
- if( udp_port_candidates[sock_idx]==tile->sock.net.repair_intake_listen_port )
- FD_TEST( sock_idx==REPAIR_SHRED_SOCKET_ID );
- if( udp_port_candidates[sock_idx]==tile->sock.net.repair_serve_listen_port )
- FD_TEST( sock_idx==REPAIR_SHRED_SOCKET_ID+1 );
-
- char const * target_link = udp_port_links[ candidate_idx ];
+ ulong const rule_cnt = tile->net.rx_rules.rx_rule_cnt;
+ if( FD_UNLIKELY( !rule_cnt ) ) FD_LOG_ERR(( "sock tile has no RX rules" ));
+ for( uint rule_idx=0U; rule_idxnet.rx_rules.rx_rules[ rule_idx ];
+ uint const sock_idx = ctx->sock_cnt;
+ ushort const port = (ushort)rule->port;
+
+ char const * target_link = rule->link;
ctx->link_rx_map[ sock_idx ] = 0xFF;
for( ulong j=0UL; j<(tile->out_cnt); j++ ) {
if( 0==strcmp( topo->links[ tile->out_link_id[ j ] ].name, target_link ) ) {
- ctx->proto_id [ sock_idx ] = (uchar)udp_port_protos[ candidate_idx ];
+ ctx->proto_id [ sock_idx ] = (uchar)rule->proto_id;
ctx->link_rx_map [ sock_idx ] = (uchar)j;
ctx->rx_sock_port[ sock_idx ] = (ushort)port;
break;
@@ -242,6 +205,8 @@ privileged_init( fd_topo_t * topo,
ctx->pollfd[ sock_idx ].fd = sock_fd;
ctx->pollfd[ sock_idx ].events = POLLIN;
ctx->sock_cnt++;
+ FD_LOG_INFO(( "Listening on " FD_IP4_ADDR_FMT ":%hu",
+ FD_IP4_ADDR_FMT_ARGS( tile->sock.net.bind_address ), port ));
}
/* Create transmit socket */
@@ -285,7 +250,9 @@ unprivileged_init( fd_topo_t * topo,
}
for( ulong i=0UL; i<(tile->in_cnt); i++ ) {
- if( !strstr( topo->links[ tile->in_link_id[ i ] ].name, "_net" ) ) {
+ char const * link_name = topo->links[ tile->in_link_id[ i ] ].name;
+ if( !strstr( link_name, "_net" ) &&
+ !strstr( link_name, "_sock" ) ) {
FD_LOG_ERR(( "in link %lu is not a net TX link", i ));
}
fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
@@ -411,18 +378,7 @@ poll_rx_socket( fd_sock_tile_t * ctx,
ulong sig = fd_disco_netmux_sig( sa->sin_addr.s_addr, fd_ushort_bswap( sa->sin_port ), sa->sin_addr.s_addr, proto, hdr_sz );
ulong tspub = fd_frag_meta_ts_comp( ts );
- /* default for repair intake is to send to [shreds] to shred tile.
- ping messages should be routed to the repair. */
- if( FD_UNLIKELY( sock_idx==REPAIR_SHRED_SOCKET_ID && frame_sz==REPAIR_PING_SZ ) ) {
- uchar repair_rx_link = ctx->link_rx_map[ REPAIR_SHRED_SOCKET_ID+1 ];
- fd_sock_link_rx_t * repair_link = ctx->link_rx + repair_rx_link;
- uchar * repair_buf = fd_chunk_to_laddr( repair_link->base, repair_link->chunk );
- memcpy( repair_buf, eth_hdr, frame_sz );
- fd_stem_publish( stem, repair_rx_link, sig, repair_link->chunk, frame_sz, 0UL, 0UL, tspub );
- repair_link->chunk = fd_dcache_compact_next( repair_link->chunk, FD_NET_MTU, repair_link->chunk0, repair_link->wmark );
- } else {
- fd_stem_publish( stem, rx_link, sig, chunk, frame_sz, 0UL, 0UL, tspub );
- }
+ fd_stem_publish( stem, rx_link, sig, chunk, frame_sz, 0UL, 0UL, tspub );
last_chunk = chunk;
}
diff --git a/src/disco/net/sock/fd_sock_tile_private.h b/src/disco/net/sock/fd_sock_tile_private.h
index d404940141..1865df789d 100644
--- a/src/disco/net/sock/fd_sock_tile_private.h
+++ b/src/disco/net/sock/fd_sock_tile_private.h
@@ -3,7 +3,7 @@
#if FD_HAS_HOSTED
-#include "../../../util/fd_util_base.h"
+#include "../../topo/fd_topo.h"
#include "../../metrics/generated/fd_metrics_enums.h"
#include
#include
@@ -11,7 +11,7 @@
/* FD_SOCK_TILE_MAX_SOCKETS controls the max number of UDP ports that a
sock tile can bind to. */
-#define FD_SOCK_TILE_MAX_SOCKETS (8)
+#define FD_SOCK_TILE_MAX_SOCKETS FD_TOPO_NET_RX_RULE_MAX
/* MAX_NET_INS controls the max number of TX links that a sock tile can
serve. */
diff --git a/src/disco/net/test_find_16x16.c b/src/disco/net/test_find_16x16.c
new file mode 100644
index 0000000000..7bb419553e
--- /dev/null
+++ b/src/disco/net/test_find_16x16.c
@@ -0,0 +1,28 @@
+/* Bit wasteful to have this as a separate test executable, consider
+ merging this with another test. */
+
+#include "../../util/fd_util.h"
+#include "fd_find_16x16.h"
+
+int
+main( int argc,
+ char ** argv ) {
+ fd_boot( &argc, &argv );
+
+ ushort ti[16];
+#define INIT_TI( EXPR ) do { for( ulong j=0UL; j<16UL; j++ ) { ti[j] = (EXPR); } } while( 0 )
+
+ INIT_TI( 0 );
+ FD_TEST( fd_find_16x16( ti, 0 )==0 );
+ for( ulong j=0UL; j<16UL; j++ ) {
+ ti[ j ] = (ushort)( USHORT_MAX-j );
+ FD_TEST( fd_find_16x16( ti, 0 )==j+1UL );
+ FD_TEST( fd_find_16x16( ti, ti[ j ] )==j );
+ }
+
+#undef INIT_TI
+
+ FD_LOG_NOTICE(( "pass" ));
+ fd_halt();
+ return 0;
+}
diff --git a/src/disco/net/xdp/fd_xdp_tile.c b/src/disco/net/xdp/fd_xdp_tile.c
index f54b9d2eed..1582df9652 100644
--- a/src/disco/net/xdp/fd_xdp_tile.c
+++ b/src/disco/net/xdp/fd_xdp_tile.c
@@ -1,6 +1,16 @@
-/* The xdp tile translates between AF_XDP and fd_tango
- traffic. It is responsible for setting up the XDP and
- XSK socket configuration. */
+/* The xdp tile translates between AF_XDP and fd_tango traffic. It is
+ responsible for setting up the XDP and XSK socket configuration.
+
+ ┌──────┐
+ RX │ sock │
+ ┌─────────┼ tile │
+ │ └─▲────┘
+ │ TX│
+ │ │
+ ┌───▼──┐ TX ┌─┴────┐ TX ┌──────┐
+ │ quic ├─────►│ xdp ├─────►│ UMEM │
+ │ tile │◄─────┤ tile │◄─────┤ XSK │
+ └──────┘ RX └──────┘ RX └──────┘ */
#include
#include
@@ -9,9 +19,10 @@
#include /* MSG_DONTWAIT needed before importing the net seccomp filter */
#include
-#include "../fd_net_common.h"
+#include "../fd_find_16x16.h"
+#include "../fd_net_router.h"
#include "../../metrics/fd_metrics.h"
-#include "../../netlink/fd_netlink_tile.h" /* neigh4_solicit */
+#include "../../netlink/fd_netlink_tile.h"
#include "../../topo/fd_topo.h"
#include "../../../waltz/ip/fd_fib4.h"
@@ -38,6 +49,13 @@
#define MAX_NET_INS (32UL)
+/* MAX_NET_OUTS controls the max number of RX-to-tango forwarding links
+ that a net tile can serve. Also bounds the number of UDP listen
+ ports. Not trivial to change because of algorithms optimized for
+ this particular value (fd_find_16x16). */
+
+#define MAX_NET_OUTS (16UL)
+
/* FD_XDP_STATS_INTERVAL_NS controls the XDP stats refresh interval.
This should be lower than the interval at which the metrics tile
collects metrics. */
@@ -48,7 +66,6 @@
Only net tile 0 has XSK_IDX_LO, all net tiles have XSK_IDX_MAIN. */
#define XSK_IDX_MAIN 0
-#define XSK_IDX_LO 1
/* fd_net_in_ctx_t contains consumer information for an incoming tango
link. It is used as part of the TX path. */
@@ -158,11 +175,17 @@ struct fd_net_free_ring {
};
typedef struct fd_net_free_ring fd_net_free_ring_t;
+struct fd_xdp_rx_rule {
+ ushort port;
+ ushort out_idx;
+};
+typedef struct fd_xdp_rx_rule fd_xdp_rx_rule_t;
+
typedef struct {
/* An "XSK" is an AF_XDP socket */
uint xsk_cnt;
- fd_xsk_t xsk[ 2 ];
- int prog_link_fds[ 2 ];
+ fd_xsk_t xsk[ 1 ];
+ int prog_link_fds[ 1 ];
/* UMEM frame region within dcache */
void * umem_frame0; /* First UMEM frame */
@@ -181,16 +204,10 @@ typedef struct {
uint net_tile_cnt;
/* Details pertaining to an inflight send op */
- struct {
- uint xsk_idx;
- void * frame;
- uchar mac_addrs[12]; /* First 12 bytes of Ethernet header */
- uint src_ip; /* src_ip in net order */
-
- uint use_gre; /* The tx packet will be GRE-encapsulated */
- uint gre_outer_src_ip; /* For GRE: Outer iphdr's src_ip in net order */
- uint gre_outer_dst_ip; /* For GRE: Outer iphdr's dst_ip in net order */
- } tx_op;
+ uchar * tx_frame;
+ fd_next_hop_t next_hop;
+ uint tx_action;
+ uint tx_ok : 1;
/* Round-robin cycle serivce operations */
uint rr_idx;
@@ -198,26 +215,40 @@ typedef struct {
/* Ring tracking free packet buffers */
fd_net_free_ring_t free_tx;
- uchar src_mac_addr[6];
+ uint default_address;
+ uint bind_address;
+
+ /* RX flow steering (by UDP ports) */
+ uint rx_port_cnt; /* in [0,MAX_NET_OUTS) */
- uint default_address;
- uint bind_address;
- ushort shred_listen_port;
- ushort quic_transaction_listen_port;
- ushort legacy_transaction_listen_port;
- ushort gossip_listen_port;
- ushort repair_intake_listen_port;
- ushort repair_serve_listen_port;
- ushort send_src_port;
+ union {
+#if FD_HAS_AVX
+ wh_t wh[1]; /* forces alignment */
+#endif
+ ushort h[16];
+ } rx_port_keys;
+
+ struct {
+ uchar dst_proto;
+ uchar out_link_idx;
+ } rx_port_vals[ MAX_NET_OUTS ];
+ /* Tango out links. Only initialized for RX packet links, not for
+ management plane links (e.g. netlink). */
+ fd_net_out_ctx_t out[ MAX_NET_OUTS ];
+
+ /* Tango in links (for TX packet jobs) */
ulong in_cnt;
fd_net_in_ctx_t in[ MAX_NET_INS ];
- fd_net_out_ctx_t quic_out[1];
- fd_net_out_ctx_t shred_out[1];
- fd_net_out_ctx_t gossip_out[1];
- fd_net_out_ctx_t repair_out[1];
- fd_net_out_ctx_t send_out[1];
+ /* Fallback out link (for TX packet jobs that can't be handled with XDP) */
+ struct {
+ uint out_idx;
+ void * out_base;
+ ulong chunk0;
+ ulong wmark;
+ ulong chunk;
+ } fallback;
/* XDP stats refresh timer */
long xdp_stats_interval_ticks;
@@ -226,21 +257,18 @@ typedef struct {
/* TX flush timers */
fd_net_flusher_t tx_flusher[2]; /* one per XSK */
- /* Route and neighbor tables */
- fd_fib4_t const * fib_local;
- fd_fib4_t const * fib_main;
- fd_neigh4_hmap_t neigh4[1];
- fd_netlink_neigh4_solicit_link_t neigh4_solicit[1];
+ /* Routing configuration (device, route, neighbor tables) */
+ fd_net_router_t router;
/* Netdev table */
- fd_dbl_buf_t * netdev_dbl_buf; /* remote copy of device table */
- uchar * netdev_buf; /* local copy of device table */
- ulong netdev_buf_sz;
- fd_netdev_tbl_join_t netdev_tbl; /* join to local copy of device table */
- int has_gre_interface; /* enable GRE support? */
+ fd_dbl_buf_t * netdev_dbl_buf; /* remote copy of device table */
+ uchar * netdev_buf; /* local copy of device table */
+ ulong netdev_buf_sz;
+ int has_gre_interface; /* enable GRE support? */
struct {
- ulong rx_pkt_cnt;
+ ulong rx_pkt_cnt_ip4_udp;
+ ulong rx_pkt_cnt_ip4_opt_udp;
ulong rx_bytes_total;
ulong rx_undersz_cnt;
ulong rx_fill_blocked_cnt;
@@ -251,9 +279,8 @@ typedef struct {
ulong tx_submit_cnt;
ulong tx_complete_cnt;
ulong tx_bytes_total;
- ulong tx_route_fail_cnt;
- ulong tx_no_xdp_cnt;
- ulong tx_neigh_fail_cnt;
+ ulong tx_corrupt_cnt;
+ ulong tx_fallback_cnt;
ulong tx_full_fail_cnt;
long tx_busy_cnt;
long tx_idle_cnt;
@@ -265,7 +292,6 @@ typedef struct {
ulong rx_gre_ignored_cnt;
ulong rx_gre_inv_pkt_cnt;
ulong tx_gre_cnt;
- ulong tx_gre_route_fail_cnt;
} metrics;
} fd_net_ctx_t;
@@ -285,7 +311,8 @@ scratch_footprint( fd_topo_tile_t const * tile ) {
static void
metrics_write( fd_net_ctx_t * ctx ) {
- FD_MCNT_SET( NET, RX_PKT_CNT, ctx->metrics.rx_pkt_cnt );
+ FD_MCNT_SET( NET, RX_PKT_CNT_IP4_UDP, ctx->metrics.rx_pkt_cnt_ip4_udp );
+ FD_MCNT_SET( NET, RX_PKT_CNT_IP4_OPT_UDP, ctx->metrics.rx_pkt_cnt_ip4_opt_udp );
FD_MCNT_SET( NET, RX_BYTES_TOTAL, ctx->metrics.rx_bytes_total );
FD_MCNT_SET( NET, RX_UNDERSZ_CNT, ctx->metrics.rx_undersz_cnt );
FD_MCNT_SET( NET, RX_FILL_BLOCKED_CNT, ctx->metrics.rx_fill_blocked_cnt );
@@ -295,21 +322,20 @@ metrics_write( fd_net_ctx_t * ctx ) {
FD_MGAUGE_SET( NET, TX_BUSY_CNT, (ulong)fd_long_max( ctx->metrics.tx_busy_cnt, 0L ) );
FD_MGAUGE_SET( NET, TX_IDLE_CNT, (ulong)fd_long_max( ctx->metrics.tx_idle_cnt, 0L ) );
- FD_MCNT_SET( NET, TX_SUBMIT_CNT, ctx->metrics.tx_submit_cnt );
- FD_MCNT_SET( NET, TX_COMPLETE_CNT, ctx->metrics.tx_complete_cnt );
- FD_MCNT_SET( NET, TX_BYTES_TOTAL, ctx->metrics.tx_bytes_total );
- FD_MCNT_SET( NET, TX_ROUTE_FAIL_CNT, ctx->metrics.tx_route_fail_cnt );
- FD_MCNT_SET( NET, TX_NEIGHBOR_FAIL_CNT, ctx->metrics.tx_neigh_fail_cnt );
- FD_MCNT_SET( NET, TX_FULL_FAIL_CNT, ctx->metrics.tx_full_fail_cnt );
-
- FD_MCNT_SET( NET, XSK_TX_WAKEUP_CNT, ctx->metrics.xsk_tx_wakeup_cnt );
- FD_MCNT_SET( NET, XSK_RX_WAKEUP_CNT, ctx->metrics.xsk_rx_wakeup_cnt );
-
- FD_MCNT_SET( NET, RX_GRE_CNT, ctx->metrics.rx_gre_cnt );
- FD_MCNT_SET( NET, RX_GRE_INVALID_CNT, ctx->metrics.rx_gre_inv_pkt_cnt );
- FD_MCNT_SET( NET, RX_GRE_IGNORED_CNT, ctx->metrics.rx_gre_ignored_cnt );
- FD_MCNT_SET( NET, TX_GRE_CNT, ctx->metrics.tx_gre_cnt );
- FD_MCNT_SET( NET, TX_GRE_ROUTE_FAIL_CNT, ctx->metrics.tx_gre_route_fail_cnt );
+ FD_MCNT_SET( NET, TX_SUBMIT_CNT, ctx->metrics.tx_submit_cnt );
+ FD_MCNT_SET( NET, TX_COMPLETE_CNT, ctx->metrics.tx_complete_cnt );
+ FD_MCNT_SET( NET, TX_BYTES_TOTAL, ctx->metrics.tx_bytes_total );
+ FD_MCNT_SET( NET, TX_CORRUPT_CNT, ctx->metrics.tx_corrupt_cnt );
+ FD_MCNT_SET( NET, TX_FALLBACK_CNT, ctx->metrics.tx_fallback_cnt );
+ FD_MCNT_SET( NET, TX_FULL_FAIL_CNT, ctx->metrics.tx_full_fail_cnt );
+
+ FD_MCNT_SET( NET, XSK_TX_WAKEUP_CNT, ctx->metrics.xsk_tx_wakeup_cnt );
+ FD_MCNT_SET( NET, XSK_RX_WAKEUP_CNT, ctx->metrics.xsk_rx_wakeup_cnt );
+
+ FD_MCNT_SET( NET, RX_GRE_CNT, ctx->metrics.rx_gre_cnt );
+ FD_MCNT_SET( NET, RX_GRE_INVALID_CNT, ctx->metrics.rx_gre_inv_pkt_cnt );
+ FD_MCNT_SET( NET, RX_GRE_IGNORED_CNT, ctx->metrics.rx_gre_ignored_cnt );
+ FD_MCNT_SET( NET, TX_GRE_CNT, ctx->metrics.tx_gre_cnt );
}
struct xdp_statistics_v0 {
@@ -376,35 +402,19 @@ net_load_netdev_tbl( fd_net_ctx_t * ctx ) {
if( FD_UNLIKELY( !fd_dbl_buf_read( ctx->netdev_dbl_buf, ctx->netdev_buf_sz, ctx->netdev_buf, NULL ) ) ) return;
/* Join local copy */
- if( FD_UNLIKELY( !fd_netdev_tbl_join( &ctx->netdev_dbl_buf, ctx->netdev_buf ) ) ) FD_LOG_ERR(("netdev table join failed"));
-}
-
-/* Query the netdev table. Return a fd_netdev_t pointer to the net device of the
-interface specified by if_idx. Null if the if_idx is invalid */
-
-static fd_netdev_t *
-net_query_netdev_tbl( fd_net_ctx_t * ctx,
- uint if_idx ) {
- /* dev_tbl is one-indexed */
- if( if_idx>ctx->netdev_tbl.hdr->dev_cnt ) return NULL;
- return &ctx->netdev_tbl.dev_tbl[ if_idx ];
-}
-
-/* Iterates the netdev table and returns 1 if a GRE interface exists, 0 otherwise.
- Only called in privileged_init and during_housekeeping */
-
-static int
-net_check_gre_interface_exists( fd_net_ctx_t * ctx ) {
- fd_netdev_t * dev_tbl = ctx->netdev_tbl.dev_tbl;
- ushort dev_cnt = ctx->netdev_tbl.hdr->dev_cnt;
+ if( FD_UNLIKELY( !fd_netdev_tbl_join( &ctx->netdev_dbl_buf, ctx->netdev_buf ) ) ) {
+ FD_LOG_ERR(( "fd_netdev_tbl_join: received invalid device table copy" ));
+ }
+ /* Remember if GRE routing is enabled */
+ ctx->has_gre_interface = 0;
+ fd_netdev_t const * dev_tbl = ctx->router.netdev_tbl.dev_tbl;
+ ulong dev_cnt = ctx->router.netdev_tbl.hdr->dev_cnt;
for( ushort if_idx = 0; if_idxhas_gre_interface = 1;
}
- return 0;
}
-
/* net_tx_ready returns 1 if the current XSK is ready to submit a TX send
job. If the XSK is blocked for sends, returns 0. Reasons for block
include:
@@ -493,7 +503,6 @@ static void
during_housekeeping( fd_net_ctx_t * ctx ) {
long now = fd_tickcount();
net_load_netdev_tbl( ctx );
- ctx->has_gre_interface = net_check_gre_interface_exists( ctx );
ctx->metrics.rx_busy_cnt = 0UL;
ctx->metrics.rx_idle_cnt = 0UL;
@@ -524,103 +533,10 @@ during_housekeeping( fd_net_ctx_t * ctx ) {
}
}
-
-/* net_tx_route resolves the xsk index, src ip address, src MAC address, and
- dst MAC address. Returns 1 on success, 0 on failure.
- On success, tx_op->{xsk_idx,src_ip,mac_addrs} is set, and if the dst_ip
- belongs to a GRE interface, is_gre_inf will set to 1 and
- tx_op->{gre_outer_src_ip, gre_outer_dst_ip} will be loaded from the netdev
- table. is_gre_inf is set to 0 if dst_ip doesn't belong to a GRE interface. */
-
-static int
+static uint
net_tx_route( fd_net_ctx_t * ctx,
- uint dst_ip,
- uint * is_gre_inf ) {
-
- /* Route lookup */
-
- fd_fib4_hop_t hop[2] = {0};
- fd_fib4_lookup( ctx->fib_local, hop+0, dst_ip, 0UL );
- fd_fib4_lookup( ctx->fib_main, hop+1, dst_ip, 0UL );
- fd_fib4_hop_t const * next_hop = fd_fib4_hop_or( hop+0, hop+1 );
-
- uint rtype = next_hop->rtype;
- uint if_idx = next_hop->if_idx;
- uint ip4_src = next_hop->ip4_src;
-
- if( FD_UNLIKELY( rtype==FD_FIB4_RTYPE_LOCAL ) ) {
- rtype = FD_FIB4_RTYPE_UNICAST;
- if_idx = 1;
- }
-
- if( FD_UNLIKELY( rtype!=FD_FIB4_RTYPE_UNICAST ) ) {
- ctx->metrics.tx_route_fail_cnt++;
- return 0;
- }
-
- fd_netdev_t * netdev = net_query_netdev_tbl( ctx, if_idx );
- if( !netdev ) {
- ctx->metrics.tx_route_fail_cnt++;
- return 0;
- }
-
- ip4_src = fd_uint_if( !!ctx->bind_address, ctx->bind_address, ip4_src );
- ctx->tx_op.src_ip = ip4_src;
- ctx->tx_op.xsk_idx = UINT_MAX;
-
- FD_TEST( is_gre_inf );
- *is_gre_inf = 0;
- if( netdev->dev_type==ARPHRD_LOOPBACK ) {
- /* Set Ethernet src and dst address to 00:00:00:00:00:00 */
- memset( ctx->tx_op.mac_addrs, 0, 12UL );
- ctx->tx_op.xsk_idx = XSK_IDX_LO;
- /* Set preferred src address to 127.0.0.1 if no bind address is set */
- if( !ctx->tx_op.src_ip ) ctx->tx_op.src_ip = FD_IP4_ADDR( 127,0,0,1 );
- return 1;
- } else if( netdev->dev_type==ARPHRD_IPGRE ) {
- /* skip MAC addrs lookup for GRE inner dst ip */
- if( netdev->gre_src_ip ) ctx->tx_op.gre_outer_src_ip = netdev->gre_src_ip;
- ctx->tx_op.gre_outer_dst_ip = netdev->gre_dst_ip;
- *is_gre_inf = 1;
- return 1;
- }
-
- if( FD_UNLIKELY( netdev->dev_type!=ARPHRD_ETHER ) ) return 0; // drop
-
- if( FD_UNLIKELY( if_idx!=ctx->xsk[ XSK_IDX_MAIN ].if_idx ) ) {
- ctx->metrics.tx_no_xdp_cnt++;
- return 0;
- }
- ctx->tx_op.xsk_idx = XSK_IDX_MAIN;
-
- /* Neighbor resolve */
- uint neigh_ip = next_hop->ip4_gw;
- if( !neigh_ip ) neigh_ip = dst_ip;
-
- fd_neigh4_hmap_query_t neigh_query[1];
- int neigh_res = fd_neigh4_hmap_query_try( ctx->neigh4, &neigh_ip, NULL, neigh_query, 0 );
- if( FD_UNLIKELY( neigh_res!=FD_MAP_SUCCESS ) ) {
- /* Neighbor not found */
- fd_netlink_neigh4_solicit( ctx->neigh4_solicit, neigh_ip, if_idx, fd_frag_meta_ts_comp( fd_tickcount() ) );
- ctx->metrics.tx_neigh_fail_cnt++;
- return 0;
- }
- fd_neigh4_entry_t const * neigh = fd_neigh4_hmap_query_ele_const( neigh_query );
- if( FD_UNLIKELY( neigh->state != FD_NEIGH4_STATE_ACTIVE ) ) {
- ctx->metrics.tx_neigh_fail_cnt++;
- return 0;
- }
- ip4_src = fd_uint_if( !ip4_src, ctx->default_address, ip4_src );
- ctx->tx_op.src_ip = ip4_src;
- memcpy( ctx->tx_op.mac_addrs+0, neigh->mac_addr, 6 );
- memcpy( ctx->tx_op.mac_addrs+6, netdev->mac_addr, 6 );
-
- if( FD_UNLIKELY( fd_neigh4_hmap_query_test( neigh_query ) ) ) {
- ctx->metrics.tx_neigh_fail_cnt++;
- return 0;
- }
-
- return 1;
+ uint dst_ip ) {
+ return fd_net_tx_route( &ctx->router, &ctx->next_hop, dst_ip );
}
/* before_frag is called when a new metadata descriptor for a TX job is
@@ -634,10 +550,11 @@ before_frag( fd_net_ctx_t * ctx,
ulong seq,
ulong sig ) {
(void)in_idx; (void)seq;
+ ctx->tx_ok = 0;
/* Find interface index of next packet */
ulong proto = fd_disco_netmux_sig_proto( sig );
- if( FD_UNLIKELY( proto!=DST_PROTO_OUTGOING ) ) return 1;
+ if( FD_UNLIKELY( proto!=DST_PROTO_OUTGOING ) ) return 1; /* drop */
/* Load balance TX */
uint net_tile_cnt = ctx->net_tile_cnt;
@@ -645,72 +562,41 @@ before_frag( fd_net_ctx_t * ctx,
uint target_idx = hash % net_tile_cnt;
uint net_tile_id = ctx->net_tile_id;
uint dst_ip = fd_disco_netmux_sig_ip( sig );
+ if( net_tile_id!=target_idx ) return 1; /* ignore */
- ctx->tx_op.use_gre = 0;
- ctx->tx_op.gre_outer_dst_ip = 0;
- ctx->tx_op.gre_outer_src_ip = 0;
- uint is_gre_inf = 0;
-
- if( FD_UNLIKELY( !net_tx_route( ctx, dst_ip, &is_gre_inf ) ) ) {
- return 1; /* metrics incremented by net_tx_route */
+ fd_memset( &ctx->next_hop, 0, sizeof(fd_next_hop_t) );
+ uint route_res = net_tx_route( ctx, dst_ip );
+ ctx->tx_action = route_res;
+ if( FD_UNLIKELY( route_res!=FD_NET_HOP_RAW ) ) switch( route_res ) {
+ case FD_NET_HOP_GRE: {
+ /* Remember details pertaining to inner IP header */
+ uint inner_src_ip = ctx->next_hop.src_ip;
+ /* Retry routing against GRE peer IP */
+ route_res = net_tx_route( ctx, ctx->next_hop.gre_dst_ip );
+ if( FD_UNLIKELY( route_res!=FD_NET_HOP_RAW ) ) goto net_tx_route_fallback;
+ /* Override GRE outer IP hdr src addr */
+ if( !ctx->next_hop.gre_src_ip ) ctx->next_hop.gre_src_ip = ctx->next_hop.src_ip;
+ if( !ctx->next_hop.gre_dst_ip ) goto net_tx_route_fallback;
+ /* Restore inner IP header details */
+ ctx->next_hop.src_ip = inner_src_ip;
+ break; /* fall through to XDP send handler */
}
-
- uint xsk_idx = ctx->tx_op.xsk_idx;
-
- if( is_gre_inf ) {
- uint inner_src_ip = ctx->tx_op.src_ip;
- if( FD_UNLIKELY( !inner_src_ip ) ) {
- ctx->metrics.tx_gre_route_fail_cnt++;
- return 1;
- }
- /* Find the MAC addrs for the eth hdr, and src ip for outer ip4 hdr if not found in netdev tbl */
- ctx->tx_op.src_ip = 0;
- is_gre_inf = 0;
- if( FD_UNLIKELY( !net_tx_route( ctx, ctx->tx_op.gre_outer_dst_ip, &is_gre_inf ) ) ) {
- ctx->metrics.tx_gre_route_fail_cnt++;
- return 1;
- }
- if( is_gre_inf ) {
- /* Only one layer of tunnelling supported */
- ctx->metrics.tx_gre_route_fail_cnt++;
- return 1;
- }
- if( !ctx->tx_op.gre_outer_src_ip ) {
- ctx->tx_op.gre_outer_src_ip = ctx->tx_op.src_ip;
- }
- ctx->tx_op.use_gre = 1; /* indicate to during_frag to use GRE header */
- ctx->tx_op.src_ip = inner_src_ip;
- xsk_idx = XSK_IDX_MAIN;
+ net_tx_route_fallback:
+ case FD_NET_HOP_FALLBACK: {
+ ctx->metrics.tx_fallback_cnt++;
+ return 0;
}
-
- if( FD_UNLIKELY( xsk_idx>=ctx->xsk_cnt ) ) {
- /* Packet does not route to an XDP interface */
- ctx->metrics.tx_no_xdp_cnt++;
- return 1;
+ default:
+ FD_LOG_CRIT(( "Unexpected net_tx_route return code %u for IP " FD_IP4_ADDR_FMT, route_res, FD_IP4_ADDR_FMT_ARGS( dst_ip ) ));
}
- if( xsk_idx==XSK_IDX_LO ) target_idx = 0; /* loopback always targets tile 0 */
-
- /* Skip if another net tile is responsible for this packet */
-
- if( net_tile_id!=target_idx ) return 1; /* ignore */
-
/* Skip if TX is blocked */
- if( FD_UNLIKELY( !net_tx_ready( ctx, xsk_idx ) ) ) {
+ if( FD_UNLIKELY( !net_tx_ready( ctx, 0 ) ) ) {
ctx->metrics.tx_full_fail_cnt++;
return 1;
}
- /* Allocate buffer for receive */
-
- fd_net_free_ring_t * free = &ctx->free_tx;
- ulong alloc_seq = free->cons;
- void * frame = (void *)free->queue[ alloc_seq % free->depth ];
- free->cons = fd_seq_inc( alloc_seq, 1UL );
-
- ctx->tx_op.frame = frame;
-
return 0; /* continue */
}
@@ -734,63 +620,39 @@ during_frag( fd_net_ctx_t * ctx,
if( FD_UNLIKELY( sz>FD_ETH_PAYLOAD_MAX ) )
FD_LOG_ERR(( "packet too big %lu (in_idx=%lu)", sz, in_idx ));
- void * frame = ctx->tx_op.frame;
- if( FD_UNLIKELY( (ulong)frame < (ulong)ctx->umem_frame0 ) )
- FD_LOG_ERR(( "frame %p out of bounds (below %p)", frame, (void *)ctx->umem_frame0 ));
- ulong umem_off = (ulong)frame - (ulong)ctx->umem_frame0;
- if( FD_UNLIKELY( (ulong)umem_off > (ulong)ctx->umem_sz ) )
- FD_LOG_ERR(( "frame %p out of bounds (beyond %p)", frame, (void *)ctx->umem_sz ));
-
- /* Speculatively copy frame into XDP buffer */
- uchar const * src = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
-
- if( ctx->tx_op.use_gre ) {
- /* Discard the ethernet hdr from src. Copy the rest to where the inner ip4_hdr is.
- Safe from overflow: FD_ETH_PAYLOAD_MAX + header overhead < frame size (2048UL) */
- ulong overhead = sizeof(fd_eth_hdr_t) + sizeof(fd_ip4_hdr_t) + sizeof(fd_gre_hdr_t);
- fd_memcpy( (void *)( (ulong)ctx->tx_op.frame + overhead ), src + sizeof(fd_eth_hdr_t), sz - sizeof(fd_eth_hdr_t) );
+ uchar * frame;
+ if( FD_UNLIKELY( ctx->tx_action==FD_NET_HOP_FALLBACK ) ) {
+ frame = fd_chunk_to_laddr( ctx->fallback.out_base, ctx->fallback.chunk );
} else {
- fd_memcpy( ctx->tx_op.frame, src, sz );
+ fd_net_free_ring_t * free = &ctx->free_tx;
+ frame = (void *)free->queue[ free->cons % free->depth ];
+ if( FD_UNLIKELY( (ulong)frame < (ulong)ctx->umem_frame0 ) )
+ FD_LOG_ERR(( "frame %p out of bounds (below %p)", (void *)frame, (void *)ctx->umem_frame0 ));
+ ulong umem_off = (ulong)frame - (ulong)ctx->umem_frame0;
+ if( FD_UNLIKELY( (ulong)umem_off > (ulong)ctx->umem_sz ) )
+ FD_LOG_ERR(( "frame %p out of bounds (beyond %p)", (void *)frame, (void *)ctx->umem_sz ));
}
-}
-
-/* after_frag is called when the during_frag memcpy was _not_ overrun. */
-
-static void
-after_frag( fd_net_ctx_t * ctx,
- ulong in_idx,
- ulong seq,
- ulong sig,
- ulong sz,
- ulong tsorig,
- ulong tspub,
- fd_stem_context_t * stem ) {
- (void)in_idx; (void)seq; (void)sig; (void)tsorig; (void)tspub; (void)stem;
-
- /* Current send operation */
+ ctx->tx_frame = frame;
- uchar * frame = ctx->tx_op.frame;
- uint xsk_idx = ctx->tx_op.xsk_idx;
+ memcpy( frame, ctx->next_hop.mac_addrs, 12 );
+ FD_STORE( ushort, frame+12, fd_ushort_bswap( FD_ETH_HDR_TYPE_IP ) );
- /* Select Ethernet addresses */
- memcpy( frame, ctx->tx_op.mac_addrs, 12 );
+ uchar const * src = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
+ uchar * iphdr = frame + sizeof(fd_eth_hdr_t);
+ if( FD_LIKELY( ctx->tx_action!=FD_NET_HOP_GRE ) ) {
- uchar * iphdr = frame + sizeof(fd_eth_hdr_t);
+ fd_memcpy( frame+sizeof(fd_eth_hdr_t), src+sizeof(fd_eth_hdr_t), sz-sizeof(fd_eth_hdr_t) );
- if( ctx->tx_op.use_gre ) {
-
- /* For GRE packets, the ethertype will always be FD_ETH_HDR_TYPE_IP. outer source ip can't be 0 */
- if( FD_UNLIKELY( ctx->tx_op.gre_outer_src_ip==0 ) ) {
- ctx->metrics.tx_gre_route_fail_cnt++;
- return;
- }
+ } else {
- /* Write the last two bytes for eth_hdr */
- FD_STORE( ushort, frame+12, fd_ushort_bswap( FD_ETH_HDR_TYPE_IP ) );
+ /* Discard the ethernet hdr from src. Copy the rest to where the inner ip4_hdr is.
+ Safe from overflow: FD_ETH_PAYLOAD_MAX + header overhead < frame size (2048UL) */
+ ulong overhead = sizeof(fd_eth_hdr_t) + sizeof(fd_ip4_hdr_t) + sizeof(fd_gre_hdr_t);
+ fd_memcpy( frame+overhead, src+sizeof(fd_eth_hdr_t), sz-sizeof(fd_eth_hdr_t) );
- uchar * outer_iphdr = frame + sizeof(fd_eth_hdr_t);
- uchar * gre_hdr = outer_iphdr + sizeof(fd_ip4_hdr_t);
- uchar * inner_iphdr = gre_hdr + sizeof(fd_gre_hdr_t);
+ uchar * outer_iphdr = frame + sizeof(fd_eth_hdr_t);
+ uchar * gre_hdr = outer_iphdr + sizeof(fd_ip4_hdr_t);
+ uchar * inner_iphdr = gre_hdr + sizeof(fd_gre_hdr_t);
/* outer hdr + gre hdr + inner net_tot_len */
ushort outer_net_tot_len = (ushort)( sizeof(fd_ip4_hdr_t) + sizeof(fd_gre_hdr_t) + fd_ushort_bswap( ( (fd_ip4_hdr_t *)inner_iphdr )->net_tot_len ) );
@@ -805,8 +667,8 @@ after_frag( fd_net_ctx_t * ctx,
.ttl = 64,
.protocol = FD_IP4_HDR_PROTOCOL_GRE,
.check = 0,
- .saddr = ctx->tx_op.gre_outer_src_ip,
- .daddr = ctx->tx_op.gre_outer_dst_ip,
+ .saddr = ctx->next_hop.gre_src_ip,
+ .daddr = ctx->next_hop.gre_dst_ip,
};
ip4_outer.check = fd_ip4_hdr_check_fast( &ip4_outer );
FD_STORE( fd_ip4_hdr_t, outer_iphdr, ip4_outer );
@@ -818,44 +680,74 @@ after_frag( fd_net_ctx_t * ctx,
};
FD_STORE( fd_gre_hdr_t, gre_hdr, gre_hdr_ );
- iphdr = inner_iphdr;
- sz = sizeof(fd_eth_hdr_t) + outer_net_tot_len;
- xsk_idx = 0;
+ iphdr = inner_iphdr;
+
}
- /* Construct (inner) ip header */
+ /* Mangle IP header */
uint ihl = FD_IP4_GET_LEN( *(fd_ip4_hdr_t *)iphdr );
uint ver = FD_IP4_GET_VERSION( *(fd_ip4_hdr_t *)iphdr );
uint ip4_saddr = FD_LOAD( uint, iphdr+12 );
ushort ethertype = FD_LOAD( ushort, frame+12 );
if( ethertype==fd_ushort_bswap( FD_ETH_HDR_TYPE_IP ) && ver!=0x4 ) {
- ctx->metrics.tx_route_fail_cnt++; // Not an IPv4 packet. drop
+ ctx->metrics.tx_corrupt_cnt++; /* upstream tile attempted to send a pkt with odd IP version */
return;
}
if( ethertype==fd_ushort_bswap( FD_ETH_HDR_TYPE_IP ) && ip4_saddr==0 ) {
- if( FD_UNLIKELY( ctx->tx_op.src_ip==0 ||
+ if( FD_UNLIKELY( ctx->next_hop.src_ip==0 ||
ihlsz ) ) {
/* Outgoing IPv4 packet with unknown src IP or invalid IHL */
/* FIXME should select first IPv4 address of device table here */
- ctx->metrics.tx_route_fail_cnt++;
+ ctx->metrics.tx_corrupt_cnt++;
return;
}
/* Recompute checksum after changing header */
- FD_STORE( uint, iphdr+12, ctx->tx_op.src_ip );
+ FD_STORE( uint, iphdr+12, ctx->next_hop.src_ip );
FD_STORE( ushort, iphdr+10, 0 );
FD_STORE( ushort, iphdr+10, fd_ip4_hdr_check( iphdr ) );
}
+ ctx->tx_ok = 1;
+}
+
+/* after_frag is called when the during_frag memcpy was _not_ overrun. */
+
+static void
+after_frag( fd_net_ctx_t * ctx,
+ ulong in_idx,
+ ulong seq,
+ ulong sig,
+ ulong sz,
+ ulong tsorig,
+ ulong tspub,
+ fd_stem_context_t * stem ) {
+ (void)in_idx; (void)seq; (void)sig; (void)tsorig; (void)tspub; (void)stem;
+ if( !ctx->tx_ok ) return;
+
+ if( FD_UNLIKELY( ctx->tx_action==FD_NET_HOP_FALLBACK ) ) {
+ if( FD_UNLIKELY( !ctx->fallback.out_base ) ) return;
+ ulong out_idx = ctx->fallback.out_idx;
+ ulong out_chunk = ctx->fallback.chunk;
+ ulong out_tspub = fd_frag_meta_ts_comp( fd_tickcount() );
+ fd_stem_publish( stem, out_idx, sig, out_chunk, sz, 0, tsorig, out_tspub );
+ ctx->fallback.chunk = fd_dcache_compact_next( out_chunk, sz, ctx->fallback.chunk0, ctx->fallback.wmark );
+ return;
+ }
+ if( ctx->tx_action==FD_NET_HOP_GRE ) {
+ sz += sizeof(fd_ip4_hdr_t)+sizeof(fd_gre_hdr_t);
+ }
+
/* Submit packet TX job
Invariant for ring_tx: prod-consxsk[ xsk_idx ];
+ uchar * frame = ctx->tx_frame;
+ uint xsk_idx = 0u;
+ fd_xsk_t * xsk = &ctx->xsk[ xsk_idx ];
fd_xdp_ring_t * tx_ring = &xsk->ring_tx;
uint tx_seq = FD_VOLATILE_CONST( *tx_ring->prod );
uint tx_mask = tx_ring->depth - 1U;
@@ -865,14 +757,14 @@ after_frag( fd_net_ctx_t * ctx,
.options = 0
};
- /* Frame is now owned by kernel. Clear tx_op. */
- ctx->tx_op.frame = NULL;
+ /* Mark frame as used */
+ ctx->free_tx.cons++;
/* Register newly enqueued packet */
FD_VOLATILE( *xsk->ring_tx.prod ) = tx_ring->cached_prod = tx_seq+1U;
ctx->metrics.tx_submit_cnt++;
ctx->metrics.tx_bytes_total += sz;
- if( ctx->tx_op.use_gre ) ctx->metrics.tx_gre_cnt++;
+ if( ctx->tx_action==FD_NET_HOP_GRE ) ctx->metrics.tx_gre_cnt++;
fd_net_flusher_inc( ctx->tx_flusher+xsk_idx, fd_tickcount() );
}
@@ -937,8 +829,8 @@ net_rx_packet( fd_net_ctx_t * ctx,
( iphdr->protocol!=FD_IP4_HDR_PROTOCOL_UDP ) ) ) return;
/* IPv4 is variable-length, so lookup IHL to find start of UDP */
- uint iplen = FD_IP4_GET_LEN( *iphdr );
- uchar const * udp = (uchar *)iphdr + iplen;
+ uint iplen = FD_IP4_GET_LEN( *iphdr );
+ uchar const * udp = (uchar *)iphdr + iplen;
if( FD_UNLIKELY( udp+sizeof(fd_udp_hdr_t) > packet_end ) ) {
FD_DTRACE_PROBE( net_tile_err_rx_undersz );
@@ -955,45 +847,31 @@ net_rx_packet( fd_net_ctx_t * ctx,
FD_DTRACE_PROBE_4( net_tile_pkt_rx, ip_srcaddr, udp_srcport, udp_dstport, sz );
/* Route packet to downstream tile */
- ushort proto;
- fd_net_out_ctx_t * out;
- if( FD_UNLIKELY( udp_dstport==ctx->shred_listen_port ) ) {
- proto = DST_PROTO_SHRED;
- out = ctx->shred_out;
- } else if( FD_UNLIKELY( udp_dstport==ctx->quic_transaction_listen_port ) ) {
- proto = DST_PROTO_TPU_QUIC;
- out = ctx->quic_out;
- } else if( FD_UNLIKELY( udp_dstport==ctx->legacy_transaction_listen_port ) ) {
- proto = DST_PROTO_TPU_UDP;
- out = ctx->quic_out;
- } else if( FD_UNLIKELY( udp_dstport==ctx->gossip_listen_port ) ) {
- proto = DST_PROTO_GOSSIP;
- out = ctx->gossip_out;
- } else if( FD_UNLIKELY( udp_dstport==ctx->repair_intake_listen_port ) ) {
- proto = DST_PROTO_REPAIR;
- if( FD_UNLIKELY( sz == REPAIR_PING_SZ ) ) out = ctx->repair_out; /* ping-pong */
- else out = ctx->shred_out;
- } else if( FD_UNLIKELY( udp_dstport==ctx->repair_serve_listen_port ) ) {
- proto = DST_PROTO_REPAIR;
- out = ctx->repair_out;
- } else if( FD_UNLIKELY( udp_dstport==ctx->send_src_port ) ) {
- proto = DST_PROTO_SEND;
- out = ctx->send_out;
- } else {
+#if FD_HAS_AVX
+ uint port_idx = fd_find_16x16_avx( *ctx->rx_port_keys.wh, udp_dstport );
+#else
+ uint port_idx = fd_find_16x16( ctx->rx_port_keys.h, udp_dstport );
+#endif
+ if( FD_UNLIKELY( port_idx >= ctx->rx_port_cnt ) ) {
+ /* Dump out the listen port configuration to aid debugging */
+ FD_LOG_NOTICE(( "Fatal error occurred.\nDumping XDP RX UDP port configuration to aid debugging:" ));
+ for( uint i=0UL; irx_port_cnt; i++ ) {
+ FD_LOG_NOTICE(( " ( idx=%u udp.dport=%hu proto=%x out_link_idx=%u )",
+ i,
+ ctx->rx_port_keys.h[ i ],
+ ctx->rx_port_vals[ i ].dst_proto,
+ ctx->rx_port_vals[ i ].out_link_idx ));
+ }
FD_LOG_ERR(( "Firedancer received a UDP packet on port %hu which was not expected. "
- "Only the following ports should be configured to forward packets: "
- "%hu, %hu, %hu, %hu, %hu, %hu (excluding any 0 ports, which can be ignored)."
- "Please report this error to Firedancer maintainers.",
- udp_dstport,
- ctx->shred_listen_port,
- ctx->quic_transaction_listen_port,
- ctx->legacy_transaction_listen_port,
- ctx->gossip_listen_port,
- ctx->repair_intake_listen_port,
- ctx->repair_serve_listen_port ));
+ "Please report this error to Firedancer maintainers along with your config file.",
+ udp_dstport ));
}
+ uint out_idx = ctx->rx_port_vals[ port_idx ].out_link_idx;
+ ushort proto = ctx->rx_port_vals[ port_idx ].dst_proto;
+ fd_net_out_ctx_t * out = &ctx->out[ out_idx ];
+
/* tile can decide how to partition based on src ip addr and src port */
ulong sig = fd_disco_netmux_sig( ip_srcaddr, udp_srcport, ip_srcaddr, proto, 14UL+8UL+iplen );
@@ -1009,7 +887,10 @@ net_rx_packet( fd_net_ctx_t * ctx,
out->seq = fd_seq_inc( out->seq, 1UL );
if( is_packet_gre ) ctx->metrics.rx_gre_cnt++;
- ctx->metrics.rx_pkt_cnt++;
+ ulong * rx_metric = iplen==sizeof(fd_ip4_hdr_t) ?
+ &ctx->metrics.rx_pkt_cnt_ip4_udp :
+ &ctx->metrics.rx_pkt_cnt_ip4_opt_udp;
+ (*rx_metric)++;
ctx->metrics.rx_bytes_total += sz;
}
@@ -1112,18 +993,6 @@ before_credit( fd_net_ctx_t * ctx,
fd_stem_context_t * stem,
int * charge_busy ) {
(void)stem;
- /* A previous send attempt was overrun. A corrupt copy of the packet was
- placed into an XDP frame, but the frame was not yet submitted to the
- TX ring. Return the tx buffer to the free list. */
-
- if( ctx->tx_op.frame ) {
- *charge_busy = 1;
- fd_net_free_ring_t * free = &ctx->free_tx;
- ulong alloc_seq = free->prod;
- free->queue[ alloc_seq % free->depth ] = (ulong)ctx->tx_op.frame;
- free->prod = fd_seq_inc( alloc_seq, 1UL );
- ctx->tx_op.frame = NULL;
- }
/* Check if new packets are available or if TX frames are free again
(Round-robin through sockets) */
@@ -1177,20 +1046,15 @@ net_xsk_bootstrap( fd_net_ctx_t * ctx,
return frame_off;
}
-/* FIXME source MAC address from netlnk tile instead */
+/* FIXME get default IPv4 address from netdev tbl instead */
static void
interface_addrs( const char * interface,
- uchar * mac,
uint * ip4_addr ) {
int fd = socket( AF_INET, SOCK_DGRAM, 0 );
struct ifreq ifr;
ifr.ifr_addr.sa_family = AF_INET;
-
strncpy( ifr.ifr_name, interface, IFNAMSIZ );
- if( FD_UNLIKELY( ioctl( fd, SIOCGIFHWADDR, &ifr ) ) )
- FD_LOG_ERR(( "could not get MAC address of interface `%s`: (%i-%s)", interface, errno, fd_io_strerror( errno ) ));
- fd_memcpy( mac, ifr.ifr_hwaddr.sa_data, 6 );
if( FD_UNLIKELY( ioctl( fd, SIOCGIFADDR, &ifr ) ) )
FD_LOG_ERR(( "could not get IP address of interface `%s`: (%i-%s)", interface, errno, fd_io_strerror( errno ) ));
@@ -1207,9 +1071,6 @@ interface_addrs( const char * interface,
- Register UMEM data region with socket
- Insert AF_XDP socket into xsk_map
- Net tile 0 also runs fd_xdp_install and repeats the above step for
- the loopback device. (Unless the main interface is already loopback)
-
Kernel object references:
BPF_LINK file descriptor
@@ -1234,7 +1095,7 @@ privileged_init( fd_topo_t * topo,
uint if_idx = if_nametoindex( tile->xdp.interface );
if( FD_UNLIKELY( !if_idx ) ) FD_LOG_ERR(( "if_nametoindex(%s) failed", tile->xdp.interface ));
- interface_addrs( tile->xdp.interface, ctx->src_mac_addr, &ctx->default_address );
+ interface_addrs( tile->xdp.interface, &ctx->default_address );
/* Load up dcache containing UMEM */
@@ -1305,42 +1166,6 @@ privileged_init( fd_topo_t * topo,
if( FD_UNLIKELY( -1==close( xsk_map_fd ) ) ) FD_LOG_ERR(( "close(%d) failed (%d-%s)", xsk_map_fd, errno, fd_io_strerror( errno ) ));
}
- /* Networking tile at index 0 also binds to loopback (only queue 0 available on lo) */
-
- if( FD_UNLIKELY( strcmp( tile->xdp.interface, "lo" ) && !tile->kind_id ) ) {
- ctx->xsk_cnt = 2;
-
- ushort udp_port_candidates[] = {
- (ushort)tile->xdp.net.legacy_transaction_listen_port,
- (ushort)tile->xdp.net.quic_transaction_listen_port,
- (ushort)tile->xdp.net.shred_listen_port,
- (ushort)tile->xdp.net.gossip_listen_port,
- (ushort)tile->xdp.net.repair_intake_listen_port,
- (ushort)tile->xdp.net.repair_serve_listen_port,
- (ushort)tile->xdp.net.send_src_port
- };
-
- uint lo_idx = if_nametoindex( "lo" );
- if( FD_UNLIKELY( !lo_idx ) ) FD_LOG_ERR(( "if_nametoindex(lo) failed" ));
-
- /* FIXME move this to fd_topo_run */
- fd_xdp_fds_t lo_fds = fd_xdp_install( lo_idx,
- tile->net.bind_address,
- sizeof(udp_port_candidates)/sizeof(udp_port_candidates[0]),
- udp_port_candidates,
- "skb" );
-
- ctx->prog_link_fds[ 1 ] = lo_fds.prog_link_fd;
- /* init xsk 1 */
- fd_xsk_params_t params1 = params0;
- params1.if_idx = lo_idx; /* probably always 1 */
- params1.if_queue_id = 0;
- params1.bind_flags = 0;
- if( FD_UNLIKELY( !fd_xsk_init( &ctx->xsk[ 1 ], ¶ms1 ) ) ) FD_LOG_ERR(( "failed to bind lo_xsk" ));
- if( FD_UNLIKELY( !fd_xsk_activate( &ctx->xsk[ 1 ], lo_fds.xsk_map_fd ) ) ) FD_LOG_ERR(( "failed to activate lo_xsk" ));
- if( FD_UNLIKELY( -1==close( lo_fds.xsk_map_fd ) ) ) FD_LOG_ERR(( "close(%d) failed (%d-%s)", xsk_map_fd, errno, fd_io_strerror( errno ) ));
- }
-
double tick_per_ns = fd_tempo_tick_per_ns( NULL );
ctx->xdp_stats_interval_ticks = (long)( FD_XDP_STATS_INTERVAL_NS * tick_per_ns );
@@ -1369,8 +1194,44 @@ init_device_table( fd_net_ctx_t * ctx,
ctx->netdev_buf_sz = fd_netdev_tbl_footprint( NETDEV_MAX, BOND_MASTER_MAX );
/* Create temporary empty device table during startup */
- FD_TEST( fd_netdev_tbl_join( &ctx->netdev_tbl, fd_netdev_tbl_new( ctx->netdev_buf, 1, 1 ) ) );
+ FD_TEST( fd_netdev_tbl_join( &ctx->router.netdev_tbl, fd_netdev_tbl_new( ctx->netdev_buf, 1, 1 ) ) );
+
+}
+
+/* setup_out_link ensures an output link is set up for the given link
+ name. Idempotent. */
+
+static uint
+setup_out_link( fd_net_ctx_t * ctx,
+ fd_topo_t const * topo,
+ fd_topo_tile_t const * tile,
+ char const * link_name,
+ ulong tile_kind_id ) {
+ /* For a given output link kind (e.g. "net_quic"), each net tile
+ produces one output link, even if there are multiple downstream
+ consumer tiles. Each consumer tile receives all frags, but skips
+ frags based on a shared load balancing policy, making the tiles
+ effectively take turns processing frags. */
+ ulong out_link_idx = fd_topo_find_tile_out_link( topo, tile, link_name, tile_kind_id );
+ if( FD_UNLIKELY( out_link_idx==ULONG_MAX ) ) {
+ FD_LOG_ERR(( "link \"%s\" is not an output links of net:%lu", link_name, tile_kind_id ));
+ }
+ if( FD_UNLIKELY( out_link_idx>=MAX_NET_OUTS ) ) {
+ FD_LOG_ERR(( "out link \"%s\" out of bounds: index %lu >= MAX_NET_OUTS (%lu)", link_name, out_link_idx, (ulong)MAX_NET_OUTS ));
+ }
+
+ fd_net_out_ctx_t * out_ctx = &ctx->out[ out_link_idx ];
+ if( !out_ctx->mcache ) {
+ /* First time initialization */
+ ulong const link_id = tile->out_link_id[ out_link_idx ];
+ fd_topo_link_t const * out_link = &topo->links[ link_id ];
+ if( FD_UNLIKELY( !out_link->mcache ) ) FD_LOG_CRIT(( "out_link[%lu]->mcache is NULL (missing topo_fill?)", out_link_idx ));
+ out_ctx->mcache = out_link->mcache;
+ out_ctx->depth = fd_mcache_depth ( out_ctx->mcache );
+ out_ctx->sync = fd_mcache_seq_laddr( out_ctx->mcache );
+ }
+ return (uint)out_link_idx;
}
FD_FN_UNUSED static void
@@ -1383,22 +1244,14 @@ unprivileged_init( fd_topo_t * topo,
FD_TEST( ctx->xsk_cnt!=0 );
FD_TEST( ctx->free_tx.queue!=NULL );
(void)FD_SCRATCH_ALLOC_APPEND( l, alignof(ulong), tile->xdp.free_ring_depth * sizeof(ulong) );
- ctx->netdev_buf = FD_SCRATCH_ALLOC_APPEND( l, fd_netdev_tbl_align(), ctx->netdev_buf_sz );
+ ctx->netdev_buf = FD_SCRATCH_ALLOC_APPEND( l, fd_netdev_tbl_align(), ctx->netdev_buf_sz );
ctx->net_tile_id = (uint)tile->kind_id;
ctx->net_tile_cnt = (uint)fd_topo_tile_name_cnt( topo, tile->name );
- ctx->bind_address = tile->net.bind_address;
- ctx->shred_listen_port = tile->net.shred_listen_port;
- ctx->quic_transaction_listen_port = tile->net.quic_transaction_listen_port;
- ctx->legacy_transaction_listen_port = tile->net.legacy_transaction_listen_port;
- ctx->gossip_listen_port = tile->net.gossip_listen_port;
- ctx->repair_intake_listen_port = tile->net.repair_intake_listen_port;
- ctx->repair_serve_listen_port = tile->net.repair_serve_listen_port;
- ctx->send_src_port = tile->net.send_src_port;
+ ctx->bind_address = tile->net.bind_address;
- /* Put a bound on chunks we read from the input, to make sure they
- are within in the data region of the workspace. */
+ /* Net TX links (tango input links from net tile POV) */
if( FD_UNLIKELY( !tile->in_cnt ) ) FD_LOG_ERR(( "net tile in link cnt is zero" ));
if( FD_UNLIKELY( tile->in_cnt>MAX_NET_INS ) ) FD_LOG_ERR(( "net tile in link cnt %lu exceeds MAX_NET_INS %lu", tile->in_cnt, MAX_NET_INS ));
@@ -1412,79 +1265,35 @@ unprivileged_init( fd_topo_t * topo,
ctx->in[ i ].wmark = fd_dcache_compact_wmark( ctx->in[ i ].mem, link->dcache, link->mtu );
}
- for( ulong i = 0; i < tile->out_cnt; i++ ) {
- fd_topo_link_t * out_link = &topo->links[ tile->out_link_id[ i ] ];
- if( strcmp( out_link->name, "net_quic" ) == 0 ) {
- fd_topo_link_t * quic_out = out_link;
- ctx->quic_out->mcache = quic_out->mcache;
- ctx->quic_out->sync = fd_mcache_seq_laddr( ctx->quic_out->mcache );
- ctx->quic_out->depth = fd_mcache_depth( ctx->quic_out->mcache );
- ctx->quic_out->seq = fd_mcache_seq_query( ctx->quic_out->sync );
- } else if( strcmp( out_link->name, "net_shred" ) == 0 ) {
- fd_topo_link_t * shred_out = out_link;
- ctx->shred_out->mcache = shred_out->mcache;
- ctx->shred_out->sync = fd_mcache_seq_laddr( ctx->shred_out->mcache );
- ctx->shred_out->depth = fd_mcache_depth( ctx->shred_out->mcache );
- ctx->shred_out->seq = fd_mcache_seq_query( ctx->shred_out->sync );
- } else if( strcmp( out_link->name, "net_gossip" ) == 0 ) {
- fd_topo_link_t * gossip_out = out_link;
- ctx->gossip_out->mcache = gossip_out->mcache;
- ctx->gossip_out->sync = fd_mcache_seq_laddr( ctx->gossip_out->mcache );
- ctx->gossip_out->depth = fd_mcache_depth( ctx->gossip_out->mcache );
- ctx->gossip_out->seq = fd_mcache_seq_query( ctx->gossip_out->sync );
- } else if( strcmp( out_link->name, "net_repair" ) == 0 ) {
- fd_topo_link_t * repair_out = out_link;
- ctx->repair_out->mcache = repair_out->mcache;
- ctx->repair_out->sync = fd_mcache_seq_laddr( ctx->repair_out->mcache );
- ctx->repair_out->depth = fd_mcache_depth( ctx->repair_out->mcache );
- ctx->repair_out->seq = fd_mcache_seq_query( ctx->repair_out->sync );
- } else if( strcmp( out_link->name, "net_netlnk" ) == 0 ) {
- fd_topo_link_t * netlink_out = out_link;
- ctx->neigh4_solicit->mcache = netlink_out->mcache;
- ctx->neigh4_solicit->depth = fd_mcache_depth( ctx->neigh4_solicit->mcache );
- ctx->neigh4_solicit->seq = fd_mcache_seq_query( fd_mcache_seq_laddr( ctx->neigh4_solicit->mcache ) );
- } else if( strcmp( out_link->name, "net_send" ) == 0 ) {
- fd_topo_link_t * send_out = out_link;
- ctx->send_out->mcache = send_out->mcache;
- ctx->send_out->sync = fd_mcache_seq_laddr( ctx->send_out->mcache );
- ctx->send_out->depth = fd_mcache_depth( ctx->send_out->mcache );
- ctx->send_out->seq = fd_mcache_seq_query( ctx->send_out->sync );
- } else {
- FD_LOG_ERR(( "unrecognized out link `%s`", out_link->name ));
- }
- }
+ /* Net RX links (tango output links from net tile POV) */
- /* Check if any of the tiles we set a listen port for do not have an outlink. */
- if( FD_UNLIKELY( ctx->shred_listen_port!=0 && ctx->shred_out->mcache==NULL ) ) {
- FD_LOG_ERR(( "shred listen port set but no out link was found" ));
- } else if( FD_UNLIKELY( ctx->quic_transaction_listen_port!=0 && ctx->quic_out->mcache==NULL ) ) {
- FD_LOG_ERR(( "quic transaction listen port set but no out link was found" ));
- } else if( FD_UNLIKELY( ctx->legacy_transaction_listen_port!=0 && ctx->quic_out->mcache==NULL ) ) {
- FD_LOG_ERR(( "legacy transaction listen port set but no out link was found" ));
- } else if( FD_UNLIKELY( ctx->gossip_listen_port!=0 && ctx->gossip_out->mcache==NULL ) ) {
- FD_LOG_ERR(( "gossip listen port set but no out link was found" ));
- } else if( FD_UNLIKELY( ctx->repair_intake_listen_port!=0 && ctx->repair_out->mcache==NULL ) ) {
- FD_LOG_ERR(( "repair intake port set but no out link was found" ));
- } else if( FD_UNLIKELY( ctx->repair_serve_listen_port!=0 && ctx->repair_out->mcache==NULL ) ) {
- FD_LOG_ERR(( "repair serve listen port set but no out link was found" ));
- } else if( FD_UNLIKELY( ctx->neigh4_solicit->mcache==NULL ) ) {
- FD_LOG_ERR(( "netlink request link not found" ));
- } else if( FD_UNLIKELY( ctx->send_src_port!=0 && ctx->send_out->mcache==NULL ) ) {
- FD_LOG_ERR(( "send listen port set but no out link was found" ));
+ fd_topo_net_rx_t const * rx_cfg = &tile->net.rx_rules;
+ ctx->rx_port_cnt = (uint)( rx_cfg->rx_rule_cnt );
+ for( ulong i=0uL; i<(rx_cfg->rx_rule_cnt); i++ ) {
+ char const * link_name = rx_cfg->rx_rules[ i ].link;
+ uint out_link_idx = setup_out_link( ctx, topo, tile, link_name, ctx->net_tile_id );
+ ctx->rx_port_keys.h[ i ] = rx_cfg->rx_rules[ i ].port;
+ ctx->rx_port_vals [ i ].out_link_idx = (uchar)out_link_idx;
+ ctx->rx_port_vals [ i ].dst_proto = (uchar)rx_cfg->rx_rules[ i ].proto_id;
}
+ /* XDP flush timing objects */
+
for( uint j=0U; j<2U; j++ ) {
ctx->tx_flusher[ j ].pending_wmark = (ulong)( (double)tile->xdp.xdp_tx_queue_size * 0.7 );
ctx->tx_flusher[ j ].tail_flush_backoff = (long)( (double)tile->xdp.tx_flush_timeout_ns * fd_tempo_tick_per_ns( NULL ) );
ctx->tx_flusher[ j ].next_tail_flush_ticks = LONG_MAX;
}
- /* Join netbase objects */
- ctx->fib_local = fd_fib4_join( fd_topo_obj_laddr( topo, tile->xdp.fib4_local_obj_id ) );
- ctx->fib_main = fd_fib4_join( fd_topo_obj_laddr( topo, tile->xdp.fib4_main_obj_id ) );
- if( FD_UNLIKELY( !ctx->fib_local || !ctx->fib_main ) ) FD_LOG_ERR(( "fd_fib4_join failed" ));
+ /* Netlink tile shared memory objects */
+
+ fd_net_router_t * router = &ctx->router;
+ router->if_idx = ctx->xsk[ 0 ].if_idx;
+ router->fib_local = fd_fib4_join( fd_topo_obj_laddr( topo, tile->xdp.fib4_local_obj_id ) );
+ router->fib_main = fd_fib4_join( fd_topo_obj_laddr( topo, tile->xdp.fib4_main_obj_id ) );
+ if( FD_UNLIKELY( !ctx->router.fib_local || !ctx->router.fib_main ) ) FD_LOG_ERR(( "fd_fib4_join failed" ));
if( FD_UNLIKELY( !fd_neigh4_hmap_join(
- ctx->neigh4,
+ router->neigh4,
fd_topo_obj_laddr( topo, tile->xdp.neigh4_obj_id ),
fd_topo_obj_laddr( topo, tile->xdp.neigh4_ele_obj_id ) ) ) ) {
FD_LOG_ERR(( "fd_neigh4_hmap_join failed" ));
@@ -1537,12 +1346,7 @@ populate_allowed_seccomp( fd_topo_t const * topo,
FD_SCRATCH_ALLOC_INIT( l, scratch );
fd_net_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_net_ctx_t ), sizeof( fd_net_ctx_t ) );
- /* A bit of a hack, if there is no loopback XSK for this tile, we still need to pass
- two "allow" FD arguments to the net policy, so we just make them both the same. */
- int allow_fd2 = ctx->xsk_cnt>1UL ? ctx->xsk[ 1 ].xsk_fd : ctx->xsk[ 0 ].xsk_fd;
- FD_TEST( ctx->xsk[ 0 ].xsk_fd >= 0 && allow_fd2 >= 0 );
-
- populate_sock_filter_policy_fd_xdp_tile( out_cnt, out, (uint)fd_log_private_logfile_fd(), (uint)ctx->xsk[ 0 ].xsk_fd, (uint)allow_fd2 );
+ populate_sock_filter_policy_fd_xdp_tile( out_cnt, out, (uint)fd_log_private_logfile_fd(), (uint)ctx->xsk[ 0 ].xsk_fd );
return sock_filter_policy_fd_xdp_tile_instr_cnt;
}
@@ -1563,10 +1367,8 @@ populate_allowed_fds( fd_topo_t const * topo,
if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
- out_fds[ out_cnt++ ] = ctx->xsk[ 0 ].xsk_fd;
- out_fds[ out_cnt++ ] = ctx->prog_link_fds[ 0 ];
- if( FD_LIKELY( ctx->xsk_cnt>1UL ) ) out_fds[ out_cnt++ ] = ctx->xsk[ 1 ].xsk_fd;
- if( FD_LIKELY( ctx->xsk_cnt>1UL ) ) out_fds[ out_cnt++ ] = ctx->prog_link_fds[ 1 ];
+ out_fds[ out_cnt++ ] = ctx->xsk[ 0 ].xsk_fd;
+ out_fds[ out_cnt++ ] = ctx->prog_link_fds[ 0 ];
return out_cnt;
}
diff --git a/src/disco/net/xdp/fd_xdp_tile.seccomppolicy b/src/disco/net/xdp/fd_xdp_tile.seccomppolicy
index e4622dc139..3912f2e8bc 100644
--- a/src/disco/net/xdp/fd_xdp_tile.seccomppolicy
+++ b/src/disco/net/xdp/fd_xdp_tile.seccomppolicy
@@ -3,11 +3,7 @@
#
# xsk_fd: This is the file descriptor for the kernel XDP socket we
# created for the primary network device.
-#
-# lo_xsk_fd: This is the file descriptor for the kernel XDP socket we
-# created for the loopback network device. This is currently
-# needed because Solana sends packets to itself on loopback.
-unsigned int logfile_fd, unsigned int xsk_fd, unsigned int lo_xsk_fd
+unsigned int logfile_fd, unsigned int xsk_fd
# logging: all log messages are written to a file and/or pipe
#
@@ -34,10 +30,8 @@ fsync: (eq (arg 0) logfile_fd)
# purpose.
#
# arg 0 is the file descriptor of the XSK that the kernel should poll
-# for entries. There are two possible XSKs, since we can send packets
-# on a network device or the loopback device.
-sendto: (and (or (eq (arg 0) xsk_fd)
- (eq (arg 0) lo_xsk_fd))
+# for entries.
+sendto: (and (eq (arg 0) xsk_fd)
(eq (arg 1) 0)
(eq (arg 2) 0)
(eq (arg 3) MSG_DONTWAIT)
@@ -55,15 +49,12 @@ sendto: (and (or (eq (arg 0) xsk_fd)
# overloaded by Linux for this purpose.
#
# arg 0 is the file descriptor of the XSK that the kernel should poll
-# for entries. There are two possible XSKs, since we can receive
-# packets on a network device or the loopback device.
-recvmsg: (and (or (eq (arg 0) xsk_fd)
- (eq (arg 0) lo_xsk_fd))
+# for entries.
+recvmsg: (and (eq (arg 0) xsk_fd)
(eq (arg 2) MSG_DONTWAIT))
# XDP: We use getsockopt( SOL_XDP, XDP_STATISTICS ) to periodically
# retrieve packet drop counters for the XDP socket.
-getsockopt: (and (or (eq (arg 0) xsk_fd)
- (eq (arg 0) lo_xsk_fd))
+getsockopt: (and (eq (arg 0) xsk_fd)
(eq (arg 1) SOL_XDP)
(eq (arg 2) XDP_STATISTICS))
diff --git a/src/disco/net/xdp/generated/fd_xdp_tile_seccomp.h b/src/disco/net/xdp/generated/fd_xdp_tile_seccomp.h
index e62247cbaa..05133e8f8d 100644
--- a/src/disco/net/xdp/generated/fd_xdp_tile_seccomp.h
+++ b/src/disco/net/xdp/generated/fd_xdp_tile_seccomp.h
@@ -21,14 +21,14 @@
#else
# error "Target architecture is unsupported by seccomp."
#endif
-static const unsigned int sock_filter_policy_fd_xdp_tile_instr_cnt = 45;
+static const unsigned int sock_filter_policy_fd_xdp_tile_instr_cnt = 39;
-static void populate_sock_filter_policy_fd_xdp_tile( ulong out_cnt, struct sock_filter * out, unsigned int logfile_fd, unsigned int xsk_fd, unsigned int lo_xsk_fd ) {
- FD_TEST( out_cnt >= 45 );
- struct sock_filter filter[45] = {
+static void populate_sock_filter_policy_fd_xdp_tile( ulong out_cnt, struct sock_filter * out, unsigned int logfile_fd, unsigned int xsk_fd ) {
+ FD_TEST( out_cnt >= 39 );
+ struct sock_filter filter[39] = {
/* Check: Jump to RET_KILL_PROCESS if the script's arch != the runtime arch */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, ( offsetof( struct seccomp_data, arch ) ) ),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, ARCH_NR, 0, /* RET_KILL_PROCESS */ 41 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, ARCH_NR, 0, /* RET_KILL_PROCESS */ 35 ),
/* loading syscall number in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, ( offsetof( struct seccomp_data, nr ) ) ),
/* allow write based on expression */
@@ -38,76 +38,64 @@ static void populate_sock_filter_policy_fd_xdp_tile( ulong out_cnt, struct sock_
/* allow sendto based on expression */
BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_sendto, /* check_sendto */ 9, 0 ),
/* allow recvmsg based on expression */
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_recvmsg, /* check_recvmsg */ 22, 0 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_recvmsg, /* check_recvmsg */ 20, 0 ),
/* allow getsockopt based on expression */
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_getsockopt, /* check_getsockopt */ 27, 0 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_getsockopt, /* check_getsockopt */ 23, 0 ),
/* none of the syscalls matched */
- { BPF_JMP | BPF_JA, 0, 0, /* RET_KILL_PROCESS */ 34 },
+ { BPF_JMP | BPF_JA, 0, 0, /* RET_KILL_PROCESS */ 28 },
// check_write:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 2, /* RET_ALLOW */ 33, /* lbl_1 */ 0 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 2, /* RET_ALLOW */ 27, /* lbl_1 */ 0 ),
// lbl_1:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 31, /* RET_KILL_PROCESS */ 30 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 25, /* RET_KILL_PROCESS */ 24 ),
// check_fsync:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 29, /* RET_KILL_PROCESS */ 28 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 23, /* RET_KILL_PROCESS */ 22 ),
// check_sendto:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, xsk_fd, /* lbl_2 */ 2, /* lbl_3 */ 0 ),
-// lbl_3:
- /* load syscall argument 0 in accumulator */
- BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, lo_xsk_fd, /* lbl_2 */ 0, /* RET_KILL_PROCESS */ 24 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, xsk_fd, /* lbl_2 */ 0, /* RET_KILL_PROCESS */ 20 ),
// lbl_2:
/* load syscall argument 1 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[1])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_4 */ 0, /* RET_KILL_PROCESS */ 22 ),
-// lbl_4:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_3 */ 0, /* RET_KILL_PROCESS */ 18 ),
+// lbl_3:
/* load syscall argument 2 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[2])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_5 */ 0, /* RET_KILL_PROCESS */ 20 ),
-// lbl_5:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_4 */ 0, /* RET_KILL_PROCESS */ 16 ),
+// lbl_4:
/* load syscall argument 3 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[3])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, MSG_DONTWAIT, /* lbl_6 */ 0, /* RET_KILL_PROCESS */ 18 ),
-// lbl_6:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, MSG_DONTWAIT, /* lbl_5 */ 0, /* RET_KILL_PROCESS */ 14 ),
+// lbl_5:
/* load syscall argument 4 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[4])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_7 */ 0, /* RET_KILL_PROCESS */ 16 ),
-// lbl_7:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_6 */ 0, /* RET_KILL_PROCESS */ 12 ),
+// lbl_6:
/* load syscall argument 5 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[5])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 15, /* RET_KILL_PROCESS */ 14 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 11, /* RET_KILL_PROCESS */ 10 ),
// check_recvmsg:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, xsk_fd, /* lbl_8 */ 2, /* lbl_9 */ 0 ),
-// lbl_9:
- /* load syscall argument 0 in accumulator */
- BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, lo_xsk_fd, /* lbl_8 */ 0, /* RET_KILL_PROCESS */ 10 ),
-// lbl_8:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, xsk_fd, /* lbl_7 */ 0, /* RET_KILL_PROCESS */ 8 ),
+// lbl_7:
/* load syscall argument 2 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[2])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, MSG_DONTWAIT, /* RET_ALLOW */ 9, /* RET_KILL_PROCESS */ 8 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, MSG_DONTWAIT, /* RET_ALLOW */ 7, /* RET_KILL_PROCESS */ 6 ),
// check_getsockopt:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, xsk_fd, /* lbl_10 */ 2, /* lbl_11 */ 0 ),
-// lbl_11:
- /* load syscall argument 0 in accumulator */
- BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, lo_xsk_fd, /* lbl_10 */ 0, /* RET_KILL_PROCESS */ 4 ),
-// lbl_10:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, xsk_fd, /* lbl_8 */ 0, /* RET_KILL_PROCESS */ 4 ),
+// lbl_8:
/* load syscall argument 1 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[1])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SOL_XDP, /* lbl_12 */ 0, /* RET_KILL_PROCESS */ 2 ),
-// lbl_12:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SOL_XDP, /* lbl_9 */ 0, /* RET_KILL_PROCESS */ 2 ),
+// lbl_9:
/* load syscall argument 2 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[2])),
BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, XDP_STATISTICS, /* RET_ALLOW */ 1, /* RET_KILL_PROCESS */ 0 ),
diff --git a/src/disco/net/xdp/test_xdp_tile.c b/src/disco/net/xdp/test_xdp_tile.c
index 8b23c4e707..56944337b6 100644
--- a/src/disco/net/xdp/test_xdp_tile.c
+++ b/src/disco/net/xdp/test_xdp_tile.c
@@ -1,14 +1,5 @@
-#include
-#include
-#include
#include "fd_xdp_tile.c"
#include "../../../disco/topo/fd_topob.h"
-#include "../../../waltz/neigh/fd_neigh4_map.h"
-#include "../../../util/net/fd_ip4.h"
-#include "../../../waltz/ip/fd_fib4.h"
-#include "../../../util/tmpl/fd_map.h"
-#include "../../../tango/dcache/fd_dcache.h"
-#include "../../../tango/mcache/fd_mcache.h"
#if defined(__GNUC__) && (__GNUC__ >= 9)
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@@ -69,16 +60,14 @@ static ulong const frame_sz = 2048UL;
static void
add_neighbor( fd_neigh4_hmap_t * join,
uint ip4_addr,
- uchar mac0, uchar mac1, uchar mac2,
- uchar mac3, uchar mac4, uchar mac5 ) {
+ uchar const mac[6] ) {
fd_neigh4_hmap_query_t query[1];
int prepare_res = fd_neigh4_hmap_prepare( join, &ip4_addr, NULL, query, FD_MAP_FLAG_BLOCKING );
FD_TEST( prepare_res==FD_MAP_SUCCESS );
fd_neigh4_entry_t * ele = fd_neigh4_hmap_query_ele( query );
ele->state = FD_NEIGH4_STATE_ACTIVE;
ele->ip4_addr = ip4_addr;
- ele->mac_addr[0] = mac0; ele->mac_addr[1] = mac1; ele->mac_addr[2] = mac2;
- ele->mac_addr[3] = mac3; ele->mac_addr[4] = mac4; ele->mac_addr[5] = mac5;
+ memcpy( ele->mac_addr, mac, 6 );
fd_neigh4_hmap_publish( query );
}
@@ -133,42 +122,44 @@ setup_routing_table( fd_net_ctx_t * ctx,
FD_TEST( fd_fib4_insert( fib_main, banned_ip, 32, 0U, &hop5 ) );
FD_TEST( fd_fib4_insert( fib_main, gre1_dst_ip, 32, 0U, &hop6 ) );
FD_TEST( fd_fib4_insert( fib_main, gre1_outer_dst_ip, 32, 0U, &hop7 ) );
- ctx->fib_local = fib_local;
- ctx->fib_main = fib_main;
+ fd_net_router_t * router = &ctx->router;
+ router->fib_local = fib_local;
+ router->fib_main = fib_main;
}
static void
setup_netdev_table( fd_net_ctx_t * ctx ) {
/* GRE interfaces */
- ctx->netdev_tbl.dev_tbl[IF_IDX_GRE0] = (fd_netdev_t) {
+ fd_net_router_t * router = &ctx->router;
+ router->netdev_tbl.dev_tbl[IF_IDX_GRE0] = (fd_netdev_t) {
.if_idx = IF_IDX_GRE0,
.dev_type = ARPHRD_IPGRE,
.gre_dst_ip = gre0_outer_dst_ip,
.gre_src_ip = gre0_outer_src_ip
};
- ctx->netdev_tbl.dev_tbl[IF_IDX_GRE1] = (fd_netdev_t) {
+ router->netdev_tbl.dev_tbl[IF_IDX_GRE1] = (fd_netdev_t) {
.if_idx = IF_IDX_GRE1,
.dev_type = ARPHRD_IPGRE,
.gre_dst_ip = gre1_outer_dst_ip,
};
/* Eth0 interface */
- ctx->netdev_tbl.dev_tbl[IF_IDX_ETH0] = (fd_netdev_t) {
+ router->netdev_tbl.dev_tbl[IF_IDX_ETH0] = (fd_netdev_t) {
.if_idx = IF_IDX_ETH0,
.dev_type = ARPHRD_ETHER,
};
/* Eth1 interface */
- ctx->netdev_tbl.dev_tbl[IF_IDX_ETH1] = (fd_netdev_t) {
+ router->netdev_tbl.dev_tbl[IF_IDX_ETH1] = (fd_netdev_t) {
.if_idx = IF_IDX_ETH1,
.dev_type = ARPHRD_ETHER,
};
/* Lo interface */
- ctx->netdev_tbl.dev_tbl[IF_IDX_LO] = (fd_netdev_t) {
+ router->netdev_tbl.dev_tbl[IF_IDX_LO] = (fd_netdev_t) {
.if_idx = IF_IDX_LO,
.dev_type = ARPHRD_LOOPBACK,
};
- fd_memcpy( (fd_netdev_t *)ctx->netdev_tbl.dev_tbl[IF_IDX_ETH0].mac_addr, eth0_src_mac_addr, 6 );
- fd_memcpy( (fd_netdev_t *)ctx->netdev_tbl.dev_tbl[IF_IDX_ETH1].mac_addr, eth1_src_mac_addr, 6 );
- ctx->netdev_tbl.hdr->dev_cnt = IF_IDX_GRE1 + 1;
+ fd_memcpy( router->netdev_tbl.dev_tbl[IF_IDX_ETH0].mac_addr, eth0_src_mac_addr, 6 );
+ fd_memcpy( router->netdev_tbl.dev_tbl[IF_IDX_ETH1].mac_addr, eth1_src_mac_addr, 6 );
+ router->netdev_tbl.hdr->dev_cnt = IF_IDX_GRE1 + 1;
}
@@ -290,14 +281,14 @@ main( int argc,
fd_topob_tile_in( topo, "net", 0UL, "wksp", "shred_net", 0UL, 0, 1 );
/* Manual "privileged_init/unprivileged init" */
- void * scratch = fd_topo_obj_laddr( topo, topo_tile->tile_obj_id );
+ void * scratch = fd_topo_obj_laddr( topo, topo_tile->tile_obj_id );
FD_SCRATCH_ALLOC_INIT( l, scratch );
- fd_net_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_net_ctx_t ), sizeof( fd_net_ctx_t ) );
+ fd_net_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_net_ctx_t ), sizeof( fd_net_ctx_t ) );
fd_memset( ctx, 0, sizeof(fd_net_ctx_t) );
- ctx->net_tile_cnt = 1;
- ctx->free_tx.queue = FD_SCRATCH_ALLOC_APPEND( l, alignof(ulong), topo_tile->xdp.free_ring_depth * sizeof(ulong) );
- ctx->free_tx.depth = topo_tile->xdp.free_ring_depth;
- ctx->netdev_buf = FD_SCRATCH_ALLOC_APPEND( l, fd_netdev_tbl_align(), ctx->netdev_buf_sz );
+ ctx->net_tile_cnt = 1;
+ ctx->free_tx.queue = FD_SCRATCH_ALLOC_APPEND( l, alignof(ulong), topo_tile->xdp.free_ring_depth * sizeof(ulong) );
+ ctx->free_tx.depth = topo_tile->xdp.free_ring_depth;
+ ctx->netdev_buf = FD_SCRATCH_ALLOC_APPEND( l, fd_netdev_tbl_align(), ctx->netdev_buf_sz );
init_device_table( ctx, netdev_dbl_buf_mem );
@@ -310,19 +301,25 @@ main( int argc,
ulong umem_sz = umem_dcache_data_sz - ( (ulong)umem_frame0 - (ulong)umem_dcache );
umem_sz = fd_ulong_align_dn( umem_sz, umem_frame_sz );
- ulong const umem_chunk0 = ( (ulong)umem_frame0 - (ulong)umem_base )>>FD_CHUNK_LG_SZ;
- ulong const umem_wmark = umem_chunk0 + ( ( umem_sz-umem_frame_sz )>>FD_CHUNK_LG_SZ );
+ ulong const umem_chunk0 = ( (ulong)umem_frame0 - (ulong)umem_base )>>FD_CHUNK_LG_SZ;
+ ulong const umem_wmark = umem_chunk0 + ( ( umem_sz-umem_frame_sz )>>FD_CHUNK_LG_SZ );
ctx->umem_frame0 = umem_frame0;
ctx->umem_chunk0 = (uint)umem_chunk0;
ctx->umem_wmark = (uint)umem_wmark;
ctx->umem_sz = umem_sz;
- ctx->shred_listen_port = SHRED_PORT;
- ctx->shred_out->mcache = rx_link->mcache;
- ctx->shred_out->sync = fd_mcache_seq_laddr( ctx->shred_out->mcache );
- ctx->shred_out->depth = fd_mcache_depth( ctx->shred_out->mcache );
- ctx->shred_out->seq = fd_mcache_seq_query( ctx->shred_out->sync );
+ /* RX flow steer rule */
+ uint rx_port_idx = ctx->rx_port_cnt++;
+ ctx->rx_port_keys.h[ rx_port_idx ] = SHRED_PORT;
+ ctx->rx_port_vals [ rx_port_idx ].dst_proto = DST_PROTO_SHRED;
+ ctx->rx_port_vals [ rx_port_idx ].out_link_idx = 0;
+
+ /* RX out link */
+ ctx->out[ 0 ].mcache = rx_link->mcache;
+ ctx->out[ 0 ].sync = fd_mcache_seq_laddr( rx_link->mcache );
+ ctx->out[ 0 ].depth = fd_mcache_depth ( rx_link->mcache );
+ ctx->out[ 0 ].seq = 0UL;
/* Initialize out link mcache chunks (RX links) */
ulong frame_off = 0UL;
@@ -388,17 +385,14 @@ main( int argc,
/* Routing table */
setup_routing_table( ctx, fib4_local_mem, fib4_main_mem );
- /* Ensure initial (fake) device table is valid */
- FD_TEST( net_check_gre_interface_exists( ctx )==0 );
- uint is_gre_inf = 0U;
- FD_TEST( net_tx_route( ctx, FD_IP4_ADDR( 1,1,1,1 ), &is_gre_inf )==0 );
+ FD_TEST( net_tx_route( ctx, FD_IP4_ADDR( 1,1,1,1 ) )==FD_NET_HOP_FALLBACK );
/* Neighbor table */
- add_neighbor( neigh4_hmap, gre0_outer_dst_ip, eth0_dst_mac_addr[0], eth0_dst_mac_addr[1], eth0_dst_mac_addr[2], eth0_dst_mac_addr[3], eth0_dst_mac_addr[4], eth0_dst_mac_addr[5] );
- add_neighbor( neigh4_hmap, gre1_outer_dst_ip, eth1_dst_mac_addr[0], eth1_dst_mac_addr[1], eth1_dst_mac_addr[2], eth1_dst_mac_addr[3], eth1_dst_mac_addr[4], eth1_dst_mac_addr[5] );
- add_neighbor( neigh4_hmap, gw_ip, eth1_dst_mac_addr[0], eth1_dst_mac_addr[1], eth1_dst_mac_addr[2], eth1_dst_mac_addr[3], eth1_dst_mac_addr[4], eth1_dst_mac_addr[5] );
+ add_neighbor( neigh4_hmap, gre0_outer_dst_ip, eth0_dst_mac_addr );
+ add_neighbor( neigh4_hmap, gre1_outer_dst_ip, eth1_dst_mac_addr );
+ add_neighbor( neigh4_hmap, gw_ip, eth1_dst_mac_addr );
FD_TEST( fd_neigh4_hmap_join(
- ctx->neigh4,
+ ctx->router.neigh4,
fd_topo_obj_laddr( topo, topo_tile->xdp.neigh4_obj_id ),
fd_topo_obj_laddr( topo, topo_tile->xdp.neigh4_ele_obj_id ) ) );
@@ -408,7 +402,7 @@ main( int argc,
ctx->netdev_buf_sz = fd_netdev_tbl_footprint( NETDEV_MAX, BOND_MASTER_MAX );
ctx->netdev_buf = FD_SCRATCH_ALLOC_APPEND( l, fd_netdev_tbl_align(), ctx->netdev_buf_sz );
fd_netdev_tbl_new( ctx->netdev_buf, NETDEV_MAX, BOND_MASTER_MAX );
- FD_TEST( fd_netdev_tbl_join( &ctx->netdev_tbl, ctx->netdev_buf ) );
+ FD_TEST( fd_netdev_tbl_join( &ctx->router.netdev_tbl, ctx->netdev_buf ) );
setup_netdev_table( ctx );
ctx->has_gre_interface = 1;
@@ -423,7 +417,7 @@ main( int argc,
ulong cr_avail = ULONG_MAX;
fd_stem_context_t stem[1] = {{
.mcaches = &rx_link->mcache,
- .seqs = &ctx->shred_out->seq,
+ .seqs = &ctx->out[ 0 ].seq,
.depths = &link_depth,
.cr_avail = &cr_avail,
.cr_decrement_amount = 0UL
@@ -487,7 +481,7 @@ main( int argc,
fd_memcpy( eth_mac_addrs_before_frag, eth1_dst_mac_addr, 6 );
fd_memcpy( eth_mac_addrs_before_frag + 6, eth1_src_mac_addr, 6 );
- struct {
+ struct __attribute__((packed)) {
fd_eth_hdr_t eth;
fd_ip4_hdr_t inner_ip4;
fd_udp_hdr_t udp;
@@ -509,27 +503,6 @@ main( int argc,
.data = {0xFF, 0xFF, 0}
};
- struct {
- fd_eth_hdr_t eth;
- fd_ip4_hdr_t outer_ip4;
- fd_gre_hdr_t gre;
- fd_ip4_hdr_t inner_ip4;
- fd_udp_hdr_t udp;
- uchar data[3];
- } tx_pkt_during_frag_gre = {
- .inner_ip4 = {
- .verihl = FD_IP4_VERIHL( 4, 5 ),
- .protocol = FD_IP4_HDR_PROTOCOL_UDP,
- .net_tot_len = fd_ushort_bswap( 31 ),
- .daddr = gre0_dst_ip
- },
- .udp = {
- .net_len = fd_ushort_bswap( 11 ),
- .net_dport = fd_ushort_bswap( SHRED_PORT )
- },
- .data = {0xFF, 0xFF, 0}
- };
-
struct __attribute__((packed)) {
fd_eth_hdr_t eth;
fd_ip4_hdr_t ip4;
@@ -634,7 +607,6 @@ main( int argc,
rx_pkt_gre.data[2] = (uchar)i;
rx_pkt.data[2] = (uchar)i;
tx_pkt_before_frag_gre.data[2] = (uchar)i;
- tx_pkt_during_frag_gre.data[2] = (uchar)i;
tx_pkt_before_during_frag.data[2] = (uchar)i;
tx_pkt_after_frag_gre.data[2] = (uchar)i;
tx_pkt_after_frag.data[2] = (uchar)i;
@@ -656,8 +628,6 @@ main( int argc,
void * during_frag_src;
ulong during_frag_src_sz;
- ulong during_frag_expected_sz;
- void * during_frag_expected;
void * after_frag_expected;
ulong after_frag_expected_sz;
@@ -668,7 +638,7 @@ main( int argc,
fd_memcpy( eth_mac_addrs_before_frag_gre, eth0_dst_mac_addr, 6 );
fd_memcpy( eth_mac_addrs_before_frag_gre + 6, eth0_src_mac_addr, 6 );
- xsk->if_idx = IF_IDX_ETH0;
+ xsk->if_idx = ctx->router.if_idx = IF_IDX_ETH0;
before_credit_input = &rx_pkt_gre;
before_credit_input_sz = sizeof(rx_pkt_gre);
before_credit_expected = &rx_pkt;
@@ -683,10 +653,8 @@ main( int argc,
gre_outer_dst_ip = gre0_outer_dst_ip;
use_gre = 1;
- tx_pkt_during_frag_gre.inner_ip4.daddr = gre1_dst_ip;
during_frag_src = &tx_pkt_before_frag_gre;
during_frag_src_sz = sizeof(tx_pkt_before_frag_gre);
- during_frag_expected = &tx_pkt_during_frag_gre;
after_frag_expected = &tx_pkt_after_frag_gre;
after_frag_expected_sz = sizeof(tx_pkt_after_frag_gre);
@@ -707,7 +675,7 @@ main( int argc,
fd_memcpy( eth_mac_addrs_before_frag_gre, eth1_dst_mac_addr, 6 );
fd_memcpy( eth_mac_addrs_before_frag_gre + 6, eth1_src_mac_addr, 6 );
- xsk->if_idx = IF_IDX_ETH1;
+ xsk->if_idx = ctx->router.if_idx = IF_IDX_ETH1;
before_credit_input = &rx_pkt_gre;
before_credit_input_sz = sizeof(rx_pkt_gre);
@@ -723,10 +691,8 @@ main( int argc,
gre_outer_dst_ip = gre1_outer_dst_ip;
use_gre = 1;
- tx_pkt_during_frag_gre.inner_ip4.daddr = gre1_dst_ip;
during_frag_src = &tx_pkt_before_frag_gre;
during_frag_src_sz = sizeof(tx_pkt_before_frag_gre);
- during_frag_expected = &tx_pkt_during_frag_gre;
after_frag_expected = &tx_pkt_after_frag_gre;
after_frag_expected_sz = sizeof(tx_pkt_after_frag_gre);
@@ -744,7 +710,7 @@ main( int argc,
break;
}
case 2: { // non-gre
- xsk->if_idx = IF_IDX_ETH1;
+ xsk->if_idx = ctx->router.if_idx = IF_IDX_ETH1;
before_credit_input = &rx_pkt;
before_credit_input_sz = sizeof(rx_pkt);
@@ -759,8 +725,6 @@ main( int argc,
during_frag_src = &tx_pkt_before_during_frag;
during_frag_src_sz = sizeof(tx_pkt_before_during_frag);
- during_frag_expected_sz = sizeof(tx_pkt_before_during_frag);
- during_frag_expected = &tx_pkt_before_during_frag;
after_frag_expected = &tx_pkt_after_frag;
after_frag_expected_sz = sizeof(tx_pkt_after_frag);
@@ -809,24 +773,22 @@ main( int argc,
ulong sig = fd_disco_netmux_sig( 0, SHRED_PORT, before_frag_dst_ip, DST_PROTO_OUTGOING, before_frag_hdr_sz );
FD_TEST( before_frag( ctx, 0, tx_seq, sig ) == 0 ) ;
- FD_TEST( ctx->tx_op.frame );
- FD_TEST( fd_memeq( ctx->tx_op.mac_addrs, before_frag_expected_mac_addr, 12 ) );
- FD_TEST( ctx->tx_op.src_ip==before_frag_expected_src_ip );
- FD_TEST( ctx->tx_op.use_gre == use_gre );
+ FD_TEST( fd_memeq( ctx->next_hop.mac_addrs, before_frag_expected_mac_addr, 12 ) );
+ FD_TEST( ctx->next_hop.src_ip==before_frag_expected_src_ip );
+ FD_TEST( (ctx->tx_action==FD_NET_HOP_GRE) == use_gre );
if( use_gre ) {
- FD_TEST( ctx->tx_op.gre_outer_src_ip==gre_outer_src_ip );
- FD_TEST( ctx->tx_op.gre_outer_dst_ip==gre_outer_dst_ip );
+ FD_TEST( ctx->next_hop.gre_src_ip==gre_outer_src_ip );
+ FD_TEST( ctx->next_hop.gre_dst_ip==gre_outer_dst_ip );
}
/* during_frag */
uchar * src = fd_chunk_to_laddr( ctx->in[ 0 ].mem, tx_chunk );
fd_memcpy( src, during_frag_src, during_frag_src_sz );
during_frag( ctx, 0, tx_seq, 0, tx_chunk, during_frag_src_sz, 0 );
- FD_TEST( fd_memeq( ctx->tx_op.frame, during_frag_expected, during_frag_expected_sz ) );
/* after_frag */
ulong tx_metric_before = ctx->metrics.tx_submit_cnt;
- after_frag( ctx, 0, tx_seq, 0, during_frag_expected_sz, 0, 0, NULL );
+ after_frag( ctx, 0, tx_seq, 0, during_frag_src_sz, 0, 0, NULL );
ulong tx_metric_after = ctx->metrics.tx_submit_cnt;
FD_TEST( tx_metric_before+1==tx_metric_after ); /* assert that XDP tile published a TX frame */
struct xdp_desc * tx_ring_entry = &xsk->ring_tx.packet_ring[xdp_tx_ring_prod-1];
@@ -834,7 +796,7 @@ main( int argc,
void * after_frag_output = (void *)((ulong)tx_ring_entry->addr + (ulong)ctx->umem_frame0);
FD_TEST( fd_memeq( after_frag_output, after_frag_expected, after_frag_expected_sz ) );
tx_seq++;
- tx_chunk = fd_dcache_compact_next( tx_chunk, during_frag_expected_sz, tx_chunk0, tx_wmark );
+ tx_chunk = fd_dcache_compact_next( tx_chunk, after_frag_expected_sz, tx_chunk0, tx_wmark );
}
FD_LOG_NOTICE(( "pass" ));
diff --git a/src/disco/netlink/fd_netlink_tile.c b/src/disco/netlink/fd_netlink_tile.c
index c765a0fbbc..a0d09e66d9 100644
--- a/src/disco/netlink/fd_netlink_tile.c
+++ b/src/disco/netlink/fd_netlink_tile.c
@@ -110,7 +110,7 @@ populate_allowed_seccomp( fd_topo_t const * topo,
struct sock_filter * out ) {
fd_netlink_tile_ctx_t * ctx = fd_topo_obj_laddr( topo, tile->tile_obj_id );
FD_TEST( ctx->magic==FD_NETLINK_TILE_CTX_MAGIC );
- populate_sock_filter_policy_netlink( out_cnt, out, (uint)fd_log_private_logfile_fd(), (uint)ctx->nl_monitor->fd, (uint)ctx->nl_req->fd, (uint)ctx->prober->sock_fd );
+ populate_sock_filter_policy_netlink( out_cnt, out, (uint)fd_log_private_logfile_fd(), (uint)ctx->nl_monitor->fd, (uint)ctx->nl_req->fd );
return sock_filter_policy_netlink_instr_cnt;
}
@@ -122,7 +122,7 @@ populate_allowed_fds( fd_topo_t const * topo,
fd_netlink_tile_ctx_t * ctx = fd_topo_obj_laddr( topo, tile->tile_obj_id );
FD_TEST( ctx->magic==FD_NETLINK_TILE_CTX_MAGIC );
- if( FD_UNLIKELY( out_fds_cnt<5UL ) ) FD_LOG_ERR(( "out_fds_cnt too low (%lu)", out_fds_cnt ));
+ if( FD_UNLIKELY( out_fds_cnt<4UL ) ) FD_LOG_ERR(( "out_fds_cnt too low (%lu)", out_fds_cnt ));
ulong out_cnt = 0UL;
out_fds[ out_cnt++ ] = 2; /* stderr */
@@ -130,7 +130,6 @@ populate_allowed_fds( fd_topo_t const * topo,
out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
out_fds[ out_cnt++ ] = ctx->nl_monitor->fd;
out_fds[ out_cnt++ ] = ctx->nl_req->fd;
- out_fds[ out_cnt++ ] = ctx->prober->sock_fd;
return out_cnt;
}
@@ -168,11 +167,6 @@ privileged_init( fd_topo_t * topo,
FD_LOG_ERR(( "bind(sock,RT_NETLINK,RTMGRP_{LINK,NEIGH,IPV4_ROUTE}) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
}
- float const max_probes_per_second = 3.f;
- ulong const max_probe_burst = 128UL;
- float const probe_delay_seconds = 15.f;
- fd_neigh4_prober_init( ctx->prober, max_probes_per_second, max_probe_burst, probe_delay_seconds );
-
/* Set duration of blocking reads in before_credit */
struct timeval tv = { .tv_usec = 2000 }; /* 2ms */
if( FD_UNLIKELY( 0!=setsockopt( ctx->nl_monitor->fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval) ) ) ) {
@@ -204,10 +198,7 @@ unprivileged_init( fd_topo_t * topo,
ctx->fib4_local = fd_fib4_join( fd_topo_obj_laddr( topo, tile->netlink.fib4_local_obj_id ) ); FD_TEST( ctx->fib4_local );
ctx->fib4_main = fd_fib4_join( fd_topo_obj_laddr( topo, tile->netlink.fib4_main_obj_id ) ); FD_TEST( ctx->fib4_main );
- for( ulong i=0UL; iin_cnt; i++ ) {
- fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
- if( FD_UNLIKELY( link->mtu!=0UL ) ) FD_LOG_ERR(( "netlink solicit links must have an MTU of zero" ));
- }
+ if( FD_UNLIKELY( tile->in_cnt!=0 ) ) FD_LOG_ERR(( "netlink tile had unexpected input links" ));
ctx->action |= FD_NET_TILE_ACTION_LINK_UPDATE;
ctx->action |= FD_NET_TILE_ACTION_ROUTE4_UPDATE;
@@ -232,10 +223,6 @@ metrics_write( fd_netlink_tile_ctx_t * ctx ) {
FD_MGAUGE_SET( NETLNK, INTERFACE_COUNT, ctx->netdev_tbl->hdr->dev_cnt );
FD_MGAUGE_SET( NETLNK, ROUTE_COUNT_LOCAL, fd_fib4_cnt( ctx->fib4_local ) );
FD_MGAUGE_SET( NETLNK, ROUTE_COUNT_MAIN, fd_fib4_cnt( ctx->fib4_main ) );
- FD_MCNT_SET( NETLNK, NEIGH_PROBE_SENT, ctx->metrics.neigh_solicits_sent );
- FD_MCNT_SET( NETLNK, NEIGH_PROBE_FAILS, ctx->metrics.neigh_solicits_fails );
- FD_MCNT_SET( NETLNK, NEIGH_PROBE_RATE_LIMIT_HOST, ctx->prober->local_rate_limited_cnt );
- FD_MCNT_SET( NETLNK, NEIGH_PROBE_RATE_LIMIT_GLOBAL, ctx->prober->global_rate_limited_cnt );
}
/* netlink_monitor_read calls recvfrom to process a link, route, or
@@ -337,81 +324,6 @@ before_credit( fd_netlink_tile_ctx_t * ctx,
}
-/* after_poll_overrun is called when fd_stem.c was overrun while
- checking for new fragments. This typically happens when
- before_credit takes too long (e.g. we were in a blocking netlink
- read) */
-
-static void
-after_poll_overrun( fd_netlink_tile_ctx_t * ctx ) {
- ctx->idle_cnt = -1L;
-}
-
-/* after_frag handles a neighbor solicit request */
-
-static void
-after_frag( fd_netlink_tile_ctx_t * ctx,
- ulong in_idx,
- ulong seq,
- ulong sig,
- ulong sz,
- ulong tsorig,
- ulong tspub,
- fd_stem_context_t * stem ) {
- (void)in_idx; (void)seq; (void)tsorig; (void)tspub; (void)stem;
-
- long now = fd_tickcount();
- ctx->idle_cnt = -1L;
-
- /* Parse request (fully contained in sig field) */
-
- if( FD_UNLIKELY( sz!=0UL ) ) {
- FD_LOG_WARNING(( "unexpected sz %lu", sz ));
- }
- if( FD_UNLIKELY( sig>>48 ) ) {
- FD_LOG_WARNING(( "unexpected high bits in sig %016lx", sig ));
- }
- ushort if_idx = (ushort)(sig>>32);
- uint ip4_addr = (uint)sig;
- if( FD_UNLIKELY( if_idx!=ctx->neigh4_ifidx ) ) {
- ctx->metrics.neigh_solicits_fails++;
- FD_LOG_ERR(( "received neighbor solicit request for invalid interface index %u", if_idx ));
- return;
- }
-
- /* Drop if the kernel is already working on the request */
-
- fd_neigh4_hmap_query_t query[1];
- int spec_res = fd_neigh4_hmap_query_try( ctx->neigh4, &ip4_addr, NULL, query, 0 );
- if( spec_res==FD_MAP_SUCCESS ) {
- ctx->metrics.neigh_solicits_fails++;
- return;
- }
-
- /* Insert placeholder (take above branch next time) */
-
- int prepare_res = fd_neigh4_hmap_prepare( ctx->neigh4, &ip4_addr, NULL, query, 0 );
- if( FD_UNLIKELY( prepare_res!=FD_MAP_SUCCESS ) ) {
- ctx->metrics.neigh_solicits_fails++;
- return;
- }
- fd_neigh4_entry_t * ele = fd_neigh4_hmap_query_ele( query );
- ele->state = FD_NEIGH4_STATE_INCOMPLETE;
- ele->ip4_addr = ip4_addr;
- memset( ele->mac_addr, 0, 6UL );
- fd_neigh4_hmap_publish( query );
-
- /* Trigger neighbor solicit via netlink */
-
- int probe_res = fd_neigh4_probe_rate_limited( ctx->prober, ele, ip4_addr, now );
- if( probe_res==0 ) {
- ctx->metrics.neigh_solicits_sent++;
- } else if( probe_res>0 ) {
- ctx->metrics.neigh_solicits_fails++;
- }
-
-}
-
#define STEM_BURST (1UL)
#define STEM_LAZY ((ulong)13e6) /* 13ms */
@@ -421,8 +333,6 @@ after_frag( fd_netlink_tile_ctx_t * ctx,
#define STEM_CALLBACK_METRICS_WRITE metrics_write
#define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
#define STEM_CALLBACK_BEFORE_CREDIT before_credit
-#define STEM_CALLBACK_AFTER_POLL_OVERRUN after_poll_overrun
-#define STEM_CALLBACK_AFTER_FRAG after_frag
#include "../stem/fd_stem.c"
diff --git a/src/disco/netlink/fd_netlink_tile.h b/src/disco/netlink/fd_netlink_tile.h
index 6ad68399ad..e6276aa52b 100644
--- a/src/disco/netlink/fd_netlink_tile.h
+++ b/src/disco/netlink/fd_netlink_tile.h
@@ -16,17 +16,6 @@
extern fd_topo_run_tile_t fd_tile_netlnk;
-/* fd_netlink_neigh4_solicit_link_t holds information required to send
- neighbor solicitation requests to the netlink tile. */
-
-struct fd_netlink_neigh4_solicit_link {
- fd_frag_meta_t * mcache;
- ulong depth;
- ulong seq;
-};
-
-typedef struct fd_netlink_neigh4_solicit_link fd_netlink_neigh4_solicit_link_t;
-
struct fdctl_config;
FD_PROTOTYPES_BEGIN
@@ -44,21 +33,6 @@ fd_netlink_topo_join( fd_topo_t * topo,
fd_topo_tile_t * netlink_tile,
fd_topo_tile_t * join_tile );
-/* fd_netlink_neigh4_solicit requests a neighbor solicitation (i.e. ARP
- request) for an IPv4 address. Safe to call at a high rate. The
- netlink tile will deduplicate requests. ip4_addr is big endian. */
-
-static inline void
-fd_netlink_neigh4_solicit( fd_netlink_neigh4_solicit_link_t * link,
- uint ip4_addr,
- uint if_idx,
- ulong tspub_comp ) {
- ulong seq = link->seq;
- ulong sig = (ulong)ip4_addr | ( (ulong)if_idx<<32 );
- fd_mcache_publish( link->mcache, link->depth, seq, sig, 0UL, 0UL, 0UL, 0UL, tspub_comp );
- link->seq = fd_seq_inc( seq, 1UL );
-}
-
FD_PROTOTYPES_END
#endif /* HEADER_fd_src_disco_netlink_fd_netlink_tile_h */
diff --git a/src/disco/netlink/fd_netlink_tile_private.h b/src/disco/netlink/fd_netlink_tile_private.h
index 3addf77994..7de909abc2 100644
--- a/src/disco/netlink/fd_netlink_tile_private.h
+++ b/src/disco/netlink/fd_netlink_tile_private.h
@@ -7,7 +7,6 @@
#include "../../waltz/mib/fd_dbl_buf.h"
#include "../../waltz/mib/fd_netdev_tbl.h"
#include "../../waltz/neigh/fd_neigh4_map.h"
-#include "../../waltz/neigh/fd_neigh4_probe.h"
/* FD_NETLINK_TILE_CTX_MAGIC uniquely identifies a fd_netlink_tile_ctx_t.
CHange this whenever the fd_netlink_tile_ctx_t struct changes. */
@@ -46,15 +45,10 @@ struct fd_netlink_tile_ctx {
uint neigh4_ifidx;
long idle_cnt;
- /* Neighbor table prober */
- fd_neigh4_prober_t prober[1];
-
struct {
ulong link_full_syncs;
ulong route_full_syncs;
ulong update_cnt[ FD_METRICS_COUNTER_NETLNK_UPDATES_CNT ];
- ulong neigh_solicits_sent;
- ulong neigh_solicits_fails;
} metrics;
};
diff --git a/src/disco/netlink/generated/netlink_seccomp.h b/src/disco/netlink/generated/netlink_seccomp.h
index 5108320587..dca8921f61 100644
--- a/src/disco/netlink/generated/netlink_seccomp.h
+++ b/src/disco/netlink/generated/netlink_seccomp.h
@@ -21,14 +21,14 @@
#else
# error "Target architecture is unsupported by seccomp."
#endif
-static const unsigned int sock_filter_policy_netlink_instr_cnt = 46;
+static const unsigned int sock_filter_policy_netlink_instr_cnt = 36;
-static void populate_sock_filter_policy_netlink( ulong out_cnt, struct sock_filter * out, uint logfile_fd, uint nl_mon_fd, uint nl_req_fd, uint arp_probe_fd ) {
- FD_TEST( out_cnt >= 46 );
- struct sock_filter filter[46] = {
+static void populate_sock_filter_policy_netlink( ulong out_cnt, struct sock_filter * out, uint logfile_fd, uint nl_mon_fd, uint nl_req_fd ) {
+ FD_TEST( out_cnt >= 36 );
+ struct sock_filter filter[36] = {
/* Check: Jump to RET_KILL_PROCESS if the script's arch != the runtime arch */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, ( offsetof( struct seccomp_data, arch ) ) ),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, ARCH_NR, 0, /* RET_KILL_PROCESS */ 42 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, ARCH_NR, 0, /* RET_KILL_PROCESS */ 32 ),
/* loading syscall number in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, ( offsetof( struct seccomp_data, nr ) ) ),
/* allow write based on expression */
@@ -38,78 +38,58 @@ static void populate_sock_filter_policy_netlink( ulong out_cnt, struct sock_filt
/* allow sendto based on expression */
BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_sendto, /* check_sendto */ 8, 0 ),
/* allow recvfrom based on expression */
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_recvfrom, /* check_recvfrom */ 25, 0 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, SYS_recvfrom, /* check_recvfrom */ 15, 0 ),
/* none of the syscalls matched */
- { BPF_JMP | BPF_JA, 0, 0, /* RET_KILL_PROCESS */ 36 },
+ { BPF_JMP | BPF_JA, 0, 0, /* RET_KILL_PROCESS */ 26 },
// check_write:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 2, /* RET_ALLOW */ 35, /* lbl_1 */ 0 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 2, /* RET_ALLOW */ 25, /* lbl_1 */ 0 ),
// lbl_1:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 33, /* RET_KILL_PROCESS */ 32 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 23, /* RET_KILL_PROCESS */ 22 ),
// check_fsync:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 31, /* RET_KILL_PROCESS */ 30 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, logfile_fd, /* RET_ALLOW */ 21, /* RET_KILL_PROCESS */ 20 ),
// check_sendto:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, nl_req_fd, /* lbl_3 */ 0, /* lbl_2 */ 6 ),
-// lbl_3:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, nl_req_fd, /* lbl_2 */ 0, /* RET_KILL_PROCESS */ 18 ),
+// lbl_2:
/* load syscall argument 3 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[3])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_4 */ 0, /* lbl_2 */ 4 ),
-// lbl_4:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_3 */ 0, /* RET_KILL_PROCESS */ 16 ),
+// lbl_3:
/* load syscall argument 4 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[4])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_5 */ 0, /* lbl_2 */ 2 ),
-// lbl_5:
- /* load syscall argument 5 in accumulator */
- BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[5])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 23, /* lbl_2 */ 0 ),
-// lbl_2:
- /* load syscall argument 0 in accumulator */
- BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, arp_probe_fd, /* lbl_6 */ 0, /* RET_KILL_PROCESS */ 20 ),
-// lbl_6:
- /* load syscall argument 1 in accumulator */
- BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[1])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_7 */ 0, /* RET_KILL_PROCESS */ 18 ),
-// lbl_7:
- /* load syscall argument 2 in accumulator */
- BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[2])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_8 */ 0, /* RET_KILL_PROCESS */ 16 ),
-// lbl_8:
- /* load syscall argument 3 in accumulator */
- BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[3])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, MSG_DONTWAIT, /* lbl_9 */ 0, /* RET_KILL_PROCESS */ 14 ),
-// lbl_9:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_4 */ 0, /* RET_KILL_PROCESS */ 14 ),
+// lbl_4:
/* load syscall argument 5 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[5])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, FD_SOCKADDR_IN_SZ, /* RET_ALLOW */ 13, /* RET_KILL_PROCESS */ 12 ),
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 13, /* RET_KILL_PROCESS */ 12 ),
// check_recvfrom:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, nl_mon_fd, /* lbl_10 */ 2, /* lbl_11 */ 0 ),
-// lbl_11:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, nl_mon_fd, /* lbl_5 */ 2, /* lbl_6 */ 0 ),
+// lbl_6:
/* load syscall argument 0 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[0])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, nl_req_fd, /* lbl_10 */ 0, /* RET_KILL_PROCESS */ 8 ),
-// lbl_10:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, nl_req_fd, /* lbl_5 */ 0, /* RET_KILL_PROCESS */ 8 ),
+// lbl_5:
/* load syscall argument 3 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[3])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_12 */ 2, /* lbl_13 */ 0 ),
-// lbl_13:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_7 */ 2, /* lbl_8 */ 0 ),
+// lbl_8:
/* load syscall argument 3 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[3])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, MSG_DONTWAIT, /* lbl_12 */ 0, /* RET_KILL_PROCESS */ 4 ),
-// lbl_12:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, MSG_DONTWAIT, /* lbl_7 */ 0, /* RET_KILL_PROCESS */ 4 ),
+// lbl_7:
/* load syscall argument 4 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[4])),
- BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_14 */ 0, /* RET_KILL_PROCESS */ 2 ),
-// lbl_14:
+ BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* lbl_9 */ 0, /* RET_KILL_PROCESS */ 2 ),
+// lbl_9:
/* load syscall argument 5 in accumulator */
BPF_STMT( BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[5])),
BPF_JUMP( BPF_JMP | BPF_JEQ | BPF_K, 0, /* RET_ALLOW */ 1, /* RET_KILL_PROCESS */ 0 ),
diff --git a/src/disco/netlink/netlink.seccomppolicy b/src/disco/netlink/netlink.seccomppolicy
index c54586087a..a6411f889c 100644
--- a/src/disco/netlink/netlink.seccomppolicy
+++ b/src/disco/netlink/netlink.seccomppolicy
@@ -3,8 +3,7 @@
#
# nl_mon_fd: An rtnetlink socket used to monitor updates
# nl_req_fd: An rtnetlink socket used for request-reply
-# arp_probe_fd: A UDP socket used to indirectly generate ARP probes
-uint logfile_fd, uint nl_mon_fd, uint nl_req_fd, uint arp_probe_fd
+uint logfile_fd, uint nl_mon_fd, uint nl_req_fd
# logging: all log messages are written to a file and/or pipe
#
@@ -24,19 +23,11 @@ fsync: (eq (arg 0) logfile_fd)
# nl_req_fd: Periodically send read-only/unprivileged rtnetlink requests
#
-# arp_probe_fd: Send UDP packets that cause the kernel to generate ARP
-# requests
-#
# (In theory could use send(2) but that syscall doesn't exist on arm64)
-sendto: (or (and (eq (arg 0) nl_req_fd)
- (eq (arg 3) 0)
- (eq (arg 4) 0)
- (eq (arg 5) 0))
- (and (eq (arg 0) arp_probe_fd)
- (eq (arg 1) 0)
- (eq (arg 2) 0)
- (eq (arg 3) MSG_DONTWAIT)
- (eq (arg 5) FD_SOCKADDR_IN_SZ)))
+sendto: (and (eq (arg 0) nl_req_fd)
+ (eq (arg 3) 0)
+ (eq (arg 4) 0)
+ (eq (arg 5) 0))
# nl_mon_fd: Monitor for asynchronous rtnetlink updates
#
diff --git a/src/disco/shred/fd_shred_tile.c b/src/disco/shred/fd_shred_tile.c
index b991dfca18..45ce24aed4 100644
--- a/src/disco/shred/fd_shred_tile.c
+++ b/src/disco/shred/fd_shred_tile.c
@@ -1,4 +1,4 @@
-#include "../tiles.h"
+#include "fd_shred_tile.h"
#include "generated/fd_shred_tile_seccomp.h"
#include "../../util/pod/fd_pod_format.h"
@@ -172,6 +172,7 @@ typedef struct {
ushort net_id;
int skip_frag;
+ int ping_frag;
ulong adtl_dests_leader_cnt;
fd_shred_dest_weighted_t adtl_dests_leader [ FD_TOPO_ADTL_DESTS_MAX ];
@@ -349,6 +350,64 @@ before_frag( fd_shred_ctx_t * ctx,
return 0;
}
+/* *** Ping forwarding ***
+ Solana peers use a simple 'ping-pong' to do a primitive form of
+ address and endpoint validation. (Mainly defeats reflection-like
+ flood attacks, by allowing receivers to decline flows)
+
+ Unfortunately, ping-pong flows that belong to the repair tile reuse
+ the 'repair intake' port, which ends up at the shred tile. So, the
+ shred tile must forward pings back to repair. */
+
+static int
+forward_ping_prepare(
+ fd_shred_ctx_t * ctx,
+ uchar const * buf,
+ ulong sz
+) {
+ /* Don't do anything if the repair tile doesn't exist in this topology */
+ if( FD_UNLIKELY( !ctx->repair_out_mem ) ) return 0;
+
+ /* Extract IPv4 and UDP info, so downstream can generate a response */
+ if( FD_UNLIKELY( szrepair link.
+ Pings are smaller than the shred_repair MTU, so we always have
+ space to send a frame. */
+ FD_STATIC_ASSERT( FD_SHRED_REPAIR_MTU>=sizeof(fd_repair_ping_fwd_t), mtu );
+ fd_repair_ping_fwd_t * dst = fd_chunk_to_laddr( ctx->repair_out_mem, ctx->repair_out_chunk );
+ dst->src_ip4 = ip4.saddr;
+ dst->src_port = fd_ushort_bswap( udp.net_sport );
+ fd_memcpy( dst->ping, buf, FD_REPAIR_PING_SZ );
+ return 1;
+}
+
+static void
+forward_ping_commit(
+ fd_shred_ctx_t * ctx,
+ fd_stem_context_t * stem
+) {
+ /* Don't do anything if the repair tile doesn't exist in this topology */
+ if( FD_UNLIKELY( !ctx->repair_out_mem ) ) return;
+
+ /* Commit a previous shred->repair ping forward */
+ ulong out_idx = ctx->repair_out_idx;
+ ulong sig = ULONG_MAX; /* repair ping */
+ ulong chunk = ctx->repair_out_chunk;
+ ulong sz = sizeof(fd_repair_ping_fwd_t);
+ ulong ctl = 0UL; /* unused */
+ ulong tsorig = 0UL; /* TODO forward tsorig from upstream packet */
+ ulong tspub = fd_frag_meta_ts_comp( fd_tickcount() );
+ fd_stem_publish( stem, out_idx, sig, chunk, sz, ctl, tsorig, tspub );
+
+ /* Wind up for next iteration */
+ ctx->repair_out_chunk = fd_dcache_compact_next( chunk, sz, ctx->repair_out_chunk0, ctx->repair_out_wmark );
+}
+
static void
during_frag( fd_shred_ctx_t * ctx,
ulong in_idx,
@@ -635,6 +694,16 @@ during_frag( fd_shred_ctx_t * ctx,
uchar const * dcache_entry = fd_net_rx_translate_frag( &ctx->in[ in_idx ].net_rx, chunk, ctl, sz );
ulong hdr_sz = fd_disco_netmux_sig_hdr_sz( sig );
FD_TEST( hdr_sz <= sz ); /* Should be ensured by the net tile */
+
+ /* Ping traffic generated by the repair tile can end up with the
+ shred tile ~ forward it. */
+ if( FD_UNLIKELY( (sz-hdr_sz)==FD_REPAIR_PING_SZ ) ) {
+ ctx->ping_frag = !!forward_ping_prepare( ctx, dcache_entry, sz );
+ return;
+ } else {
+ ctx->ping_frag = 0;
+ }
+
fd_shred_t const * shred = fd_shred_parse( dcache_entry+hdr_sz, sz-hdr_sz );
if( FD_UNLIKELY( !shred ) ) {
ctx->skip_frag = 1;
@@ -808,6 +877,11 @@ after_frag( fd_shred_ctx_t * ctx,
ulong fanout = 200UL; /* Default Agave's DATA_PLANE_FANOUT = 200UL */
if( FD_LIKELY( ctx->in_kind[ in_idx ]==IN_KIND_NET ) ) {
+ if( FD_UNLIKELY( ctx->ping_frag ) ) {
+ forward_ping_commit( ctx, stem );
+ return;
+ }
+
uchar * shred_buffer = ctx->shred_buffer;
ulong shred_buffer_sz = ctx->shred_buffer_sz;
diff --git a/src/disco/shred/fd_shred_tile.h b/src/disco/shred/fd_shred_tile.h
index a8b472c315..1fd3b7534d 100644
--- a/src/disco/shred/fd_shred_tile.h
+++ b/src/disco/shred/fd_shred_tile.h
@@ -4,12 +4,30 @@
#include "../tiles.h"
#include "../../flamenco/types/fd_types_custom.h"
+/* FD_REPAIR_PING_SZ is the UDP payload size of a 'ping'-related packet.
+ These incoming packets are forwarded to the repair tile. */
+#define FD_REPAIR_PING_SZ (132UL)
+
+struct fd_repair_ping_fwd {
+ uint src_ip4;
+ uint dst_ip4;
+ ushort src_port;
+
+ /* FIXME: Just have a wire-format struct here for the ping frame.
+ This is currently not possible due to use of fd_types, which is not
+ guaranteed to have the same in-memory format as the wire format. */
+ uchar ping[ FD_REPAIR_PING_SZ ];
+};
+
+typedef struct fd_repair_ping_fwd fd_repair_ping_fwd_t;
+
/* Forward declarations */
typedef struct fd_fec_resolver fd_fec_resolver_t;
typedef struct fd_keyswitch_private fd_keyswitch_t;
typedef struct fd_keyguard_client fd_keyguard_client_t;
-/* Shred tile context structure */
+/* Part of the shred tile context struct
+ FIXME remove this and just use fd_shred_ctx_t everywhere */
typedef struct {
fd_shredder_t * shredder;
fd_fec_resolver_t * resolver;
@@ -27,6 +45,6 @@ typedef struct {
fd_keyswitch_t * keyswitch;
fd_keyguard_client_t keyguard_client[1];
/* ... rest of the structure members ... */
-} fd_shred_ctx_t;
+} fd_shred_ctx_hdr_t;
#endif /* HEADER_fd_src_disco_shred_fd_shred_tile_h */
diff --git a/src/disco/topo/fd_topo.h b/src/disco/topo/fd_topo.h
index d6c446f83e..5ce1fb7bbe 100644
--- a/src/disco/topo/fd_topo.h
+++ b/src/disco/topo/fd_topo.h
@@ -29,13 +29,16 @@
/* Maximum number of additional destinations for leader shreds and for retransmitted shreds */
#define FD_TOPO_ADTL_DESTS_MAX ( 32UL)
+#define FD_TOPO_LINK_NAME_SZ (13UL)
+
+#define FD_TOPO_NET_RX_RULE_MAX (32UL)
/* A workspace is a Firedancer specific memory management structure that
sits on top of 1 or more memory mapped gigantic or huge pages mounted
to the hugetlbfs. */
typedef struct {
ulong id; /* The ID of this workspace. Indexed from [0, wksp_cnt). When placed in a topology, the ID must be the index of the workspace in the workspaces list. */
- char name[ 13UL ]; /* The name of this workspace, like "pack". There can be at most one of each workspace name in a topology. */
+ char name[ FD_TOPO_LINK_NAME_SZ ]; /* The name of this workspace, like "pack". There can be at most one of each workspace name in a topology. */
ulong numa_idx; /* The index of the NUMA node on the system that this workspace should be allocated from. */
@@ -90,17 +93,53 @@ typedef struct {
ushort port; /* in host byte order */
} fd_topo_ip_port_t;
+struct fd_topo_net_rx_rule {
+ ushort port;
+ ushort proto_id;
+ char link[ FD_TOPO_LINK_NAME_SZ ];
+};
+typedef struct fd_topo_net_rx_rule fd_topo_net_rx_rule_t;
+
+struct fd_topo_net_rx {
+ fd_topo_net_rx_rule_t rx_rules[ FD_TOPO_NET_RX_RULE_MAX ];
+ ushort rx_rule_cnt;
+};
+typedef struct fd_topo_net_rx fd_topo_net_rx_t;
+
+static inline void
+fd_topo_net_rx_rule_push( fd_topo_net_rx_t * net,
+ ushort dst_id,
+ char const * link_name,
+ ushort port ) {
+ ulong const prev_rule_cnt = net->rx_rule_cnt;
+ if( FD_UNLIKELY( prev_rule_cnt>=FD_TOPO_NET_RX_RULE_MAX ) ) {
+ FD_LOG_ERR(( "too many net rx rules" ));
+ }
+
+ for( ulong i=0UL; irx_rules[ i ].port==port ) {
+ FD_LOG_ERR(( "duplicate net rx rule for port %hu", port ));
+ }
+ }
+
+ fd_topo_net_rx_rule_t * rule = &net->rx_rules[ prev_rule_cnt ];
+ fd_memset( rule, 0, sizeof(fd_topo_net_rx_rule_t) );
+ rule->port = port;
+ rule->proto_id = dst_id;
+
+ ulong link_name_len = strnlen( link_name, FD_TOPO_LINK_NAME_SZ );
+ if( FD_UNLIKELY( link_name_len>=FD_TOPO_LINK_NAME_SZ ) ) {
+ FD_LOG_ERR(( "link name too long: \"%s\"", link_name ));
+ }
+ fd_cstr_fini( fd_cstr_append_text( fd_cstr_init( rule->link ), link_name, link_name_len ) );
+
+ net->rx_rule_cnt = (ushort)( prev_rule_cnt+1UL );
+}
+
struct fd_topo_net_tile {
ulong umem_dcache_obj_id; /* dcache for XDP UMEM frames */
uint bind_address;
-
- ushort shred_listen_port;
- ushort quic_transaction_listen_port;
- ushort legacy_transaction_listen_port;
- ushort gossip_listen_port;
- ushort repair_intake_listen_port;
- ushort repair_serve_listen_port;
- ushort send_src_port;
+ fd_topo_net_rx_t rx_rules;
};
typedef struct fd_topo_net_tile fd_topo_net_tile_t;
diff --git a/src/disco/topo/fd_topo_run.c b/src/disco/topo/fd_topo_run.c
index 4fcacd4325..209d9e9ef3 100644
--- a/src/disco/topo/fd_topo_run.c
+++ b/src/disco/topo/fd_topo_run.c
@@ -252,22 +252,18 @@ fd_topo_install_xdp( fd_topo_t const * topo,
FD_TEST( net0_tile_idx!=ULONG_MAX );
fd_topo_tile_t const * net0_tile = &topo->tiles[ net0_tile_idx ];
- ushort udp_port_candidates[] = {
- (ushort)net0_tile->xdp.net.legacy_transaction_listen_port,
- (ushort)net0_tile->xdp.net.quic_transaction_listen_port,
- (ushort)net0_tile->xdp.net.shred_listen_port,
- (ushort)net0_tile->xdp.net.gossip_listen_port,
- (ushort)net0_tile->xdp.net.repair_intake_listen_port,
- (ushort)net0_tile->xdp.net.repair_serve_listen_port,
- (ushort)net0_tile->xdp.net.send_src_port,
- };
+ ulong const rule_cnt = net0_tile->net.rx_rules.rx_rule_cnt;
+ ushort udp_port_candidates[ FD_TOPO_NET_RX_RULE_MAX ];
+ for( ulong i=0UL; inet.rx_rules.rx_rules[ i ].port;
+ }
uint if_idx = if_nametoindex( net0_tile->xdp.interface );
if( FD_UNLIKELY( !if_idx ) ) FD_LOG_ERR(( "if_nametoindex(%s) failed", net0_tile->xdp.interface ));
fd_xdp_fds_t xdp_fds = fd_xdp_install( if_idx,
bind_addr,
- sizeof(udp_port_candidates)/sizeof(udp_port_candidates[0]),
+ rule_cnt,
udp_port_candidates,
net0_tile->xdp.xdp_mode );
if( FD_UNLIKELY( -1==dup2( xdp_fds.xsk_map_fd, 123462 ) ) ) FD_LOG_ERR(( "dup2() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
diff --git a/src/discof/repair/fd_repair_tile.c b/src/discof/repair/fd_repair_tile.c
index 206cb252aa..21abf79695 100644
--- a/src/discof/repair/fd_repair_tile.c
+++ b/src/discof/repair/fd_repair_tile.c
@@ -2,6 +2,7 @@
#define _GNU_SOURCE
#include "../../disco/topo/fd_topo.h"
+#include "../../disco/shred/fd_shred_tile.h"
#include "generated/fd_repair_tile_seccomp.h"
#include "../../flamenco/repair/fd_repair.h"
@@ -253,14 +254,12 @@ handle_new_cluster_contact_info( fd_repair_tile_ctx_t * ctx,
}
}
-ulong
-fd_repair_handle_ping( fd_repair_tile_ctx_t * repair_tile_ctx,
- fd_repair_t * glob,
- fd_gossip_ping_t const * ping,
- fd_gossip_peer_addr_t const * peer_addr FD_PARAM_UNUSED,
- uint self_ip4_addr FD_PARAM_UNUSED,
- uchar * msg_buf,
- ulong msg_buf_sz ) {
+static ulong
+fd_repair_handle_ping( fd_repair_tile_ctx_t * repair_tile_ctx,
+ fd_repair_t * glob,
+ fd_gossip_ping_t const * ping,
+ uchar * msg_buf,
+ ulong msg_buf_sz ) {
fd_repair_protocol_t protocol;
fd_repair_protocol_new_disc(&protocol, fd_repair_protocol_enum_pong);
fd_gossip_ping_t * pong = &protocol.inner.pong;
@@ -286,8 +285,22 @@ fd_repair_handle_ping( fd_repair_tile_ctx_t * repair_tile_ctx,
return buflen;
}
+static void
+fd_repair_handle_ping1( fd_repair_tile_ctx_t * repair_tile_ctx,
+ fd_repair_t * glob,
+ fd_stem_context_t * stem,
+ fd_gossip_ping_t const * ping,
+ uint const src_ip,
+ uint const dst_port,
+ ushort const src_port ) {
+ uchar buf[1024];
+ ulong buflen = fd_repair_handle_ping( repair_tile_ctx, glob, ping, buf, sizeof(buf) );
+ ulong tsorig = fd_frag_meta_ts_comp( fd_tickcount() );
+ send_packet( repair_tile_ctx, stem, 1, src_ip, src_port, dst_port, buf, buflen, tsorig );
+}
+
/* Pass a raw client response packet into the protocol. addr is the address of the sender */
-static int
+static void
fd_repair_recv_clnt_packet( fd_repair_tile_ctx_t * repair_tile_ctx,
fd_stem_context_t * stem,
fd_repair_t * glob,
@@ -297,35 +310,25 @@ fd_repair_recv_clnt_packet( fd_repair_tile_ctx_t * repair_tile_ctx,
uint dst_ip4_addr ) {
glob->metrics.recv_clnt_pkt++;
- FD_SCRATCH_SCOPE_BEGIN {
- while( 1 ) {
- ulong decoded_sz;
- fd_repair_response_t * gmsg = fd_bincode_decode1_scratch(
- repair_response, msg, msglen, NULL, &decoded_sz );
- if( FD_UNLIKELY( !gmsg ) ) {
- /* Solana falls back to assuming we got a shred in this case
- https://github.com/solana-labs/solana/blob/master/core/src/repair/serve_repair.rs#L1198 */
- break;
- }
- if( FD_UNLIKELY( decoded_sz != msglen ) ) {
- break;
- }
-
- switch( gmsg->discriminant ) {
- case fd_repair_response_enum_ping:
- {
- uchar buf[1024];
- ulong buflen = fd_repair_handle_ping( repair_tile_ctx, glob, &gmsg->inner.ping, src_addr, dst_ip4_addr, buf, sizeof(buf) );
- ulong tsorig = fd_frag_meta_ts_comp( fd_tickcount() );
- send_packet( repair_tile_ctx, stem, 1, src_addr->addr, src_addr->port, dst_ip4_addr, buf, buflen, tsorig );
- break;
- }
- }
-
- return 0;
+ if( FD_UNLIKELY( msglenmetrics.recv_pkt_corrupted_msg++;
+ return;
+ }
+ uint msg_type = FD_LOAD( uint, msg );
+ msg += sizeof(uint);
+ msglen -= sizeof(uint);
+
+ switch( msg_type ) {
+ case 0: /* ping */
+ if( FD_UNLIKELY( msglen!=132 ) ) {
+ glob->metrics.recv_pkt_corrupted_msg++;
+ return;
}
- } FD_SCRATCH_SCOPE_END;
- return 0;
+ fd_repair_handle_ping1( repair_tile_ctx, glob, stem, fd_type_pun_const( msg ), src_addr->addr, dst_ip4_addr, src_addr->port );
+ break;
+ default:
+ break;
+ }
}
static ulong
@@ -427,7 +430,10 @@ before_frag( fd_repair_tile_ctx_t * ctx,
ulong sig ) {
uint in_kind = ctx->in_kind[ in_idx ];
if( FD_LIKELY ( in_kind==IN_KIND_NET ) ) return fd_disco_netmux_sig_proto( sig )!=DST_PROTO_REPAIR;
- if( FD_UNLIKELY( in_kind==IN_KIND_SHRED ) ) return fd_int_if( fd_forest_root_slot( ctx->forest )==ULONG_MAX, -1, 0 ); /* not ready to read frag */
+ if( FD_UNLIKELY( in_kind==IN_KIND_SHRED ) ) {
+ if( FD_UNLIKELY( sig==ULONG_MAX ) ) return 0; /* repair ping */
+ return fd_int_if( fd_forest_root_slot( ctx->forest )==ULONG_MAX, -1, 0 ); /* not ready to read frag */
+ }
return 0;
}
@@ -646,6 +652,13 @@ after_frag( fd_repair_tile_ctx_t * ctx,
return;
}
+ if( FD_UNLIKELY( in_kind==IN_KIND_NET && sig==ULONG_MAX ) ) {
+ fd_repair_ping_fwd_t const * fwd = fd_type_pun_const( ctx->buffer );
+ fd_gossip_ping_t const * ping = fd_type_pun_const( fwd->ping );
+ fd_repair_handle_ping1( ctx, ctx->repair, stem, ping, fwd->src_ip4, fwd->dst_ip4, fwd->src_port );
+ return;
+ }
+
fd_eth_hdr_t const * eth = (fd_eth_hdr_t const *)ctx->buffer;
fd_ip4_hdr_t const * ip4 = (fd_ip4_hdr_t const *)( (ulong)eth + sizeof(fd_eth_hdr_t) );
fd_udp_hdr_t const * udp = (fd_udp_hdr_t const *)( (ulong)ip4 + FD_IP4_GET_LEN( *ip4 ) );
diff --git a/src/flamenco/types/fd_fuzz_types.h b/src/flamenco/types/fd_fuzz_types.h
index 0bd02308a7..91f7a206ca 100644
--- a/src/flamenco/types/fd_fuzz_types.h
+++ b/src/flamenco/types/fd_fuzz_types.h
@@ -3675,23 +3675,6 @@ void *fd_repair_protocol_generate( void *mem, void **alloc_mem, fd_rng_t * rng )
return mem;
}
-void fd_repair_response_inner_generate( fd_repair_response_inner_t * self, void **alloc_mem, uint discriminant, fd_rng_t * rng ) {
- switch (discriminant) {
- case 0: {
- fd_gossip_ping_generate( &self->ping, alloc_mem, rng );
- break;
- }
- }
-}
-void *fd_repair_response_generate( void *mem, void **alloc_mem, fd_rng_t * rng ) {
- fd_repair_response_t *self = (fd_repair_response_t *) mem;
- *alloc_mem = (uchar *) *alloc_mem + sizeof(fd_repair_response_t);
- fd_repair_response_new(mem);
- self->discriminant = fd_rng_uint( rng ) % 1;
- fd_repair_response_inner_generate( &self->inner, alloc_mem, self->discriminant, rng );
- return mem;
-}
-
void fd_instr_error_enum_inner_generate( fd_instr_error_enum_inner_t * self, void **alloc_mem, uint discriminant, fd_rng_t * rng ) {
switch (discriminant) {
case 25: {
diff --git a/src/flamenco/types/fd_types.c b/src/flamenco/types/fd_types.c
index cab99c49e8..8311185fa4 100644
--- a/src/flamenco/types/fd_types.c
+++ b/src/flamenco/types/fd_types.c
@@ -22410,116 +22410,6 @@ int fd_repair_protocol_encode( fd_repair_protocol_t const * self, fd_bincode_enc
return fd_repair_protocol_inner_encode( &self->inner, self->discriminant, ctx );
}
-FD_FN_PURE uchar fd_repair_response_is_ping(fd_repair_response_t const * self) {
- return self->discriminant == 0;
-}
-void fd_repair_response_inner_new( fd_repair_response_inner_t * self, uint discriminant );
-int fd_repair_response_inner_decode_footprint( uint discriminant, fd_bincode_decode_ctx_t * ctx, ulong * total_sz ) {
- int err;
- switch (discriminant) {
- case 0: {
- err = fd_gossip_ping_decode_footprint_inner( ctx, total_sz );
- if( FD_UNLIKELY( err ) ) return err;
- return FD_BINCODE_SUCCESS;
- }
- default: return FD_BINCODE_ERR_ENCODING;
- }
-}
-static int fd_repair_response_decode_footprint_inner( fd_bincode_decode_ctx_t * ctx, ulong * total_sz ) {
- if( ctx->data>=ctx->dataend ) { return FD_BINCODE_ERR_OVERFLOW; };
- uint discriminant = 0;
- int err = fd_bincode_uint32_decode( &discriminant, ctx );
- if( FD_UNLIKELY( err ) ) return err;
- return fd_repair_response_inner_decode_footprint( discriminant, ctx, total_sz );
-}
-int fd_repair_response_decode_footprint( fd_bincode_decode_ctx_t * ctx, ulong * total_sz ) {
- *total_sz += sizeof(fd_repair_response_t);
- void const * start_data = ctx->data;
- int err = fd_repair_response_decode_footprint_inner( ctx, total_sz );
- if( ctx->data>ctx->dataend ) { return FD_BINCODE_ERR_OVERFLOW; };
- ctx->data = start_data;
- return err;
-}
-static void fd_repair_response_inner_decode_inner( fd_repair_response_inner_t * self, void * * alloc_mem, uint discriminant, fd_bincode_decode_ctx_t * ctx ) {
- switch (discriminant) {
- case 0: {
- fd_gossip_ping_decode_inner( &self->ping, alloc_mem, ctx );
- break;
- }
- }
-}
-static void fd_repair_response_decode_inner( void * struct_mem, void * * alloc_mem, fd_bincode_decode_ctx_t * ctx ) {
- fd_repair_response_t * self = (fd_repair_response_t *)struct_mem;
- fd_bincode_uint32_decode_unsafe( &self->discriminant, ctx );
- fd_repair_response_inner_decode_inner( &self->inner, alloc_mem, self->discriminant, ctx );
-}
-void * fd_repair_response_decode( void * mem, fd_bincode_decode_ctx_t * ctx ) {
- fd_repair_response_t * self = (fd_repair_response_t *)mem;
- fd_repair_response_new( self );
- void * alloc_region = (uchar *)mem + sizeof(fd_repair_response_t);
- void * * alloc_mem = &alloc_region;
- fd_repair_response_decode_inner( mem, alloc_mem, ctx );
- return self;
-}
-void fd_repair_response_inner_new( fd_repair_response_inner_t * self, uint discriminant ) {
- switch( discriminant ) {
- case 0: {
- fd_gossip_ping_new( &self->ping );
- break;
- }
- default: break; // FD_LOG_ERR(( "unhandled type"));
- }
-}
-void fd_repair_response_new_disc( fd_repair_response_t * self, uint discriminant ) {
- self->discriminant = discriminant;
- fd_repair_response_inner_new( &self->inner, self->discriminant );
-}
-void fd_repair_response_new( fd_repair_response_t * self ) {
- fd_memset( self, 0, sizeof(fd_repair_response_t) );
- fd_repair_response_new_disc( self, UINT_MAX );
-}
-
-void fd_repair_response_walk( void * w, fd_repair_response_t const * self, fd_types_walk_fn_t fun, const char *name, uint level, uint varint ) {
- (void) varint;
- fun(w, self, name, FD_FLAMENCO_TYPE_ENUM, "fd_repair_response", level++, 0);
- switch( self->discriminant ) {
- case 0: {
- fun( w, self, "ping", FD_FLAMENCO_TYPE_ENUM_DISC, "discriminant", level, 0 );
- fd_gossip_ping_walk( w, &self->inner.ping, fun, "ping", level, 0 );
- break;
- }
- }
- fun( w, self, name, FD_FLAMENCO_TYPE_ENUM_END, "fd_repair_response", level--, 0 );
-}
-ulong fd_repair_response_size( fd_repair_response_t const * self ) {
- ulong size = 0;
- size += sizeof(uint);
- switch (self->discriminant) {
- case 0: {
- size += fd_gossip_ping_size( &self->inner.ping );
- break;
- }
- }
- return size;
-}
-
-int fd_repair_response_inner_encode( fd_repair_response_inner_t const * self, uint discriminant, fd_bincode_encode_ctx_t * ctx ) {
- int err;
- switch (discriminant) {
- case 0: {
- err = fd_gossip_ping_encode( &self->ping, ctx );
- if( FD_UNLIKELY( err ) ) return err;
- break;
- }
- }
- return FD_BINCODE_SUCCESS;
-}
-int fd_repair_response_encode( fd_repair_response_t const * self, fd_bincode_encode_ctx_t * ctx ) {
- int err = fd_bincode_uint32_encode( self->discriminant, ctx );
- if( FD_UNLIKELY( err ) ) return err;
- return fd_repair_response_inner_encode( &self->inner, self->discriminant, ctx );
-}
-
FD_FN_PURE uchar fd_instr_error_enum_is_generic_error(fd_instr_error_enum_t const * self) {
return self->discriminant == 0;
}
diff --git a/src/flamenco/types/fd_types.h b/src/flamenco/types/fd_types.h
index 51275c8bfe..01c9bb384b 100644
--- a/src/flamenco/types/fd_types.h
+++ b/src/flamenco/types/fd_types.h
@@ -3266,18 +3266,6 @@ struct fd_repair_protocol {
typedef struct fd_repair_protocol fd_repair_protocol_t;
#define FD_REPAIR_PROTOCOL_ALIGN alignof(fd_repair_protocol_t)
-union fd_repair_response_inner {
- fd_gossip_ping_t ping;
-};
-typedef union fd_repair_response_inner fd_repair_response_inner_t;
-
-struct fd_repair_response {
- uint discriminant;
- fd_repair_response_inner_t inner;
-};
-typedef struct fd_repair_response fd_repair_response_t;
-#define FD_REPAIR_RESPONSE_ALIGN alignof(fd_repair_response_t)
-
union fd_instr_error_enum_inner {
uint custom;
char* borsh_io_error;
@@ -6115,19 +6103,6 @@ fd_repair_protocol_enum_highest_window_index = 9,
fd_repair_protocol_enum_orphan = 10,
fd_repair_protocol_enum_ancestor_hashes = 11,
};
-void fd_repair_response_new_disc( fd_repair_response_t * self, uint discriminant );
-void fd_repair_response_new( fd_repair_response_t * self );
-int fd_repair_response_encode( fd_repair_response_t const * self, fd_bincode_encode_ctx_t * ctx );
-void fd_repair_response_walk( void * w, fd_repair_response_t const * self, fd_types_walk_fn_t fun, const char *name, uint level, uint varint );
-ulong fd_repair_response_size( fd_repair_response_t const * self );
-static inline ulong fd_repair_response_align( void ) { return FD_REPAIR_RESPONSE_ALIGN; }
-int fd_repair_response_decode_footprint( fd_bincode_decode_ctx_t * ctx, ulong * total_sz );
-void * fd_repair_response_decode( void * mem, fd_bincode_decode_ctx_t * ctx );
-
-FD_FN_PURE uchar fd_repair_response_is_ping( fd_repair_response_t const * self );
-enum {
-fd_repair_response_enum_ping = 0,
-};
void fd_instr_error_enum_new_disc( fd_instr_error_enum_t * self, uint discriminant );
void fd_instr_error_enum_new( fd_instr_error_enum_t * self );
int fd_instr_error_enum_encode( fd_instr_error_enum_t const * self, fd_bincode_encode_ctx_t * ctx );
diff --git a/src/flamenco/types/fd_types.json b/src/flamenco/types/fd_types.json
index ff5210d0fe..f5fd5c6e30 100644
--- a/src/flamenco/types/fd_types.json
+++ b/src/flamenco/types/fd_types.json
@@ -2302,13 +2302,6 @@
{ "name": "ancestor_hashes", "type": "repair_ancestor_hashes" }
]
},
- {
- "name": "repair_response",
- "type": "enum",
- "variants": [
- { "name": "ping", "type": "gossip_ping" }
- ]
- },
{
"name": "instr_error_enum",
"type": "enum",
diff --git a/src/flamenco/types/fd_types_reflect_generated.c b/src/flamenco/types/fd_types_reflect_generated.c
index d9f7d6cb6f..ab9187f75c 100644
--- a/src/flamenco/types/fd_types_reflect_generated.c
+++ b/src/flamenco/types/fd_types_reflect_generated.c
@@ -3,7 +3,7 @@
#include "fd_types_custom.h"
#include "fd_types_reflect_private.h"
#pragma GCC diagnostic ignored "-Wpedantic"
-ulong fd_types_vt_list_cnt = 248;
+ulong fd_types_vt_list_cnt = 247;
fd_types_vt_t const fd_types_vt_list[] = {
{ .name="fd_hash", .name_len=7, .align=FD_HASH_ALIGN, .new_=(void *)fd_hash_new, .decode=(void *)fd_hash_decode, .size=(void *)fd_hash_size, .walk=(void *)fd_hash_walk, .decode_footprint=(void *)fd_hash_decode_footprint, .encode=(void *)fd_hash_encode },
{ .name="fd_pubkey", .name_len=9, .align=FD_PUBKEY_ALIGN, .new_=(void *)fd_pubkey_new, .decode=(void *)fd_pubkey_decode, .size=(void *)fd_pubkey_size, .walk=(void *)fd_pubkey_walk, .decode_footprint=(void *)fd_pubkey_decode_footprint, .encode=(void *)fd_pubkey_encode },
@@ -228,7 +228,6 @@ fd_types_vt_t const fd_types_vt_list[] = {
{ .name="fd_repair_orphan", .name_len=16, .align=FD_REPAIR_ORPHAN_ALIGN, .new_=(void *)fd_repair_orphan_new, .decode=(void *)fd_repair_orphan_decode, .size=(void *)fd_repair_orphan_size, .walk=(void *)fd_repair_orphan_walk, .decode_footprint=(void *)fd_repair_orphan_decode_footprint, .encode=(void *)fd_repair_orphan_encode },
{ .name="fd_repair_ancestor_hashes", .name_len=25, .align=FD_REPAIR_ANCESTOR_HASHES_ALIGN, .new_=(void *)fd_repair_ancestor_hashes_new, .decode=(void *)fd_repair_ancestor_hashes_decode, .size=(void *)fd_repair_ancestor_hashes_size, .walk=(void *)fd_repair_ancestor_hashes_walk, .decode_footprint=(void *)fd_repair_ancestor_hashes_decode_footprint, .encode=(void *)fd_repair_ancestor_hashes_encode },
{ .name="fd_repair_protocol", .name_len=18, .align=FD_REPAIR_PROTOCOL_ALIGN, .new_=(void *)fd_repair_protocol_new, .decode=(void *)fd_repair_protocol_decode, .size=(void *)fd_repair_protocol_size, .walk=(void *)fd_repair_protocol_walk, .decode_footprint=(void *)fd_repair_protocol_decode_footprint, .encode=(void *)fd_repair_protocol_encode },
- { .name="fd_repair_response", .name_len=18, .align=FD_REPAIR_RESPONSE_ALIGN, .new_=(void *)fd_repair_response_new, .decode=(void *)fd_repair_response_decode, .size=(void *)fd_repair_response_size, .walk=(void *)fd_repair_response_walk, .decode_footprint=(void *)fd_repair_response_decode_footprint, .encode=(void *)fd_repair_response_encode },
{ .name="fd_instr_error_enum", .name_len=19, .align=FD_INSTR_ERROR_ENUM_ALIGN, .new_=(void *)fd_instr_error_enum_new, .decode=(void *)fd_instr_error_enum_decode, .size=(void *)fd_instr_error_enum_size, .walk=(void *)fd_instr_error_enum_walk, .decode_footprint=(void *)fd_instr_error_enum_decode_footprint, .encode=(void *)fd_instr_error_enum_encode },
{ .name="fd_txn_instr_error", .name_len=18, .align=FD_TXN_INSTR_ERROR_ALIGN, .new_=(void *)fd_txn_instr_error_new, .decode=(void *)fd_txn_instr_error_decode, .size=(void *)fd_txn_instr_error_size, .walk=(void *)fd_txn_instr_error_walk, .decode_footprint=(void *)fd_txn_instr_error_decode_footprint, .encode=(void *)fd_txn_instr_error_encode },
{ .name="fd_txn_error_enum", .name_len=17, .align=FD_TXN_ERROR_ENUM_ALIGN, .new_=(void *)fd_txn_error_enum_new, .decode=(void *)fd_txn_error_enum_decode, .size=(void *)fd_txn_error_enum_size, .walk=(void *)fd_txn_error_enum_walk, .decode_footprint=(void *)fd_txn_error_enum_decode_footprint, .encode=(void *)fd_txn_error_enum_encode },
diff --git a/src/waltz/neigh/Local.mk b/src/waltz/neigh/Local.mk
index 8aa1b5e01a..4f3ea2d6f0 100644
--- a/src/waltz/neigh/Local.mk
+++ b/src/waltz/neigh/Local.mk
@@ -1,7 +1,7 @@
$(call add-hdrs,fd_neigh4_map.h fd_neigh4_map_defines.h)
$(call add-objs,fd_neigh4_map,fd_waltz)
ifdef FD_HAS_LINUX
-$(call add-hdrs,fd_neigh4_netlink.h fd_neigh4_probe.h)
-$(call add-objs,fd_neigh4_netlink fd_neigh4_probe,fd_waltz)
+$(call add-hdrs,fd_neigh4_netlink.h)
+$(call add-objs,fd_neigh4_netlink,fd_waltz)
$(call make-unit-test,test_neigh4_netlink,test_neigh4_netlink,fd_waltz fd_util)
endif
diff --git a/src/waltz/neigh/fd_neigh4_probe.c b/src/waltz/neigh/fd_neigh4_probe.c
deleted file mode 100644
index f4507c85fb..0000000000
--- a/src/waltz/neigh/fd_neigh4_probe.c
+++ /dev/null
@@ -1,81 +0,0 @@
-#include "fd_neigh4_probe.h"
-#include "../../tango/tempo/fd_tempo.h" /* fd_tempo_tick_per_ns */
-
-#include
-#include /* socket(2) */
-#include /* IPPROTO_IP */
-#include /* close(2) */
-
-void
-fd_neigh4_prober_init( fd_neigh4_prober_t * prober,
- float max_probes_per_second,
- ulong max_probe_burst,
- float probe_delay_seconds ) {
-
- int sock_fd = socket( AF_INET, SOCK_DGRAM, 0 );
- if( FD_UNLIKELY( sock_fd<0 ) ) {
- FD_LOG_ERR(( "socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
- errno, fd_io_strerror( errno ) ));
- }
-
- /* IP_TTL=1 is the lowest permitted value:
- https://github.com/torvalds/linux/blob/v6.13/net/ipv4/ip_sockglue.c#L300 */
- int ip_ttl = 1;
- if( FD_UNLIKELY( 0!=setsockopt( sock_fd, IPPROTO_IP, IP_TTL, &ip_ttl, sizeof(int) ) ) ) {
- (void)close( sock_fd );
- FD_LOG_ERR(( "setsockopt(%i,IPPROTO_IP,IP_TTL,1) failed (%i-%s)",
- sock_fd, errno, fd_io_strerror( errno ) ));
- }
-
- /* Only need to send probe packets to Ethernet neighbors */
- int dontroute = 1;
- if( FD_UNLIKELY( 0!=setsockopt( sock_fd, SOL_SOCKET, SO_DONTROUTE, &dontroute, sizeof(int) ) ) ) {
- (void)close( sock_fd );
- FD_LOG_ERR(( "setsockopt(%i,SOL_SOCKET,SO_DONTROUTE,1) failed (%i-%s)",
- sock_fd, errno, fd_io_strerror( errno ) ));
- }
-
- float tick_per_ns = (float)fd_tempo_tick_per_ns( NULL );
-
- *prober = (fd_neigh4_prober_t) {
- .sock_fd = sock_fd,
- .probe_delay = (long)( tick_per_ns * probe_delay_seconds * 1e9f ),
- .rate_limit = (fd_token_bucket_t) {
- .ts = fd_tickcount(),
- .rate = tick_per_ns * (max_probes_per_second / 1e9f),
- .burst = (float)max_probe_burst,
- .balance = 0.f
- },
- .local_rate_limited_cnt = 0UL,
- .global_rate_limited_cnt = 0UL
- };
-}
-
-void
-fd_neigh4_prober_fini( fd_neigh4_prober_t * prober ) {
- if( FD_UNLIKELY( 0!=close( prober->sock_fd ) ) ) {
- FD_LOG_ERR(( "close(%i) failed (%i-%s)",
- prober->sock_fd, errno, fd_io_strerror( errno ) ));
- }
- prober->sock_fd = -1;
-}
-
-int
-fd_neigh4_probe( fd_neigh4_prober_t * prober,
- fd_neigh4_entry_t * entry,
- uint ip4_addr,
- long now ) {
-
- struct sockaddr_in dst = {
- .sin_family = AF_INET,
- .sin_port = (ushort)0xFFFF,
- .sin_addr = { .s_addr = ip4_addr }
- };
- if( FD_UNLIKELY( sendto( prober->sock_fd, NULL, 0UL, MSG_DONTWAIT, fd_type_pun_const( &dst ), sizeof(struct sockaddr_in) )<0 ) ) {
- return errno;
- }
-
- entry->probe_suppress_until = now + prober->probe_delay;
-
- return 0;
-}
diff --git a/src/waltz/neigh/fd_neigh4_probe.h b/src/waltz/neigh/fd_neigh4_probe.h
deleted file mode 100644
index eed577629b..0000000000
--- a/src/waltz/neigh/fd_neigh4_probe.h
+++ /dev/null
@@ -1,135 +0,0 @@
-#ifndef HEADER_fd_src_waltz_neigh_fd_neigh4_probe_h
-#define HEADER_fd_src_waltz_neigh_fd_neigh4_probe_h
-
-/* fd_neigh4_probe.h is a hack to indirectly trigger ARP requests in
- Linux.
-
- ### Background
-
- When sending an IP packet via the Firedancer network stack, it is
- the net tile's responsibility to pick the network interface to send
- the packet out on, as well as the destination MAC address.
-
- The dst MAC address is taken from a neighbor table entry given the
- "next hop" (an output of a previously done route table lookiup).
- The neighbor table is directly mirrored from the Linux kernel.
-
- If no matching neighbor table entry exists, the system should send
- broadcast an ARP request (e.g. "who is 192.168.12.13? tell
- 192.168.12.4"). ARP replies to this request will then go to the
- kernel. The kernel also needs to be told that it should expect an
- ARP reply to avoid drops.
-
- ### Possible Solutions
-
- 1. Add a neighbor table entry, send out the ARP request via XDP:
- `ip neigh add IP_ADDR nud incomplete`
- Requires CAP_NET_ADMIN (to send RTM_NEWNEIGH)
-
- 2. Add a neighbor table entry, make the kernel issue the ARP
- request: `ip neigh add IP_ADDR nud incomplete use`
- Requires CAP_NET_ADMIN (to send RTM_NEWNEIGH)
-
- 3. Send a UDP datagram which indirectly makes the kernel do an ARP
- request: `echo "hello" | nc -u IP_ADDR:65535`
- Does not require privileges
-
- 4. Send an IP packet (ICMP echo, invalid ICMP, invalid next proto...)
- which indirectly makes the kernel do an ARP request
- `ping IP_ADDR -c 1`
- Requires CAP_NET_RAW to create a SOCK_RAW socket
-
- Solution 2 is theoretically ideal. Unfortunately, it requires the
- netlink API caller to be in the root user namespace, which would
- break assumptions made in fd_sandbox.
-
- fd_neigh4_probe implements solution 3 because it requires the least
- amount of privileges. */
-
-#include "fd_neigh4_map.h"
-#include "../fd_token_bucket.h"
-
-/* The fd_neigh4_prober_t class provides "neighbor probing"
- functionality as described above using empty UDP/IP packets. */
-
-struct fd_neigh4_prober {
- int sock_fd; /* UDP socket with IP_TTL 0 */
-
- /* probe_delay specifies the delay in ticks for successive ARP
- requests to the same IP address (see fd_tickcount()) */
- long probe_delay;
-
- /* Token bucket rate limiter on any outgoing ARP probes */
- fd_token_bucket_t rate_limit;
-
- /* Metric counter for probes suppressed by local rate limit */
- ulong local_rate_limited_cnt;
-
- /* Metric counter for probes suppressed by global rate limit */
- ulong global_rate_limited_cnt;
-};
-
-typedef struct fd_neigh4_prober fd_neigh4_prober_t;
-
-FD_PROTOTYPES_BEGIN
-
-/* fd_neigh4_prober_init initializes a neigh4_prober object. Creates a
- new unbound UDP socket (socket(2)) with an IPv4 TTL of zero
- (setsockopt(2)). max_probes_per_second and max_probe_burst configure
- token bucket rate limit parameters for outgoing probe packets.
- probe_delay_seconds sets the min wait time between two probe packet
- sends for the same dst IP. */
-
-void
-fd_neigh4_prober_init( fd_neigh4_prober_t * prober,
- float max_probes_per_second,
- ulong max_probe_burst,
- float probe_delay_seconds );
-
-/* fd_neigh4_prober_fini closes the neigh4_prober socket. */
-
-void
-fd_neigh4_prober_fini( fd_neigh4_prober_t * prober );
-
-/* fd_neigh4_probe sends out an empty UDP packet to port 65535 with the
- IP time-to-live field set to 0. ip4_addr is an IP address on a
- neighboring subnet for which the neighbor discovery process should
- be started. ip4_addr is big endian. now is a recent fd_tickcount()
- value. Returns the errno value produced by sendto(2) or 0 on success. */
-
-int
-fd_neigh4_probe( fd_neigh4_prober_t * prober,
- fd_neigh4_entry_t * entry,
- uint ip4_addr,
- long now );
-
-/* fd_neigh4_probe_rate_limited calls fd_neigh4_probe unless that would
- violate rate limits. Returns 0 if a probe was sent out. Returns
- positive errno on probe failure. Returns -1 if rate limit was hit. */
-
-static inline int
-fd_neigh4_probe_rate_limited(
- fd_neigh4_prober_t * prober,
- fd_neigh4_entry_t * entry,
- uint ip4_addr,
- long now
-) {
- /* Local rate limit */
- if( now < entry->probe_suppress_until ) {
- prober->local_rate_limited_cnt++;
- return -1;
- }
- entry->probe_suppress_until = now + prober->probe_delay;
-
- /* Global rate limit */
- if( !fd_token_bucket_consume( &prober->rate_limit, 1.0f, now ) ) {
- prober->global_rate_limited_cnt++;
- return -1;
- }
-
- return fd_neigh4_probe( prober, entry, ip4_addr, now );
-}
-
-FD_PROTOTYPES_END
-
-#endif /* HEADER_fd_src_waltz_neigh_fd_neigh4_probe_h */