diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 5b75c3f7a137b..39a0df70da534 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -75,6 +75,7 @@ Contents: mptcp-sysctl multiqueue napi + net_cachelines/index netconsole netdev-features netdevices diff --git a/Documentation/networking/net_cachelines/index.rst b/Documentation/networking/net_cachelines/index.rst new file mode 100644 index 0000000000000..6f1a99989bbd1 --- /dev/null +++ b/Documentation/networking/net_cachelines/index.rst @@ -0,0 +1,15 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +=================================== +Common Networking Struct Cachelines +=================================== + +.. toctree:: + :maxdepth: 1 + + inet_connection_sock + inet_sock + net_device + netns_ipv4_sysctl + snmp + tcp_sock diff --git a/Documentation/networking/net_cachelines/inet_connection_sock.rst b/Documentation/networking/net_cachelines/inet_connection_sock.rst new file mode 100644 index 0000000000000..ad2b200147a67 --- /dev/null +++ b/Documentation/networking/net_cachelines/inet_connection_sock.rst @@ -0,0 +1,49 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +===================================================== +inet_connection_sock struct fast path usage breakdown +===================================================== + +Type Name fastpath_tx_access fastpath_rx_access comment +..struct ..inet_connection_sock +struct_inet_sock icsk_inet read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data +struct_request_sock_queue icsk_accept_queue - - +struct_inet_bind_bucket icsk_bind_hash read_mostly - tcp_set_state +struct_inet_bind2_bucket icsk_bind2_hash read_mostly - tcp_set_state,inet_put_port +unsigned_long icsk_timeout read_mostly - inet_csk_reset_xmit_timer,tcp_connect +struct_timer_list icsk_retransmit_timer read_mostly - inet_csk_reset_xmit_timer,tcp_connect +struct_timer_list icsk_delack_timer read_mostly - inet_csk_reset_xmit_timer,tcp_connect +u32 icsk_rto read_write - tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one +u32 icsk_rto_min - - +u32 icsk_delack_max - - +u32 icsk_pmtu_cookie read_write - tcp_sync_mss,tcp_current_mss,tcp_send_syn_data,tcp_connect_init,tcp_connect +struct_tcp_congestion_ops icsk_ca_ops read_write - tcp_cwnd_validate,tcp_tso_segs,tcp_ca_dst_init,tcp_connect_init,tcp_connect,tcp_write_xmit +struct_inet_connection_sock_af_ops icsk_af_ops read_mostly - tcp_finish_connect,tcp_send_syn_data,tcp_mtup_init,tcp_mtu_check_reprobe,tcp_mtu_probe,tcp_connect_init,tcp_connect,__tcp_transmit_skb +struct_tcp_ulp_ops* icsk_ulp_ops - - +void* icsk_ulp_data - - +u8:5 icsk_ca_state read_write - tcp_cwnd_application_limited,tcp_set_ca_state,tcp_enter_cwr,tcp_tso_should_defer,tcp_mtu_probe,tcp_schedule_loss_probe,tcp_write_xmit,__tcp_transmit_skb +u8:1 icsk_ca_initialized read_write - tcp_init_transfer,tcp_init_congestion_control,tcp_init_transfer,tcp_finish_connect,tcp_connect +u8:1 icsk_ca_setsockopt - - +u8:1 icsk_ca_dst_locked write_mostly - tcp_ca_dst_init,tcp_connect_init,tcp_connect +u8 icsk_retransmits write_mostly - tcp_connect_init,tcp_connect +u8 icsk_pending read_write - inet_csk_reset_xmit_timer,tcp_connect,tcp_check_probe_timer,__tcp_push_pending_frames,tcp_rearm_rto,tcp_event_new_data_sent,tcp_event_new_data_sent +u8 icsk_backoff write_mostly - 
tcp_write_queue_purge,tcp_connect_init +u8 icsk_syn_retries - - +u8 icsk_probes_out - - +u16 icsk_ext_hdr_len read_mostly - __tcp_mtu_to_mss,tcp_mtu_to_rss,tcp_mtu_probe,tcp_write_xmit,tcp_mtu_to_mss, +struct_icsk_ack_u8 pending read_write read_write inet_csk_ack_scheduled,__tcp_cleanup_rbuf,tcp_cleanup_rbuf,inet_csk_clear_xmit_timer,tcp_event_ack-sent,inet_csk_reset_xmit_timer +struct_icsk_ack_u8 quick read_write write_mostly tcp_dec_quickack_mode,tcp_event_ack_sent,__tcp_transmit_skb,__tcp_select_window,__tcp_cleanup_rbuf +struct_icsk_ack_u8 pingpong - - +struct_icsk_ack_u8 retry write_mostly read_write inet_csk_clear_xmit_timer,tcp_rearm_rto,tcp_event_new_data_sent,tcp_write_xmit,__tcp_send_ack,tcp_send_ack, +struct_icsk_ack_u8 ato read_mostly write_mostly tcp_dec_quickack_mode,tcp_event_ack_sent,__tcp_transmit_skb,__tcp_send_ack,tcp_send_ack +struct_icsk_ack_unsigned_long timeout read_write read_write inet_csk_reset_xmit_timer,tcp_connect +struct_icsk_ack_u32 lrcvtime read_write - tcp_finish_connect,tcp_connect,tcp_event_data_sent,__tcp_transmit_skb +struct_icsk_ack_u16 rcv_mss write_mostly read_mostly __tcp_select_window,__tcp_cleanup_rbuf,tcp_initialize_rcv_mss,tcp_connect_init +struct_icsk_mtup_int search_high read_write - tcp_mtup_init,tcp_sync_mss,tcp_connect_init,tcp_mtu_check_reprobe,tcp_write_xmit +struct_icsk_mtup_int search_low read_write - tcp_mtu_probe,tcp_mtu_check_reprobe,tcp_write_xmit,tcp_sync_mss,tcp_connect_init,tcp_mtup_init +struct_icsk_mtup_u32:31 probe_size read_write - tcp_mtup_init,tcp_connect_init,__tcp_transmit_skb +struct_icsk_mtup_u32:1 enabled read_write - tcp_mtup_init,tcp_sync_mss,tcp_connect_init,tcp_mtu_probe,tcp_write_xmit +struct_icsk_mtup_u32 probe_timestamp read_write - tcp_mtup_init,tcp_connect_init,tcp_mtu_check_reprobe,tcp_mtu_probe +u32 icsk_probes_tstamp - - +u32 icsk_user_timeout - - +u64[104/sizeof(u64)] icsk_ca_priv - - diff --git a/Documentation/networking/net_cachelines/inet_sock.rst b/Documentation/networking/net_cachelines/inet_sock.rst new file mode 100644 index 0000000000000..4077febdb9954 --- /dev/null +++ b/Documentation/networking/net_cachelines/inet_sock.rst @@ -0,0 +1,43 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. 
Copyright (C) 2023 Google LLC +===================================================== +inet_sock struct fast path usage breakdown +===================================================== + +Type Name fastpath_tx_access fastpath_rx_access comment +..struct ..inet_sock +struct_sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data +struct_ipv6_pinfo* pinet6 - - +be16 inet_sport read_mostly - __tcp_transmit_skb +be32 inet_daddr read_mostly - ip_select_ident_segs +be32 inet_rcv_saddr - - +be16 inet_dport read_mostly - __tcp_transmit_skb +u16 inet_num - - +be32 inet_saddr - - +s16 uc_ttl read_mostly - __ip_queue_xmit/ip_select_ttl +u16 cmsg_flags - - +struct_ip_options_rcu* inet_opt read_mostly - __ip_queue_xmit +u16 inet_id read_mostly - ip_select_ident_segs +u8 tos read_mostly - ip_queue_xmit +u8 min_ttl - - +u8 mc_ttl - - +u8 pmtudisc - - +u8:1 recverr - - +u8:1 is_icsk - - +u8:1 freebind - - +u8:1 hdrincl - - +u8:1 mc_loop - - +u8:1 transparent - - +u8:1 mc_all - - +u8:1 nodefrag - - +u8:1 bind_address_no_port - - +u8:1 recverr_rfc4884 - - +u8:1 defer_connect read_mostly - tcp_sendmsg_fastopen +u8 rcv_tos - - +u8 convert_csum - - +int uc_index - - +int mc_index - - +be32 mc_addr - - +struct_ip_mc_socklist* mc_list - - +struct_inet_cork_full cork read_mostly - __tcp_transmit_skb +struct local_port_range - - diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst new file mode 100644 index 0000000000000..f472b91a55eeb --- /dev/null +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -0,0 +1,177 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +=========================================== +net_device struct fast path usage breakdown +=========================================== + +Type Name fastpath_tx_access fastpath_rx_access Comments +..struct ..net_device +char name[16] - - +struct_netdev_name_node* name_node +struct_dev_ifalias* ifalias +unsigned_long mem_end +unsigned_long mem_start +unsigned_long base_addr +unsigned_long state read_mostly read_mostly netif_running(dev) +struct_list_head dev_list +struct_list_head napi_list +struct_list_head unreg_list +struct_list_head close_list +struct_list_head ptype_all read_mostly - dev_nit_active(tx) +struct_list_head ptype_specific read_mostly deliver_ptype_list_skb/__netif_receive_skb_core(rx) +struct adj_list +unsigned_int flags read_mostly read_mostly __dev_queue_xmit,__dev_xmit_skb,ip6_output,__ip6_finish_output(tx);ip6_rcv_core(rx) +xdp_features_t xdp_features +unsigned_long_long priv_flags read_mostly - __dev_queue_xmit(tx) +struct_net_device_ops* netdev_ops read_mostly - netdev_core_pick_tx,netdev_start_xmit(tx) +struct_xdp_metadata_ops* xdp_metadata_ops +int ifindex - read_mostly ip6_rcv_core +unsigned_short gflags +unsigned_short hard_header_len read_mostly read_mostly ip6_xmit(tx);gro_list_prepare(rx) +unsigned_int mtu read_mostly - ip_finish_output2 +unsigned_short needed_headroom read_mostly - LL_RESERVED_SPACE/ip_finish_output2 +unsigned_short needed_tailroom +netdev_features_t features read_mostly read_mostly HARD_TX_LOCK,netif_skb_features,sk_setup_caps(tx);netif_elide_gro(rx) +netdev_features_t hw_features +netdev_features_t wanted_features +netdev_features_t vlan_features +netdev_features_t hw_enc_features - - netif_skb_features +netdev_features_t mpls_features +netdev_features_t gso_partial_features read_mostly gso_features_check
+unsigned_int min_mtu +unsigned_int max_mtu +unsigned_short type +unsigned_char min_header_len +unsigned_char name_assign_type +int group +struct_net_device_stats stats +struct_net_device_core_stats* core_stats +atomic_t carrier_up_count +atomic_t carrier_down_count +struct_iw_handler_def* wireless_handlers +struct_iw_public_data* wireless_data +struct_ethtool_ops* ethtool_ops +struct_l3mdev_ops* l3mdev_ops +struct_ndisc_ops* ndisc_ops +struct_xfrmdev_ops* xfrmdev_ops +struct_tlsdev_ops* tlsdev_ops +struct_header_ops* header_ops read_mostly - ip_finish_output2,ip6_finish_output2(tx) +unsigned_char operstate +unsigned_char link_mode +unsigned_char if_port +unsigned_char dma +unsigned_char perm_addr[32] +unsigned_char addr_assign_type +unsigned_char addr_len +unsigned_char upper_level +unsigned_char lower_level +unsigned_short neigh_priv_len +unsigned_short padded +unsigned_short dev_id +unsigned_short dev_port +spinlock_t addr_list_lock +int irq +struct_netdev_hw_addr_list uc +struct_netdev_hw_addr_list mc +struct_netdev_hw_addr_list dev_addrs +struct_kset* queues_kset +struct_list_head unlink_list +unsigned_int promiscuity +unsigned_int allmulti +bool uc_promisc +unsigned_char nested_level +struct_in_device* ip_ptr read_mostly read_mostly __in_dev_get +struct_inet6_dev* ip6_ptr read_mostly read_mostly __in6_dev_get +struct_vlan_info* vlan_info +struct_dsa_port* dsa_ptr +struct_tipc_bearer* tipc_ptr +void* atalk_ptr +void* ax25_ptr +struct_wireless_dev* ieee80211_ptr +struct_wpan_dev* ieee802154_ptr +struct_mpls_dev* mpls_ptr +struct_mctp_dev* mctp_ptr +unsigned_char* dev_addr +struct_netdev_queue* _rx read_mostly - netdev_get_rx_queue(rx) +unsigned_int num_rx_queues +unsigned_int real_num_rx_queues - read_mostly get_rps_cpu +struct_bpf_prog* xdp_prog - read_mostly netif_elide_gro() +unsigned_long gro_flush_timeout - read_mostly napi_complete_done +int napi_defer_hard_irqs - read_mostly napi_complete_done +unsigned_int gro_max_size - read_mostly skb_gro_receive +unsigned_int gro_ipv4_max_size - read_mostly skb_gro_receive +rx_handler_func_t* rx_handler read_mostly - __netif_receive_skb_core +void* rx_handler_data read_mostly - +struct_netdev_queue* ingress_queue read_mostly - +struct_bpf_mprog_entry tcx_ingress - read_mostly sch_handle_ingress +struct_nf_hook_entries* nf_hooks_ingress +unsigned_char broadcast[32] +struct_cpu_rmap* rx_cpu_rmap +struct_hlist_node index_hlist +struct_netdev_queue* _tx read_mostly - netdev_get_tx_queue(tx) +unsigned_int num_tx_queues - - +unsigned_int real_num_tx_queues read_mostly - skb_tx_hash,netdev_core_pick_tx(tx) +unsigned_int tx_queue_len +spinlock_t tx_global_lock +struct_xdp_dev_bulk_queue__percpu* xdp_bulkq +struct_xps_dev_maps* xps_maps[2] read_mostly - __netif_set_xps_queue +struct_bpf_mprog_entry tcx_egress read_mostly - sch_handle_egress +struct_nf_hook_entries* nf_hooks_egress read_mostly - +struct_hlist_head qdisc_hash[16] +struct_timer_list watchdog_timer +int watchdog_timeo +u32 proto_down_reason +struct_list_head todo_list +int__percpu* pcpu_refcnt +refcount_t dev_refcnt +struct_ref_tracker_dir refcnt_tracker +struct_list_head link_watch_list +enum:8 reg_state +bool dismantle +enum:16 rtnl_link_state +bool needs_free_netdev +void*priv_destructor struct_net_device +struct_netpoll_info* npinfo - read_mostly napi_poll/napi_poll_lock +possible_net_t nd_net - read_mostly (dev_net)napi_busy_loop,tcp_v(4/6)_rcv,ip(v6)_rcv,ip(6)_input,ip(6)_input_finish +void* ml_priv +enum_netdev_ml_priv_type ml_priv_type +struct_pcpu_lstats__percpu* lstats 
read_mostly dev_lstats_add() +struct_pcpu_sw_netstats__percpu* tstats read_mostly dev_sw_netstats_tx_add() +struct_pcpu_dstats__percpu* dstats +struct_garp_port* garp_port +struct_mrp_port* mrp_port +struct_dm_hw_stat_delta* dm_private +struct_device dev - - +struct_attribute_group* sysfs_groups[4] +struct_attribute_group* sysfs_rx_queue_group +struct_rtnl_link_ops* rtnl_link_ops +unsigned_int gso_max_size read_mostly - sk_dst_gso_max_size +unsigned_int tso_max_size +u16 gso_max_segs read_mostly - gso_max_segs +u16 tso_max_segs +unsigned_int gso_ipv4_max_size read_mostly - sk_dst_gso_max_size +struct_dcbnl_rtnl_ops* dcbnl_ops +s16 num_tc read_mostly - skb_tx_hash +struct_netdev_tc_txq tc_to_txq[16] read_mostly - skb_tx_hash +u8 prio_tc_map[16] +unsigned_int fcoe_ddp_xid +struct_netprio_map* priomap +struct_phy_device* phydev +struct_sfp_bus* sfp_bus +struct_lock_class_key* qdisc_tx_busylock +bool proto_down +unsigned:1 wol_enabled +unsigned:1 threaded - - napi_poll(napi_enable,dev_set_threaded) +struct_list_head net_notifier_list +struct_macsec_ops* macsec_ops +struct_udp_tunnel_nic_info* udp_tunnel_nic_info +struct_udp_tunnel_nic* udp_tunnel_nic +unsigned_int xdp_zc_max_segs +struct_bpf_xdp_entity xdp_state[3] +u8 dev_addr_shadow[32] +netdevice_tracker linkwatch_dev_tracker +netdevice_tracker watchdog_dev_tracker +netdevice_tracker dev_registered_tracker +struct_rtnl_hw_stats64* offload_xstats_l3 +struct_devlink_port* devlink_port +struct_dpll_pin* dpll_pin diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst new file mode 100644 index 0000000000000..47e1a01053848 --- /dev/null +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -0,0 +1,157 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. 
Copyright (C) 2023 Google LLC +=========================================== +netns_ipv4 struct fast path usage breakdown +=========================================== + +Type Name fastpath_tx_access fastpath_rx_access comment +..struct ..netns_ipv4 +struct_inet_timewait_death_row tcp_death_row +struct_udp_table* udp_table +struct_ctl_table_header* forw_hdr +struct_ctl_table_header* frags_hdr +struct_ctl_table_header* ipv4_hdr +struct_ctl_table_header* route_hdr +struct_ctl_table_header* xfrm4_hdr +struct_ipv4_devconf* devconf_all +struct_ipv4_devconf* devconf_dflt +struct_ip_ra_chain ra_chain +struct_mutex ra_mutex +struct_fib_rules_ops* rules_ops +struct_fib_table fib_main +struct_fib_table fib_default +unsigned_int fib_rules_require_fldissect +bool fib_has_custom_rules +bool fib_has_custom_local_routes +bool fib_offload_disabled +atomic_t fib_num_tclassid_users +struct_hlist_head* fib_table_hash +struct_sock* fibnl +struct_sock* mc_autojoin_sk +struct_inet_peer_base* peers +struct_fqdir* fqdir +u8 sysctl_icmp_echo_ignore_all +u8 sysctl_icmp_echo_enable_probe +u8 sysctl_icmp_echo_ignore_broadcasts +u8 sysctl_icmp_ignore_bogus_error_responses +u8 sysctl_icmp_errors_use_inbound_ifaddr +int sysctl_icmp_ratelimit +int sysctl_icmp_ratemask +u32 ip_rt_min_pmtu - - +int ip_rt_mtu_expires - - +int ip_rt_min_advmss - - +struct_local_ports ip_local_ports - - +u8 sysctl_tcp_ecn - - +u8 sysctl_tcp_ecn_fallback - - +u8 sysctl_ip_default_ttl - - ip4_dst_hoplimit/ip_select_ttl +u8 sysctl_ip_no_pmtu_disc - - +u8 sysctl_ip_fwd_use_pmtu read_mostly - ip_dst_mtu_maybe_forward/ip_skb_dst_mtu +u8 sysctl_ip_fwd_update_priority - - ip_forward +u8 sysctl_ip_nonlocal_bind - - +u8 sysctl_ip_autobind_reuse - - +u8 sysctl_ip_dynaddr - - +u8 sysctl_ip_early_demux - read_mostly ip(6)_rcv_finish_core +u8 sysctl_raw_l3mdev_accept - - +u8 sysctl_tcp_early_demux - read_mostly ip(6)_rcv_finish_core +u8 sysctl_udp_early_demux +u8 sysctl_nexthop_compat_mode - - +u8 sysctl_fwmark_reflect - - +u8 sysctl_tcp_fwmark_accept - - +u8 sysctl_tcp_l3mdev_accept - - +u8 sysctl_tcp_mtu_probing - - +int sysctl_tcp_mtu_probe_floor - - +int sysctl_tcp_base_mss - - +int sysctl_tcp_min_snd_mss read_mostly - __tcp_mtu_to_mss(tcp_write_xmit) +int sysctl_tcp_probe_threshold - - tcp_mtu_probe(tcp_write_xmit) +u32 sysctl_tcp_probe_interval - - tcp_mtu_check_reprobe(tcp_write_xmit) +int sysctl_tcp_keepalive_time - - +int sysctl_tcp_keepalive_intvl - - +u8 sysctl_tcp_keepalive_probes - - +u8 sysctl_tcp_syn_retries - - +u8 sysctl_tcp_synack_retries - - +u8 sysctl_tcp_syncookies - - generated_on_syn +u8 sysctl_tcp_migrate_req - - reuseport +u8 sysctl_tcp_comp_sack_nr - - __tcp_ack_snd_check +int sysctl_tcp_reordering - read_mostly tcp_may_raise_cwnd/tcp_cong_control +u8 sysctl_tcp_retries1 - - +u8 sysctl_tcp_retries2 - - +u8 sysctl_tcp_orphan_retries - - +u8 sysctl_tcp_tw_reuse - - timewait_sock_ops +int sysctl_tcp_fin_timeout - - TCP_LAST_ACK/tcp_rcv_state_process +unsigned_int sysctl_tcp_notsent_lowat read_mostly - tcp_notsent_lowat/tcp_stream_memory_free +u8 sysctl_tcp_sack - - tcp_syn_options +u8 sysctl_tcp_window_scaling - - tcp_syn_options,tcp_parse_options +u8 sysctl_tcp_timestamps +u8 sysctl_tcp_early_retrans read_mostly - tcp_schedule_loss_probe(tcp_write_xmit) +u8 sysctl_tcp_recovery - - tcp_fastretrans_alert +u8 sysctl_tcp_thin_linear_timeouts - - tcp_retrans_timer(on_thin_streams) +u8 sysctl_tcp_slow_start_after_idle - - unlikely(tcp_cwnd_validate-network-not-starved) +u8 sysctl_tcp_retrans_collapse - - +u8 sysctl_tcp_stdurg - - 
unlikely(tcp_check_urg) +u8 sysctl_tcp_rfc1337 - - +u8 sysctl_tcp_abort_on_overflow - - +u8 sysctl_tcp_fack - - +int sysctl_tcp_max_reordering - - tcp_check_sack_reordering +int sysctl_tcp_adv_win_scale - - tcp_init_buffer_space +u8 sysctl_tcp_dsack - - partial_packet_or_retrans_in_tcp_data_queue +u8 sysctl_tcp_app_win - - tcp_win_from_space +u8 sysctl_tcp_frto - - tcp_enter_loss +u8 sysctl_tcp_nometrics_save - - TCP_LAST_ACK/tcp_update_metrics +u8 sysctl_tcp_no_ssthresh_metrics_save - - TCP_LAST_ACK/tcp_(update/init)_metrics +u8 sysctl_tcp_moderate_rcvbuf read_mostly read_mostly tcp_tso_should_defer(tx);tcp_rcv_space_adjust(rx) +u8 sysctl_tcp_tso_win_divisor read_mostly - tcp_tso_should_defer(tcp_write_xmit) +u8 sysctl_tcp_workaround_signed_windows - - tcp_select_window +int sysctl_tcp_limit_output_bytes read_mostly - tcp_small_queue_check(tcp_write_xmit) +int sysctl_tcp_challenge_ack_limit - - +int sysctl_tcp_min_rtt_wlen read_mostly - tcp_ack_update_rtt +u8 sysctl_tcp_min_tso_segs - - unlikely(icsk_ca_ops-written) +u8 sysctl_tcp_tso_rtt_log read_mostly - tcp_tso_autosize +u8 sysctl_tcp_autocorking read_mostly - tcp_push/tcp_should_autocork +u8 sysctl_tcp_reflect_tos - - tcp_v(4/6)_send_synack +int sysctl_tcp_invalid_ratelimit - - +int sysctl_tcp_pacing_ss_ratio - - default_cong_cont(tcp_update_pacing_rate) +int sysctl_tcp_pacing_ca_ratio - - default_cong_cont(tcp_update_pacing_rate) +int sysctl_tcp_wmem[3] read_mostly - tcp_wmem_schedule(sendmsg/sendpage) +int sysctl_tcp_rmem[3] - read_mostly __tcp_grow_window(tx),tcp_rcv_space_adjust(rx) +unsigned_int sysctl_tcp_child_ehash_entries +unsigned_long sysctl_tcp_comp_sack_delay_ns - - __tcp_ack_snd_check +unsigned_long sysctl_tcp_comp_sack_slack_ns - - __tcp_ack_snd_check +int sysctl_max_syn_backlog - - +int sysctl_tcp_fastopen - - +struct_tcp_congestion_ops tcp_congestion_control - - init_cc +struct_tcp_fastopen_context tcp_fastopen_ctx - - +unsigned_int sysctl_tcp_fastopen_blackhole_timeout - - +atomic_t tfo_active_disable_times - - +unsigned_long tfo_active_disable_stamp - - +u32 tcp_challenge_timestamp - - +u32 tcp_challenge_count - - +u8 sysctl_tcp_plb_enabled - - +u8 sysctl_tcp_plb_idle_rehash_rounds - - +u8 sysctl_tcp_plb_rehash_rounds - - +u8 sysctl_tcp_plb_suspend_rto_sec - - +int sysctl_tcp_plb_cong_thresh - - +int sysctl_udp_wmem_min +int sysctl_udp_rmem_min +u8 sysctl_fib_notify_on_flag_change +u8 sysctl_udp_l3mdev_accept +u8 sysctl_igmp_llm_reports +int sysctl_igmp_max_memberships +int sysctl_igmp_max_msf +int sysctl_igmp_qrv +struct_ping_group_range ping_group_range +atomic_t dev_addr_genid +unsigned_int sysctl_udp_child_hash_entries +unsigned_long* sysctl_local_reserved_ports +int sysctl_ip_prot_sock +struct_mr_table* mrt +struct_list_head mr_tables +struct_fib_rules_ops* mr_rules_ops +u32 sysctl_fib_multipath_hash_fields +u8 sysctl_fib_multipath_use_neigh +u8 sysctl_fib_multipath_hash_policy +struct_fib_notifier_ops* notifier_ops +unsigned_int fib_seq +struct_fib_notifier_ops* ipmr_notifier_ops +unsigned_int ipmr_seq +atomic_t rt_genid +siphash_key_t ip_id_key diff --git a/Documentation/networking/net_cachelines/snmp.rst b/Documentation/networking/net_cachelines/snmp.rst new file mode 100644 index 0000000000000..1eaeb8bc252c0 --- /dev/null +++ b/Documentation/networking/net_cachelines/snmp.rst @@ -0,0 +1,134 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. 
Copyright (C) 2023 Google LLC +=========================================== +netns_ipv4 enum fast path usage breakdown +=========================================== + +Type Name fastpath_tx_access fastpath_rx_access comment +..enum +unsigned_long LINUX_MIB_TCPKEEPALIVE write_mostly - tcp_keepalive_timer +unsigned_long LINUX_MIB_DELAYEDACKS write_mostly - tcp_delack_timer_handler,tcp_delack_timer +unsigned_long LINUX_MIB_DELAYEDACKLOCKED write_mostly - tcp_delack_timer_handler,tcp_delack_timer +unsigned_long LINUX_MIB_TCPAUTOCORKING write_mostly - tcp_push,tcp_sendmsg_locked +unsigned_long LINUX_MIB_TCPFROMZEROWINDOWADV write_mostly - tcp_select_window,tcp_transmit-skb +unsigned_long LINUX_MIB_TCPTOZEROWINDOWADV write_mostly - tcp_select_window,tcp_transmit-skb +unsigned_long LINUX_MIB_TCPWANTZEROWINDOWADV write_mostly - tcp_select_window,tcp_transmit-skb +unsigned_long LINUX_MIB_TCPORIGDATASENT write_mostly - tcp_write_xmit +unsigned_long LINUX_MIB_TCPHPHITS - write_mostly tcp_rcv_established,tcp_v4_do_rcv,tcp_v6_do_rcv +unsigned_long LINUX_MIB_TCPRCVCOALESCE - write_mostly tcp_try_coalesce,tcp_queue_rcv,tcp_rcv_established +unsigned_long LINUX_MIB_TCPPUREACKS - write_mostly tcp_ack,tcp_rcv_established +unsigned_long LINUX_MIB_TCPHPACKS - write_mostly tcp_ack,tcp_rcv_established +unsigned_long LINUX_MIB_TCPDELIVERED - write_mostly tcp_newly_delivered,tcp_ack,tcp_rcv_established +unsigned_long LINUX_MIB_SYNCOOKIESSENT +unsigned_long LINUX_MIB_SYNCOOKIESRECV +unsigned_long LINUX_MIB_SYNCOOKIESFAILED +unsigned_long LINUX_MIB_EMBRYONICRSTS +unsigned_long LINUX_MIB_PRUNECALLED +unsigned_long LINUX_MIB_RCVPRUNED +unsigned_long LINUX_MIB_OFOPRUNED +unsigned_long LINUX_MIB_OUTOFWINDOWICMPS +unsigned_long LINUX_MIB_LOCKDROPPEDICMPS +unsigned_long LINUX_MIB_ARPFILTER +unsigned_long LINUX_MIB_TIMEWAITED +unsigned_long LINUX_MIB_TIMEWAITRECYCLED +unsigned_long LINUX_MIB_TIMEWAITKILLED +unsigned_long LINUX_MIB_PAWSACTIVEREJECTED +unsigned_long LINUX_MIB_PAWSESTABREJECTED +unsigned_long LINUX_MIB_DELAYEDACKLOST +unsigned_long LINUX_MIB_LISTENOVERFLOWS +unsigned_long LINUX_MIB_LISTENDROPS +unsigned_long LINUX_MIB_TCPRENORECOVERY +unsigned_long LINUX_MIB_TCPSACKRECOVERY +unsigned_long LINUX_MIB_TCPSACKRENEGING +unsigned_long LINUX_MIB_TCPSACKREORDER +unsigned_long LINUX_MIB_TCPRENOREORDER +unsigned_long LINUX_MIB_TCPTSREORDER +unsigned_long LINUX_MIB_TCPFULLUNDO +unsigned_long LINUX_MIB_TCPPARTIALUNDO +unsigned_long LINUX_MIB_TCPDSACKUNDO +unsigned_long LINUX_MIB_TCPLOSSUNDO +unsigned_long LINUX_MIB_TCPLOSTRETRANSMIT +unsigned_long LINUX_MIB_TCPRENOFAILURES +unsigned_long LINUX_MIB_TCPSACKFAILURES +unsigned_long LINUX_MIB_TCPLOSSFAILURES +unsigned_long LINUX_MIB_TCPFASTRETRANS +unsigned_long LINUX_MIB_TCPSLOWSTARTRETRANS +unsigned_long LINUX_MIB_TCPTIMEOUTS +unsigned_long LINUX_MIB_TCPLOSSPROBES +unsigned_long LINUX_MIB_TCPLOSSPROBERECOVERY +unsigned_long LINUX_MIB_TCPRENORECOVERYFAIL +unsigned_long LINUX_MIB_TCPSACKRECOVERYFAIL +unsigned_long LINUX_MIB_TCPRCVCOLLAPSED +unsigned_long LINUX_MIB_TCPDSACKOLDSENT +unsigned_long LINUX_MIB_TCPDSACKOFOSENT +unsigned_long LINUX_MIB_TCPDSACKRECV +unsigned_long LINUX_MIB_TCPDSACKOFORECV +unsigned_long LINUX_MIB_TCPABORTONDATA +unsigned_long LINUX_MIB_TCPABORTONCLOSE +unsigned_long LINUX_MIB_TCPABORTONMEMORY +unsigned_long LINUX_MIB_TCPABORTONTIMEOUT +unsigned_long LINUX_MIB_TCPABORTONLINGER +unsigned_long LINUX_MIB_TCPABORTFAILED +unsigned_long LINUX_MIB_TCPMEMORYPRESSURES +unsigned_long LINUX_MIB_TCPMEMORYPRESSURESCHRONO +unsigned_long LINUX_MIB_TCPSACKDISCARD 
+unsigned_long LINUX_MIB_TCPDSACKIGNOREDOLD +unsigned_long LINUX_MIB_TCPDSACKIGNOREDNOUNDO +unsigned_long LINUX_MIB_TCPSPURIOUSRTOS +unsigned_long LINUX_MIB_TCPMD5NOTFOUND +unsigned_long LINUX_MIB_TCPMD5UNEXPECTED +unsigned_long LINUX_MIB_TCPMD5FAILURE +unsigned_long LINUX_MIB_SACKSHIFTED +unsigned_long LINUX_MIB_SACKMERGED +unsigned_long LINUX_MIB_SACKSHIFTFALLBACK +unsigned_long LINUX_MIB_TCPBACKLOGDROP +unsigned_long LINUX_MIB_PFMEMALLOCDROP +unsigned_long LINUX_MIB_TCPMINTTLDROP +unsigned_long LINUX_MIB_TCPDEFERACCEPTDROP +unsigned_long LINUX_MIB_IPRPFILTER +unsigned_long LINUX_MIB_TCPTIMEWAITOVERFLOW +unsigned_long LINUX_MIB_TCPREQQFULLDOCOOKIES +unsigned_long LINUX_MIB_TCPREQQFULLDROP +unsigned_long LINUX_MIB_TCPRETRANSFAIL +unsigned_long LINUX_MIB_TCPBACKLOGCOALESCE +unsigned_long LINUX_MIB_TCPOFOQUEUE +unsigned_long LINUX_MIB_TCPOFODROP +unsigned_long LINUX_MIB_TCPOFOMERGE +unsigned_long LINUX_MIB_TCPCHALLENGEACK +unsigned_long LINUX_MIB_TCPSYNCHALLENGE +unsigned_long LINUX_MIB_TCPFASTOPENACTIVE +unsigned_long LINUX_MIB_TCPFASTOPENACTIVEFAIL +unsigned_long LINUX_MIB_TCPFASTOPENPASSIVE +unsigned_long LINUX_MIB_TCPFASTOPENPASSIVEFAIL +unsigned_long LINUX_MIB_TCPFASTOPENLISTENOVERFLOW +unsigned_long LINUX_MIB_TCPFASTOPENCOOKIEREQD +unsigned_long LINUX_MIB_TCPFASTOPENBLACKHOLE +unsigned_long LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES +unsigned_long LINUX_MIB_BUSYPOLLRXPACKETS +unsigned_long LINUX_MIB_TCPSYNRETRANS +unsigned_long LINUX_MIB_TCPHYSTARTTRAINDETECT +unsigned_long LINUX_MIB_TCPHYSTARTTRAINCWND +unsigned_long LINUX_MIB_TCPHYSTARTDELAYDETECT +unsigned_long LINUX_MIB_TCPHYSTARTDELAYCWND +unsigned_long LINUX_MIB_TCPACKSKIPPEDSYNRECV +unsigned_long LINUX_MIB_TCPACKSKIPPEDPAWS +unsigned_long LINUX_MIB_TCPACKSKIPPEDSEQ +unsigned_long LINUX_MIB_TCPACKSKIPPEDFINWAIT2 +unsigned_long LINUX_MIB_TCPACKSKIPPEDTIMEWAIT +unsigned_long LINUX_MIB_TCPACKSKIPPEDCHALLENGE +unsigned_long LINUX_MIB_TCPWINPROBE +unsigned_long LINUX_MIB_TCPMTUPFAIL +unsigned_long LINUX_MIB_TCPMTUPSUCCESS +unsigned_long LINUX_MIB_TCPDELIVEREDCE +unsigned_long LINUX_MIB_TCPACKCOMPRESSED +unsigned_long LINUX_MIB_TCPZEROWINDOWDROP +unsigned_long LINUX_MIB_TCPRCVQDROP +unsigned_long LINUX_MIB_TCPWQUEUETOOBIG +unsigned_long LINUX_MIB_TCPFASTOPENPASSIVEALTKEY +unsigned_long LINUX_MIB_TCPTIMEOUTREHASH +unsigned_long LINUX_MIB_TCPDUPLICATEDATAREHASH +unsigned_long LINUX_MIB_TCPDSACKRECVSEGS +unsigned_long LINUX_MIB_TCPDSACKIGNOREDDUBIOUS +unsigned_long LINUX_MIB_TCPMIGRATEREQSUCCESS +unsigned_long LINUX_MIB_TCPMIGRATEREQFAILURE +unsigned_long __LINUX_MIB_MAX diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst new file mode 100644 index 0000000000000..65ee2c816f78c --- /dev/null +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -0,0 +1,156 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. 
Copyright (C) 2023 Google LLC +========================================= +tcp_sock struct fast path usage breakdown +========================================= + +Type Name fastpath_tx_access fastpath_rx_access Comments +..struct ..tcp_sock +struct_inet_connection_sock inet_conn +u16 tcp_header_len read_mostly read_mostly tcp_bound_to_half_wnd,tcp_current_mss(tx);tcp_rcv_established(rx) +u16 gso_segs read_mostly - tcp_xmit_size_goal +__be32 pred_flags read_write read_mostly tcp_select_window(tx);tcp_rcv_established(rx) +u64 bytes_received - read_write tcp_rcv_nxt_update(rx) +u32 segs_in - read_write tcp_v6_rcv(rx) +u32 data_segs_in - read_write tcp_v6_rcv(rx) +u32 rcv_nxt read_mostly read_write tcp_cleanup_rbuf,tcp_send_ack,tcp_inq_hint,tcp_transmit_skb,tcp_receive_window(tx);tcp_v6_do_rcv,tcp_rcv_established,tcp_data_queue,tcp_receive_window,tcp_rcv_nxt_update(write)(rx) +u32 copied_seq - read_mostly tcp_cleanup_rbuf,tcp_rcv_space_adjust,tcp_inq_hint +u32 rcv_wup - read_write __tcp_cleanup_rbuf,tcp_receive_window,tcp_receive_established +u32 snd_nxt read_write read_mostly tcp_rate_check_app_limited,__tcp_transmit_skb,tcp_event_new_data_sent(write)(tx);tcp_rcv_established,tcp_ack,tcp_clean_rtx_queue(rx) +u32 segs_out read_write - __tcp_transmit_skb +u32 data_segs_out read_write - __tcp_transmit_skb,tcp_update_skb_after_send +u64 bytes_sent read_write - __tcp_transmit_skb +u64 bytes_acked - read_write tcp_snd_una_update/tcp_ack +u32 dsack_dups +u32 snd_una read_mostly read_write tcp_wnd_end,tcp_urg_mode,tcp_minshall_check,tcp_cwnd_validate(tx);tcp_ack,tcp_may_update_window,tcp_clean_rtx_queue(write),tcp_ack_tstamp(rx) +u32 snd_sml read_write - tcp_minshall_check,tcp_minshall_update +u32 rcv_tstamp - read_mostly tcp_ack +u32 lsndtime read_write - tcp_slow_start_after_idle_check,tcp_event_data_sent +u32 last_oow_ack_time +u32 compressed_ack_rcv_nxt +u32 tsoffset read_mostly read_mostly tcp_established_options(tx);tcp_fast_parse_options(rx) +struct_list_head tsq_node - - +struct_list_head tsorted_sent_queue read_write - tcp_update_skb_after_send +u32 snd_wl1 - read_mostly tcp_may_update_window +u32 snd_wnd read_mostly read_mostly tcp_wnd_end,tcp_tso_should_defer(tx);tcp_fast_path_on(rx) +u32 max_window read_mostly - tcp_bound_to_half_wnd,forced_push +u32 mss_cache read_mostly read_mostly tcp_rate_check_app_limited,tcp_current_mss,tcp_sync_mss,tcp_sndbuf_expand,tcp_tso_should_defer(tx);tcp_update_pacing_rate,tcp_clean_rtx_queue(rx) +u32 window_clamp read_mostly read_write tcp_rcv_space_adjust,__tcp_select_window +u32 rcv_ssthresh read_mostly - __tcp_select_window +u8 scaling_ratio read_mostly read_mostly tcp_win_from_space +struct tcp_rack +u16 advmss - read_mostly tcp_rcv_space_adjust +u8 compressed_ack +u8:2 dup_ack_counter +u8:1 tlp_retrans +u8:1 tcp_usec_ts read_mostly read_mostly +u32 chrono_start read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) +u32[3] chrono_stat read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) +u8:2 chrono_type read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) +u8:1 rate_app_limited - read_write tcp_rate_gen +u8:1 fastopen_connect +u8:1 fastopen_no_cookie +u8:1 is_sack_reneg - read_mostly tcp_skb_entail,tcp_ack +u8:2 fastopen_client_fail +u8:4 nonagle read_write - tcp_skb_entail,tcp_push_pending_frames +u8:1 thin_lto +u8:1 recvmsg_inq +u8:1 repair read_mostly - tcp_write_xmit +u8:1 frto +u8 repair_queue - - +u8:2 save_syn +u8:1 syn_data +u8:1 syn_fastopen 
+u8:1 syn_fastopen_exp +u8:1 syn_fastopen_ch +u8:1 syn_data_acked +u8:1 is_cwnd_limited read_mostly - tcp_cwnd_validate,tcp_is_cwnd_limited +u32 tlp_high_seq - read_mostly tcp_ack +u32 tcp_tx_delay +u64 tcp_wstamp_ns read_write - tcp_pacing_check,tcp_tso_should_defer,tcp_update_skb_after_send +u64 tcp_clock_cache read_write read_write tcp_mstamp_refresh(tcp_write_xmit/tcp_rcv_space_adjust),__tcp_transmit_skb,tcp_tso_should_defer;timer +u64 tcp_mstamp read_write read_write tcp_mstamp_refresh(tcp_write_xmit/tcp_rcv_space_adjust)(tx);tcp_rcv_space_adjust,tcp_rate_gen,tcp_clean_rtx_queue,tcp_ack_update_rtt/tcp_time_stamp(rx);timer +u32 srtt_us read_mostly read_write tcp_tso_should_defer(tx);tcp_update_pacing_rate,__tcp_set_rto,tcp_rtt_estimator(rx) +u32 mdev_us read_write - tcp_rtt_estimator +u32 mdev_max_us +u32 rttvar_us - read_mostly __tcp_set_rto +u32 rtt_seq read_write tcp_rtt_estimator +struct_minmax rtt_min - read_mostly tcp_min_rtt/tcp_rate_gen,tcp_min_rtttcp_update_rtt_min +u32 packets_out read_write read_write tcp_packets_in_flight(tx/rx);tcp_slow_start_after_idle_check,tcp_nagle_check,tcp_rate_skb_sent,tcp_event_new_data_sent,tcp_cwnd_validate,tcp_write_xmit(tx);tcp_ack,tcp_clean_rtx_queue,tcp_update_pacing_rate(rx) +u32 retrans_out - read_mostly tcp_packets_in_flight,tcp_rate_check_app_limited +u32 max_packets_out - read_write tcp_cwnd_validate +u32 cwnd_usage_seq - read_write tcp_cwnd_validate +u16 urg_data - read_mostly tcp_fast_path_check +u8 ecn_flags read_write - tcp_ecn_send +u8 keepalive_probes +u32 reordering read_mostly - tcp_sndbuf_expand +u32 reord_seen +u32 snd_up read_write read_mostly tcp_mark_urg,tcp_urg_mode,__tcp_transmit_skb(tx);tcp_clean_rtx_queue(rx) +struct_tcp_options_received rx_opt read_mostly read_write tcp_established_options(tx);tcp_fast_path_on,tcp_ack_update_window,tcp_is_sack,tcp_data_queue,tcp_rcv_established,tcp_ack_update_rtt(rx) +u32 snd_ssthresh - read_mostly tcp_update_pacing_rate +u32 snd_cwnd read_mostly read_mostly tcp_snd_cwnd,tcp_rate_check_app_limited,tcp_tso_should_defer(tx);tcp_update_pacing_rate +u32 snd_cwnd_cnt +u32 snd_cwnd_clamp +u32 snd_cwnd_used +u32 snd_cwnd_stamp +u32 prior_cwnd +u32 prr_delivered +u32 prr_out read_mostly read_mostly tcp_rate_skb_sent,tcp_newly_delivered(tx);tcp_ack,tcp_rate_gen,tcp_clean_rtx_queue(rx) +u32 delivered read_mostly read_write tcp_rate_skb_sent, tcp_newly_delivered(tx);tcp_ack, tcp_rate_gen, tcp_clean_rtx_queue (rx) +u32 delivered_ce read_mostly read_write tcp_rate_skb_sent(tx);tcp_rate_gen(rx) +u32 lost - read_mostly tcp_ack +u32 app_limited read_write read_mostly tcp_rate_check_app_limited,tcp_rate_skb_sent(tx);tcp_rate_gen(rx) +u64 first_tx_mstamp read_write - tcp_rate_skb_sent +u64 delivered_mstamp read_write - tcp_rate_skb_sent +u32 rate_delivered - read_mostly tcp_rate_gen +u32 rate_interval_us - read_mostly rate_delivered,rate_app_limited +u32 rcv_wnd read_write read_mostly tcp_select_window,tcp_receive_window,tcp_fast_path_check +u32 write_seq read_write - tcp_rate_check_app_limited,tcp_write_queue_empty,tcp_skb_entail,forced_push,tcp_mark_push +u32 notsent_lowat read_mostly - tcp_stream_memory_free +u32 pushed_seq read_write - tcp_mark_push,forced_push +u32 lost_out read_mostly read_mostly tcp_left_out(tx);tcp_packets_in_flight(tx/rx);tcp_rate_check_app_limited(rx) +u32 sacked_out read_mostly read_mostly tcp_left_out(tx);tcp_packets_in_flight(tx/rx);tcp_clean_rtx_queue(rx) +struct_hrtimer pacing_timer +struct_hrtimer compressed_ack_timer +struct_sk_buff* lost_skb_hint read_mostly 
tcp_clean_rtx_queue +struct_sk_buff* retransmit_skb_hint read_mostly - tcp_clean_rtx_queue +struct_rb_root out_of_order_queue - read_mostly tcp_data_queue,tcp_fast_path_check +struct_sk_buff* ooo_last_skb +struct_tcp_sack_block[1] duplicate_sack +struct_tcp_sack_block[4] selective_acks +struct_tcp_sack_block[4] recv_sack_cache +struct_sk_buff* highest_sack read_write - tcp_event_new_data_sent +int lost_cnt_hint +u32 prior_ssthresh +u32 high_seq +u32 retrans_stamp +u32 undo_marker +int undo_retrans +u64 bytes_retrans +u32 total_retrans +u32 rto_stamp +u16 total_rto +u16 total_rto_recoveries +u32 total_rto_time +u32 urg_seq - - +unsigned_int keepalive_time +unsigned_int keepalive_intvl +int linger2 +u8 bpf_sock_ops_cb_flags +u8:1 bpf_chg_cc_inprogress +u16 timeout_rehash +u32 rcv_ooopack +u32 rcv_rtt_last_tsecr +struct rcv_rtt_est - read_write tcp_rcv_space_adjust,tcp_rcv_established +struct rcvq_space - read_write tcp_rcv_space_adjust +struct mtu_probe +u32 plb_rehash +u32 mtu_info +bool is_mptcp +bool smc_hs_congested +bool syn_smc +struct_tcp_sock_af_ops* af_specific +struct_tcp_md5sig_info* md5sig_info +struct_tcp_fastopen_request* fastopen_req +struct_request_sock* fastopen_rsk +struct_saved_syn* saved_syn \ No newline at end of file diff --git a/MAINTAINERS b/MAINTAINERS index ae46ea84ee5c1..7288da0f5653b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14856,6 +14856,7 @@ Q: https://patchwork.kernel.org/project/netdevbpf/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git F: Documentation/devicetree/bindings/net/ +F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ F: include/dt-bindings/net/ @@ -14910,6 +14911,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git F: Documentation/core-api/netlink.rst F: Documentation/netlink/ F: Documentation/networking/ +F: Documentation/networking/net_cachelines/ F: Documentation/process/maintainer-netdev.rst F: Documentation/userspace-api/netlink/ F: include/linux/in.h @@ -15016,6 +15018,7 @@ NETWORKING [TCP] M: Eric Dumazet L: netdev@vger.kernel.org S: Maintained +F: Documentation/networking/net_cachelines/tcp_sock.rst F: include/linux/tcp.h F: include/net/tcp.h F: include/trace/events/tcp.h diff --git a/include/linux/cache.h b/include/linux/cache.h index 9900d20b76c28..0ecb17bb68837 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -85,6 +85,31 @@ #define cache_line_size() L1_CACHE_BYTES #endif +#ifndef __cacheline_group_begin +#define __cacheline_group_begin(GROUP) \ + __u8 __cacheline_group_begin__##GROUP[0] +#endif + +#ifndef __cacheline_group_end +#define __cacheline_group_end(GROUP) \ + __u8 __cacheline_group_end__##GROUP[0] +#endif + +#ifndef CACHELINE_ASSERT_GROUP_MEMBER +#define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \ + BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \ + offsetofend(TYPE, __cacheline_group_begin__##GROUP) && \ + offsetofend(TYPE, MEMBER) <= \ + offsetof(TYPE, __cacheline_group_end__##GROUP))) +#endif + +#ifndef CACHELINE_ASSERT_GROUP_SIZE +#define CACHELINE_ASSERT_GROUP_SIZE(TYPE, GROUP, SIZE) \ + BUILD_BUG_ON(offsetof(TYPE, __cacheline_group_end__##GROUP) - \ + offsetofend(TYPE, __cacheline_group_begin__##GROUP) > \ + SIZE) +#endif + /* * Helper to add padding within a struct to ensure data fall into separate * cachelines. 
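To illustrate how the new cache.h helpers fit together, here is a minimal sketch that is not part of this patch: struct foo_stats, its fields, foo_struct_check() and the 64-byte budget are invented for illustration, and kernel helpers such as u32, BUILD_BUG_ON(), offsetofend() and __init are assumed to be in scope. The pattern mirrors what the later hunks do for struct net_device and net_dev_struct_check().

/* Hypothetical example struct -- for illustration only, not part of this patch. */
struct foo_stats {
	/* Fields read on every transmit share one group so they can be
	 * packed into a minimal number of cache lines.
	 */
	__cacheline_group_begin(foo_read_tx);
	u32		tx_quota;
	u32		tx_mtu;
	void		*tx_ring;
	__cacheline_group_end(foo_read_tx);

	/* Cold configuration may live anywhere after the group. */
	u32		debug_flags;
};

static void __init foo_struct_check(void)
{
	/* Build fails if a field drifts out of its declared group... */
	CACHELINE_ASSERT_GROUP_MEMBER(struct foo_stats, foo_read_tx, tx_quota);
	CACHELINE_ASSERT_GROUP_MEMBER(struct foo_stats, foo_read_tx, tx_mtu);
	CACHELINE_ASSERT_GROUP_MEMBER(struct foo_stats, foo_read_tx, tx_ring);
	/* ...or if the whole group outgrows the assumed 64-byte cache line. */
	CACHELINE_ASSERT_GROUP_SIZE(struct foo_stats, foo_read_tx, 64);
}

The begin/end markers are zero-length arrays, so they add no storage and no run-time cost; the assertions only trip at build time when a later change moves a member out of its intended group or lets the group exceed the stated size budget.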
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8e175fee7ad2d..ac6cdb6ff03a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2094,6 +2094,78 @@ enum netdev_stat_type { */ struct net_device { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/net_device.rst. + * Please update the document when adding new fields. + */ + + /* TX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_tx); + unsigned long long priv_flags; + const struct net_device_ops *netdev_ops; + const struct header_ops *header_ops; + struct netdev_queue *_tx; + netdev_features_t gso_partial_features; + unsigned int real_num_tx_queues; + unsigned int gso_max_size; + unsigned int gso_ipv4_max_size; + u16 gso_max_segs; + s16 num_tc; + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ + unsigned int mtu; + unsigned short needed_headroom; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; +#ifdef CONFIG_XPS + struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; +#endif +#ifdef CONFIG_NETFILTER_EGRESS + struct nf_hook_entries __rcu *nf_hooks_egress; +#endif +#ifdef CONFIG_NET_XGRESS + struct bpf_mprog_entry __rcu *tcx_egress; +#endif + __cacheline_group_end(net_device_read_tx); + + /* TXRX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_txrx); + union { + struct pcpu_lstats __percpu *lstats; + struct pcpu_sw_netstats __percpu *tstats; + struct pcpu_dstats __percpu *dstats; + }; + unsigned long state; + unsigned int flags; + unsigned short hard_header_len; + netdev_features_t features; + struct inet6_dev __rcu *ip6_ptr; + __cacheline_group_end(net_device_read_txrx); + + /* RX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_rx); + struct bpf_prog __rcu *xdp_prog; + struct list_head ptype_specific; + int ifindex; + unsigned int real_num_rx_queues; + struct netdev_rx_queue *_rx; + unsigned long gro_flush_timeout; + int napi_defer_hard_irqs; + unsigned int gro_max_size; + unsigned int gro_ipv4_max_size; + rx_handler_func_t __rcu *rx_handler; + void __rcu *rx_handler_data; + possible_net_t nd_net; +#ifdef CONFIG_NETPOLL + struct netpoll_info __rcu *npinfo; +#endif +#ifdef CONFIG_NET_XGRESS + struct bpf_mprog_entry __rcu *tcx_ingress; +#endif + __cacheline_group_end(net_device_read_rx); + char name[IFNAMSIZ]; struct netdev_name_node *name_node; struct dev_ifalias __rcu *ifalias; @@ -2111,14 +2183,12 @@ struct net_device { * part of the usual set specified in Space.c. */ - unsigned long state; struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; struct list_head ptype_all; - struct list_head ptype_specific; struct { struct list_head upper; @@ -2126,31 +2196,18 @@ struct net_device { } adj_list; /* Read-mostly cache-line for fast-path access */ - unsigned int flags; xdp_features_t xdp_features; - unsigned long long priv_flags; - const struct net_device_ops *netdev_ops; const struct xdp_metadata_ops *xdp_metadata_ops; - int ifindex; + const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops; // reserve for 48eb03dd26304 unsigned short gflags; - unsigned short hard_header_len; - /* Note : dev->mtu is often read without holding a lock. - * Writers usually hold RTNL. - * It is recommended to use READ_ONCE() to annotate the reads, - * and to use WRITE_ONCE() to annotate the writes. 
- */ - unsigned int mtu; - unsigned short needed_headroom; unsigned short needed_tailroom; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; - netdev_features_t gso_partial_features; unsigned int min_mtu; unsigned int max_mtu; @@ -2188,8 +2245,6 @@ struct net_device { const struct tlsdev_ops *tlsdev_ops; #endif - const struct header_ops *header_ops; - unsigned char operstate; unsigned char link_mode; @@ -2230,9 +2285,7 @@ struct net_device { /* Protocol-specific pointers */ - struct in_device __rcu *ip_ptr; - struct inet6_dev __rcu *ip6_ptr; #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif @@ -2267,26 +2320,13 @@ struct net_device { /* Interface address info used in eth_type_trans() */ const unsigned char *dev_addr; - struct netdev_rx_queue *_rx; unsigned int num_rx_queues; - unsigned int real_num_rx_queues; - - struct bpf_prog __rcu *xdp_prog; - unsigned long gro_flush_timeout; - int napi_defer_hard_irqs; #define GRO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field. */ #define GRO_MAX_SIZE (8 * 65535u) - unsigned int gro_max_size; - unsigned int gro_ipv4_max_size; unsigned int xdp_zc_max_segs; - rx_handler_func_t __rcu *rx_handler; - void __rcu *rx_handler_data; -#ifdef CONFIG_NET_XGRESS - struct bpf_mprog_entry __rcu *tcx_ingress; -#endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS struct nf_hook_entries __rcu *nf_hooks_ingress; @@ -2301,25 +2341,13 @@ struct net_device { /* * Cache lines mostly used on transmit path */ - struct netdev_queue *_tx ____cacheline_aligned_in_smp; unsigned int num_tx_queues; - unsigned int real_num_tx_queues; struct Qdisc __rcu *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; struct xdp_dev_bulk_queue __percpu *xdp_bulkq; -#ifdef CONFIG_XPS - struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; -#endif -#ifdef CONFIG_NET_XGRESS - struct bpf_mprog_entry __rcu *tcx_egress; -#endif -#ifdef CONFIG_NETFILTER_EGRESS - struct nf_hook_entries __rcu *nf_hooks_egress; -#endif - #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); #endif @@ -2358,22 +2386,11 @@ struct net_device { bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); -#ifdef CONFIG_NETPOLL - struct netpoll_info __rcu *npinfo; -#endif - - possible_net_t nd_net; - /* mid-layer private */ void *ml_priv; enum netdev_ml_priv_type ml_priv_type; enum netdev_stat_type pcpu_stat_type:8; - union { - struct pcpu_lstats __percpu *lstats; - struct pcpu_sw_netstats __percpu *tstats; - struct pcpu_dstats __percpu *dstats; - }; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; @@ -2398,20 +2415,15 @@ struct net_device { */ #define GSO_MAX_SIZE (8 * GSO_MAX_SEGS) - unsigned int gso_max_size; #define TSO_LEGACY_MAX_SIZE 65536 #define TSO_MAX_SIZE UINT_MAX unsigned int tso_max_size; - u16 gso_max_segs; #define TSO_MAX_SEGS U16_MAX u16 tso_max_segs; - unsigned int gso_ipv4_max_size; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif - s16 num_tc; - struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9b371aa7c7962..66ecbea3c075d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -175,23 +175,122 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) 
#define TCP_RMEM_TO_WIN_SCALE 8 struct tcp_sock { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/tcp_sock.rst. + * Please update the document when adding new fields. + */ + /* inet_connection_sock has to be the first member of tcp_sock */ struct inet_connection_sock inet_conn; - u16 tcp_header_len; /* Bytes of tcp header to send */ + + /* TX read-mostly hotpath cache lines */ + __cacheline_group_begin(tcp_sock_read_tx); + u32 max_window; /* Maximal window ever seen from peer */ + u32 rcv_ssthresh; /* Current window clamp */ + u32 reordering; /* Packet reordering metric. */ + u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ u16 gso_segs; /* Max number of segs per GSO packet */ + /* from STCP, retrans queue hinting */ + struct sk_buff *lost_skb_hint; + struct sk_buff *retransmit_skb_hint; + __cacheline_group_end(tcp_sock_read_tx); + /* TXRX read-mostly hotpath cache lines */ + __cacheline_group_begin(tcp_sock_read_txrx); + u32 tsoffset; /* timestamp offset */ + u32 snd_wnd; /* The window we expect to receive */ + u32 mss_cache; /* Cached effective mss, not including SACKS */ + u32 snd_cwnd; /* Sending congestion window */ + u32 prr_out; /* Total number of pkts sent during Recovery. */ + u32 lost_out; /* Lost packets */ + u32 sacked_out; /* SACK'd packets */ + u16 tcp_header_len; /* Bytes of tcp header to send */ + u8 scaling_ratio; /* see tcp_win_from_space() */ + u8 chrono_type : 2, /* current chronograph type */ + repair : 1, + tcp_usec_ts : 1, /* TSval values in usec (reserved) */ + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ + __cacheline_group_end(tcp_sock_read_txrx); + + /* RX read-mostly hotpath cache lines */ + __cacheline_group_begin(tcp_sock_read_rx); + u32 copied_seq; /* Head of yet unread data */ + u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ + u32 snd_wl1; /* Sequence for window update */ + u32 tlp_high_seq; /* snd_nxt at the time of TLP */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 retrans_out; /* Retransmitted packets out */ + u16 advmss; /* Advertised MSS */ + u16 urg_data; /* Saved octet of OOB data and control flags */ + u32 lost; /* Total data packets lost incl. rexmits */ + struct minmax rtt_min; + /* OOO segments go in this rbtree. Socket lock must be held. */ + struct rb_root out_of_order_queue; + u32 snd_ssthresh; /* Slow start size threshold */ + __cacheline_group_end(tcp_sock_read_rx); + + /* TX read-write hotpath cache lines */ + __cacheline_group_begin(tcp_sock_write_tx) ____cacheline_aligned; + u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut + * The total number of segments sent. + */ + u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut + * total number of data segments sent. + */ + u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut + * total number of data bytes sent. 
+ */ + u32 snd_sml; /* Last byte of the most recently transmitted small packet */ + u32 chrono_start; /* Start time in jiffies of a TCP chrono */ + u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ + u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ + u32 pushed_seq; /* Last pushed seq, required to talk to windows */ + u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ + u32 mdev_us; /* medium deviation */ + u64 tcp_wstamp_ns; /* departure time for next sent data packet */ + u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ + u64 tcp_mstamp; /* most recent packet received/sent */ + u32 rtt_seq; /* sequence number to update rttvar */ + struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ + struct sk_buff *highest_sack; /* skb just after the highest + * skb with SACKed bit set + * (validity guaranteed only if + * sacked_out > 0) + */ + u8 ecn_flags; /* ECN status bits. */ + __cacheline_group_end(tcp_sock_write_tx); + + /* TXRX read-write hotpath cache lines */ + __cacheline_group_begin(tcp_sock_write_txrx); /* * Header prediction flags * 0x5?10 << 16 + snd_wnd in net byte order */ __be32 pred_flags; - + u32 rcv_nxt; /* What we want to receive next */ + u32 snd_nxt; /* Next sequence we send */ + u32 snd_una; /* First byte we want an ack for */ + u32 window_clamp; /* Maximal window to advertise */ + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 packets_out; /* Packets which are "in flight" */ + u32 snd_up; /* Urgent pointer */ + u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 delivered_ce; /* Like the above but only ECE marked packets */ + u32 app_limited; /* limited until "delivered" reaches this val */ + u32 rcv_wnd; /* Current receiver window */ /* - * RFC793 variables by their proper names. This means you can - * read the code and the spec side by side (and laugh ...) - * See RFC793 and RFC1122. The RFC writes these in capitals. + * Options received (usually on last packet, some only on SYN packets). */ - u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived + struct tcp_options_received rx_opt; + u8 nonagle : 4,/* Disable Nagle algorithm? */ + rate_app_limited:1; /* rate_{delivered,interval_us} limited? */ + __cacheline_group_end(tcp_sock_write_txrx); + + /* RX read-write hotpath cache lines */ + __cacheline_group_begin(tcp_sock_write_rx); + u64 bytes_received; + /* RFC4898 tcpEStatsAppHCThruOctetsReceived * sum(delta(rcv_nxt)), or how many bytes * were acked. */ @@ -201,46 +300,44 @@ struct tcp_sock { u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn * total number of data segments in. */ - u32 rcv_nxt; /* What we want to receive next */ - u32 copied_seq; /* Head of yet unread data */ u32 rcv_wup; /* rcv_nxt on last window update sent */ - u32 snd_nxt; /* Next sequence we send */ - u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut - * The total number of segments sent. - */ - u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut - * total number of data segments sent. - */ - u64 bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut - * total number of data bytes sent. 
- */ + u32 max_packets_out; /* max packets_out in last window */ + u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ + u32 rate_delivered; /* saved rate sample: packets delivered */ + u32 rate_interval_us; /* saved rate sample: time elapsed */ + u32 rcv_rtt_last_tsecr; + u64 first_tx_mstamp; /* start of window send phase */ + u64 delivered_mstamp; /* time we reached "delivered" */ u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked * sum(delta(snd_una)), or how many bytes * were acked. */ + struct { + u32 rtt_us; + u32 seq; + u64 time; + } rcv_rtt_est; +/* Receiver queue space */ + struct { + u32 space; + u32 seq; + u64 time; + } rcvq_space; + __cacheline_group_end(tcp_sock_write_rx); + /* End of Hot Path */ + +/* + * RFC793 variables by their proper names. This means you can + * read the code and the spec side by side (and laugh ...) + * See RFC793 and RFC1122. The RFC writes these in capitals. + */ u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups * total number of DSACK blocks received */ - u32 snd_una; /* First byte we want an ack for */ - u32 snd_sml; /* Last byte of the most recently transmitted small packet */ - u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ - u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 compressed_ack_rcv_nxt; - - u32 tsoffset; /* timestamp offset */ - struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ - struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ - - u32 snd_wl1; /* Sequence for window update */ - u32 snd_wnd; /* The window we expect to receive */ - u32 max_window; /* Maximal window ever seen from peer */ - u32 mss_cache; /* Cached effective mss, not including SACKS */ - u32 window_clamp; /* Maximal window to advertise */ - u32 rcv_ssthresh; /* Current window clamp */ - u8 scaling_ratio; /* see tcp_win_from_space() */ /* Information of the most recently (s)acked skb */ struct tcp_rack { u64 mstamp; /* (Re)sent time of the skb */ @@ -253,23 +350,15 @@ struct tcp_sock { dsack_seen:1, /* Whether DSACK seen after last adj */ advanced:1; /* mstamp advanced since last lost marking */ } rack; - u16 advmss; /* Advertised MSS */ u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ unused:5; - u32 chrono_start; /* Start time in jiffies of a TCP chrono */ - u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ - u8 chrono_type:2, /* current chronograph type */ - rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + u8 thin_lto : 1,/* Use linear timeouts for thin streams */ + recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ - is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ - fastopen_client_fail:2; /* reason why fastopen failed */ - u8 nonagle : 4,/* Disable Nagle algorithm? */ - thin_lto : 1,/* Use linear timeouts for thin streams */ - recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */ - repair : 1, + fastopen_client_fail:2, /* reason why fastopen failed */ frto : 1;/* F-RTO (RFC5682) activated in CA_Loss */ u8 repair_queue; u8 save_syn:2, /* Save headers of SYN packet */ @@ -277,45 +366,19 @@ struct tcp_sock { syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. 
option */ syn_fastopen_ch:1, /* Active TFO re-enabling probe */ - syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ - is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ - u32 tlp_high_seq; /* snd_nxt at the time of TLP */ + syn_data_acked:1;/* data in SYN is acked by SYN-ACK */ u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ - u64 tcp_wstamp_ns; /* departure time for next sent data packet */ - u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ /* RTT measurement */ - u64 tcp_mstamp; /* most recent packet received/sent */ - u32 srtt_us; /* smoothed round trip time << 3 in usecs */ - u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ - u32 rttvar_us; /* smoothed mdev_max */ - u32 rtt_seq; /* sequence number to update rttvar */ - struct minmax rtt_min; - - u32 packets_out; /* Packets which are "in flight" */ - u32 retrans_out; /* Retransmitted packets out */ - u32 max_packets_out; /* max packets_out in last window */ - u32 cwnd_usage_seq; /* right edge of cwnd usage tracking flight */ - u16 urg_data; /* Saved octet of OOB data and control flags */ - u8 ecn_flags; /* ECN status bits. */ u8 keepalive_probes; /* num of allowed keep alive probes */ - u32 reordering; /* Packet reordering metric. */ u32 reord_seen; /* number of data packet reordering events */ - u32 snd_up; /* Urgent pointer */ - -/* - * Options received (usually on last packet, some only on SYN packets). - */ - struct tcp_options_received rx_opt; /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ - u32 snd_ssthresh; /* Slow start size threshold */ - u32 snd_cwnd; /* Sending congestion window */ u32 snd_cwnd_cnt; /* Linear increase counter */ u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; @@ -323,32 +386,10 @@ struct tcp_sock { u32 prior_cwnd; /* cwnd right before starting loss recovery */ u32 prr_delivered; /* Number of newly delivered packets to * receiver in Recovery. */ - u32 prr_out; /* Total number of pkts sent during Recovery. */ - u32 delivered; /* Total data packets delivered incl. rexmits */ - u32 delivered_ce; /* Like the above but only ECE marked packets */ - u32 lost; /* Total data packets lost incl. rexmits */ - u32 app_limited; /* limited until "delivered" reaches this val */ - u64 first_tx_mstamp; /* start of window send phase */ - u64 delivered_mstamp; /* time we reached "delivered" */ - u32 rate_delivered; /* saved rate sample: packets delivered */ - u32 rate_interval_us; /* saved rate sample: time elapsed */ - - u32 rcv_wnd; /* Current receiver window */ - u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ - u32 notsent_lowat; /* TCP_NOTSENT_LOWAT */ - u32 pushed_seq; /* Last pushed seq, required to talk to windows */ - u32 lost_out; /* Lost packets */ - u32 sacked_out; /* SACK'd packets */ struct hrtimer pacing_timer; struct hrtimer compressed_ack_timer; - /* from STCP, retrans queue hinting */ - struct sk_buff* lost_skb_hint; - struct sk_buff *retransmit_skb_hint; - - /* OOO segments go in this rbtree. Socket lock must be held. 
- /* OOO segments go in this rbtree. Socket lock must be held. */
- struct rb_root out_of_order_queue;
 struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */
 /* SACKs data, these 2 need to be together (see tcp_options_write) */
@@ -357,12 +398,6 @@ struct tcp_sock {
 struct tcp_sack_block recv_sack_cache[4];
- struct sk_buff *highest_sack; /* skb just after the highest
- * skb with SACKed bit set
- * (validity guaranteed only if
- * sacked_out > 0)
- */
- int lost_cnt_hint;
 u32 prior_ssthresh; /* ssthresh saved at recovery start */
@@ -413,21 +448,6 @@ struct tcp_sock {
 u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */
-/* Receiver side RTT estimation */
- u32 rcv_rtt_last_tsecr;
- struct {
- u32 rtt_us;
- u32 seq;
- u64 time;
- } rcv_rtt_est;
-
-/* Receiver queue space */
- struct {
- u32 space;
- u32 seq;
- u64 time;
- } rcvq_space;
-
 /* TCP-specific MTU probe information. */
 struct {
 u32 probe_seq_start;
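The tcp_sock hunks above bracket the hot TX/RX members with __cacheline_group_begin()/__cacheline_group_end() markers so that fields touched together in the fast path end up on the same cache lines, while rarely used fields stay outside the groups. As a rough standalone sketch of the idea (the toy_group_*() macros and the 16-byte budget below are illustrative assumptions, not the kernel's real macros), the markers behave like zero-size members whose offsets let a group's span be checked at build time:

  /* Illustrative sketch only: toy_group_begin()/toy_group_end() stand in for
   * the kernel's group markers. Zero-length arrays are a GNU C extension.
   */
  #include <assert.h>
  #include <stddef.h>
  #include <stdint.h>

  #define toy_group_begin(GROUP)	uint8_t __toy_begin_##GROUP[0]
  #define toy_group_end(GROUP)	uint8_t __toy_end_##GROUP[0]

  struct toy_sock {
  	toy_group_begin(write_rx);	/* hot on the receive path */
  	uint64_t bytes_received;
  	uint32_t rcv_wup;
  	uint32_t rcv_rtt_last_tsecr;
  	toy_group_end(write_rx);

  	uint32_t keepalive_time;	/* cold, kept out of the group */
  };

  /* The whole group must fit the byte budget planned for it; adding a field
   * past the budget fails this assertion at compile time.
   */
  static_assert(offsetof(struct toy_sock, __toy_end_write_rx) -
  	      offsetof(struct toy_sock, __toy_begin_write_rx) <= 16,
  	      "write_rx group grew past its cacheline budget");

  int main(void) { return 0; }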
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 07fbb549a8916..133b66b06b3bf 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -43,6 +43,38 @@ struct inet_timewait_death_row {
 struct tcp_fastopen_context;
 struct netns_ipv4 {
+ /* Cacheline organization can be found documented in
+ * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
+ * Please update the document when adding new fields.
+ */
+
+ /* TX readonly hotpath cache lines */
+ __cacheline_group_begin(netns_ipv4_read_tx);
+ u8 sysctl_tcp_early_retrans;
+ u8 sysctl_tcp_tso_win_divisor;
+ u8 sysctl_tcp_tso_rtt_log;
+ u8 sysctl_tcp_autocorking;
+ int sysctl_tcp_min_snd_mss;
+ unsigned int sysctl_tcp_notsent_lowat;
+ int sysctl_tcp_limit_output_bytes;
+ int sysctl_tcp_min_rtt_wlen;
+ int sysctl_tcp_wmem[3];
+ u8 sysctl_ip_fwd_use_pmtu;
+ __cacheline_group_end(netns_ipv4_read_tx);
+
+ /* TXRX readonly hotpath cache lines */
+ __cacheline_group_begin(netns_ipv4_read_txrx);
+ u8 sysctl_tcp_moderate_rcvbuf;
+ __cacheline_group_end(netns_ipv4_read_txrx);
+
+ /* RX readonly hotpath cache line */
+ __cacheline_group_begin(netns_ipv4_read_rx);
+ u8 sysctl_ip_early_demux;
+ u8 sysctl_tcp_early_demux;
+ int sysctl_tcp_reordering;
+ int sysctl_tcp_rmem[3];
+ __cacheline_group_end(netns_ipv4_read_rx);
+
 struct inet_timewait_death_row tcp_death_row;
 struct udp_table *udp_table;
@@ -97,17 +129,14 @@ struct netns_ipv4 {
 u8 sysctl_ip_default_ttl;
 u8 sysctl_ip_no_pmtu_disc;
- u8 sysctl_ip_fwd_use_pmtu;
 u8 sysctl_ip_fwd_update_priority;
 u8 sysctl_ip_nonlocal_bind;
 u8 sysctl_ip_autobind_reuse;
 /* Shall we try to damage output packets if routing dev changes? */
 u8 sysctl_ip_dynaddr;
- u8 sysctl_ip_early_demux;
 #ifdef CONFIG_NET_L3_MASTER_DEV
 u8 sysctl_raw_l3mdev_accept;
 #endif
- u8 sysctl_tcp_early_demux;
 u8 sysctl_udp_early_demux;
 u8 sysctl_nexthop_compat_mode;
@@ -120,7 +149,6 @@ struct netns_ipv4 {
 u8 sysctl_tcp_mtu_probing;
 int sysctl_tcp_mtu_probe_floor;
 int sysctl_tcp_base_mss;
- int sysctl_tcp_min_snd_mss;
 int sysctl_tcp_probe_threshold;
 u32 sysctl_tcp_probe_interval;
@@ -133,17 +161,17 @@ struct netns_ipv4 {
 u8 sysctl_tcp_syncookies;
 u8 sysctl_tcp_migrate_req;
 u8 sysctl_tcp_comp_sack_nr;
- int sysctl_tcp_reordering;
+ u8 sysctl_tcp_backlog_ack_defer; // reserved
+ u8 sysctl_tcp_pingpong_thresh; // reserved
+
 u8 sysctl_tcp_retries1;
 u8 sysctl_tcp_retries2;
 u8 sysctl_tcp_orphan_retries;
 u8 sysctl_tcp_tw_reuse;
 int sysctl_tcp_fin_timeout;
- unsigned int sysctl_tcp_notsent_lowat;
 u8 sysctl_tcp_sack;
 u8 sysctl_tcp_window_scaling;
 u8 sysctl_tcp_timestamps;
- u8 sysctl_tcp_early_retrans;
 u8 sysctl_tcp_recovery;
 u8 sysctl_tcp_thin_linear_timeouts;
 u8 sysctl_tcp_slow_start_after_idle;
@@ -159,21 +187,13 @@ struct netns_ipv4 {
 u8 sysctl_tcp_frto;
 u8 sysctl_tcp_nometrics_save;
 u8 sysctl_tcp_no_ssthresh_metrics_save;
- u8 sysctl_tcp_moderate_rcvbuf;
- u8 sysctl_tcp_tso_win_divisor;
 u8 sysctl_tcp_workaround_signed_windows;
- int sysctl_tcp_limit_output_bytes;
 int sysctl_tcp_challenge_ack_limit;
- int sysctl_tcp_min_rtt_wlen;
 u8 sysctl_tcp_min_tso_segs;
- u8 sysctl_tcp_tso_rtt_log;
- u8 sysctl_tcp_autocorking;
 u8 sysctl_tcp_reflect_tos;
 int sysctl_tcp_invalid_ratelimit;
 int sysctl_tcp_pacing_ss_ratio;
 int sysctl_tcp_pacing_ca_ratio;
- int sysctl_tcp_wmem[3];
- int sysctl_tcp_rmem[3];
 unsigned int sysctl_tcp_child_ehash_entries;
 unsigned long sysctl_tcp_comp_sack_delay_ns;
 unsigned long sysctl_tcp_comp_sack_slack_ns;
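The group sizes asserted later in netns_ipv4_struct_check() (33, 1 and 18 bytes) fall straight out of the member lists above once the usual kernel type sizes are assumed (u8 is 1 byte, int and unsigned int are 4). The standalone check below just restates that arithmetic; it is not kernel code:

  /* Worked arithmetic for the asserted netns_ipv4 group sizes, assuming
   * u8 = 1 byte and int/unsigned int = 4 bytes.
   */
  #include <assert.h>

  /* read_tx: four u8 sysctls, four 4-byte sysctls, sysctl_tcp_wmem[3], one u8 */
  static_assert(4 * 1 + 4 * 4 + 3 * 4 + 1 == 33, "netns_ipv4_read_tx is 33 bytes");

  /* read_txrx holds a single u8 (sysctl_tcp_moderate_rcvbuf), hence 1 byte. */

  /* read_rx: two u8 demux flags, sysctl_tcp_reordering, sysctl_tcp_rmem[3] */
  static_assert(1 + 1 + 4 + 3 * 4 == 18, "netns_ipv4_read_rx is 18 bytes");

  int main(void) { return 0; }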
diff --git a/net/core/dev.c b/net/core/dev.c
index 69da7b009f8b9..7f64e64ed76e9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11578,6 +11578,63 @@ static struct pernet_operations __net_initdata default_device_ops = {
 .exit_batch = default_device_exit_batch,
 };
+static void __init net_dev_struct_check(void)
+{
+ /* TX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_partial_features);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq);
+#ifdef CONFIG_XPS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_maps);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, nf_hooks_egress);
+#endif
+#ifdef CONFIG_NET_XGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress);
+#endif
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160);
+
+ /* TXRX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, state);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 46);
+
+ /* RX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net);
+#ifdef CONFIG_NETPOLL
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo);
+#endif
+#ifdef CONFIG_NET_XGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
+#endif
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104);
+}
+
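net_dev_struct_check() is called once from net_dev_init() (next hunk), but the CACHELINE_ASSERT_GROUP_MEMBER()/CACHELINE_ASSERT_GROUP_SIZE() lines it contains are meant to trip at build time, so accidentally moving a member out of its read_tx/read_txrx/read_rx group breaks the build instead of silently regressing the layout. A minimal sketch of that kind of containment check, using a made-up toy_assert_in_group() macro rather than the kernel's macro definitions:

  /* Sketch of a build-time containment check; toy_assert_in_group() is a
   * stand-in for CACHELINE_ASSERT_GROUP_MEMBER(), not its actual definition.
   */
  #include <assert.h>
  #include <stddef.h>
  #include <stdint.h>

  struct toy_device {
  	uint8_t __begin_read_tx[0];	/* group start marker (GNU extension) */
  	const void *netdev_ops;
  	unsigned int mtu;
  	unsigned short needed_headroom;
  	uint8_t __end_read_tx[0];	/* group end marker */

  	unsigned int promiscuity;	/* intentionally outside the group */
  };

  #define toy_assert_in_group(type, group, member)			\
  	static_assert(offsetof(type, member) >=				\
  			offsetof(type, __begin_##group) &&		\
  		      offsetof(type, member) <				\
  			offsetof(type, __end_##group),			\
  		      #member " must stay inside group " #group)

  toy_assert_in_group(struct toy_device, read_tx, netdev_ops);
  toy_assert_in_group(struct toy_device, read_tx, mtu);
  toy_assert_in_group(struct toy_device, read_tx, needed_headroom);
  /* toy_assert_in_group(struct toy_device, read_tx, promiscuity); would not build */

  int main(void) { return 0; }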
 /*
 * Initialize the DEV module. At boot time this walks the device list and
 * unhooks any devices that fail to initialise (normally hardware not
@@ -11595,6 +11652,8 @@ static int __init net_dev_init(void)
 BUG_ON(!dev_boot_phase);
+ net_dev_struct_check();
+
 if (dev_proc_init())
 goto out;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 92b7fea4d495c..4ebb47a44f2bb 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -1125,11 +1125,56 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
 rtnl_set_sk_err(net, RTNLGRP_NSID, err);
 }
+#ifdef CONFIG_NET_NS
+static void __init netns_ipv4_struct_check(void)
+{
+ /* TX readonly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_early_retrans);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_tso_win_divisor);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_tso_rtt_log);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_autocorking);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_min_snd_mss);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_notsent_lowat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_limit_output_bytes);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_min_rtt_wlen);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_wmem);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_ip_fwd_use_pmtu);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33);
+
+ /* TXRX readonly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx,
+ sysctl_tcp_moderate_rcvbuf);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1);
+
+ /* RX readonly hotpath cache line */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_ip_early_demux);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_early_demux);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_reordering);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_rmem);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
+}
+#endif
+
 void __init net_ns_init(void)
 {
 struct net_generic *ng;
 #ifdef CONFIG_NET_NS
+ netns_ipv4_struct_check();
 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, SLAB_PANIC|SLAB_ACCOUNT, NULL);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5e6615f69f175..4219f35783038 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -4657,6 +4657,98 @@ static void __init tcp_init_mem(void)
 sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
 }
+static void __init tcp_struct_check(void)
+{
+ /* TX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, max_window);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, rcv_ssthresh);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40);
+
+ /* TXRX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_wnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, mss_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_cwnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32);
+
+ /* RX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, retrans_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, advmss);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, urg_data);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, lost);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69);
+
+ /* TX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, data_segs_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, bytes_sent);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, snd_sml);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_start);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_stat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, write_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, pushed_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113);
+
+ /* TXRX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, window_clamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, srtt_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, packets_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_up);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76);
+
+ /* RX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, data_segs_in);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_wup);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, max_packets_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, cwnd_usage_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_delivered);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99);
+}
+
 void __init tcp_init(void)
 {
 int max_rshare, max_wshare, cnt;
@@ -4667,6 +4759,8 @@ void __init tcp_init(void)
 BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof_field(struct sk_buff, cb));
+ tcp_struct_check();
+
 percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
 timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
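One detail worth noting about the sizes asserted in tcp_struct_check() is that they include alignment padding, not just the summed field widths. Taking tcp_sock_read_tx as an example, and assuming 64-bit pointers and the member order used by the assertions above: the four u32 fields and the u16 come to 18 bytes, the two skb pointers must start on an 8-byte boundary, and the total therefore comes out to 40, matching the asserted value. A standalone mirror of that layout (a worked example, not the real struct) shows the same arithmetic:

  /* Mirror of the tcp_sock_read_tx group layout, assuming LP64 (8-byte
   * pointers); a worked example only.
   */
  #include <assert.h>
  #include <stdint.h>

  struct read_tx_mirror {
  	uint32_t max_window;
  	uint32_t rcv_ssthresh;
  	uint32_t reordering;
  	uint32_t notsent_lowat;
  	uint16_t gso_segs;
  	/* 6 bytes of padding here so the pointers are 8-byte aligned */
  	void *lost_skb_hint;
  	void *retransmit_skb_hint;
  };

  static_assert(sizeof(struct read_tx_mirror) == 40,
  	      "4*4 + 2 + 6 (pad) + 2*8 = 40 bytes");

  int main(void) { return 0; }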
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 6e199a745ccb2..d963fdf40e900 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1592,6 +1592,11 @@ sub push_parameter($$$$$) {
 $parameterdescs{$param} = "anonymous\n";
 $anon_struct_union = 1;
 }
+ elsif ($param =~ "__cacheline_group" )
+ # handle cache group enforcing variables: they do not need to be described in header files
+ {
+ return; # ignore __cacheline_group_begin and __cacheline_group_end
+ }
 # warn if parameter has no description
 # (but ignore ones starting with # as these are not parameters
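Finally, the scripts/kernel-doc change above keeps the group markers from generating "member not described" warnings: any struct member whose name contains __cacheline_group is simply skipped, so a kernel-doc comment only has to cover the real fields. The fragment below is a made-up illustration (struct toy_stats is not part of the patch, and kernel context such as u64 and the marker macros is assumed):

  /**
   * struct toy_stats - example of a documented struct using cacheline groups
   * @rx_packets: packets received on the hot RX path
   * @rx_bytes: bytes received on the hot RX path
   * @collisions: rarely touched error counter
   *
   * No entries are needed for the two marker lines below; kernel-doc now
   * skips members whose name contains __cacheline_group.
   */
  struct toy_stats {
  	__cacheline_group_begin(toy_stats_read_rx);
  	u64 rx_packets;
  	u64 rx_bytes;
  	__cacheline_group_end(toy_stats_read_rx);
  	u64 collisions;
  };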