 #include <linux/times.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sock_diag.h>
 
 #include <net/net_namespace.h>
 #include <net/icmp.h>
@@ -3016,6 +3017,7 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
 #ifdef CONFIG_BPF_SYSCALL
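+/* A batch slot holds a referenced socket while the batch is in use, and
+ * is overwritten with that socket's cookie once the reference is dropped
+ * between reads.
+ */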
 union bpf_tcp_iter_batch_item {
 	struct sock *sk;
+	__u64 cookie;
 };
 
 struct bpf_tcp_iter_state {
@@ -3046,10 +3048,19 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
 
 static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter)
 {
+	union bpf_tcp_iter_batch_item *item;
 	unsigned int cur_sk = iter->cur_sk;
+	__u64 cookie;
 
-	while (cur_sk < iter->end_sk)
-		sock_gen_put(iter->batch[cur_sk++].sk);
+	/* Remember the cookies of the sockets we haven't seen yet, so we can
+	 * pick up where we left off next time around.
+	 */
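+	/* ->sk and ->cookie share the same slot, so read the cookie and
+	 * drop the reference before the cookie overwrites the pointer.
+	 */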
+	while (cur_sk < iter->end_sk) {
+		item = &iter->batch[cur_sk++];
+		cookie = sock_gen_cookie(item->sk);
+		sock_gen_put(item->sk);
+		item->cookie = cookie;
+	}
 }
 
 static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
@@ -3070,6 +3081,106 @@ static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter,
 	return 0;
 }
 
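+/* Starting from first_sk, find the first socket in the bucket whose
+ * cookie matches one of the saved ones.  Cookies are tried in batch
+ * order, so iteration resumes at the earliest remembered socket that
+ * still exists in the bucket.
+ */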
+static struct sock *bpf_iter_tcp_resume_bucket(struct sock *first_sk,
+					       union bpf_tcp_iter_batch_item *cookies,
+					       int n_cookies)
+{
+	struct hlist_nulls_node *node;
+	struct sock *sk;
+	int i;
+
+	for (i = 0; i < n_cookies; i++) {
+		sk = first_sk;
+		sk_nulls_for_each_from(sk, node)
+			if (cookies[i].cookie == atomic64_read(&sk->sk_cookie))
+				return sk;
+	}
+
+	return NULL;
+}
+
+static struct sock *bpf_iter_tcp_resume_listening(struct seq_file *seq)
+{
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
+	struct bpf_tcp_iter_state *iter = seq->private;
+	struct tcp_iter_state *st = &iter->state;
+	unsigned int find_cookie = iter->cur_sk;
+	unsigned int end_cookie = iter->end_sk;
+	int resume_bucket = st->bucket;
+	struct sock *sk;
+
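+	/* If every socket in the previous batch was shown, there is
+	 * nothing left to resume in this bucket; start on the next one.
+	 */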
+	if (end_cookie && find_cookie == end_cookie)
+		++st->bucket;
+
+	sk = listening_get_first(seq);
+	iter->cur_sk = 0;
+	iter->end_sk = 0;
+
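+	/* Back in the bucket we left off in: skip past the sockets we
+	 * already visited.  If none of them remain, the bucket is done,
+	 * so release its lock and move on.
+	 */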
+	if (sk && st->bucket == resume_bucket && end_cookie) {
+		sk = bpf_iter_tcp_resume_bucket(sk, &iter->batch[find_cookie],
+						end_cookie - find_cookie);
+		if (!sk) {
+			spin_unlock(&hinfo->lhash2[st->bucket].lock);
+			++st->bucket;
+			sk = listening_get_first(seq);
+		}
+	}
+
+	return sk;
+}
+
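+/* Same as bpf_iter_tcp_resume_listening(), but walking the established
+ * hash table under the per-bucket ehash lock.
+ */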
+static struct sock *bpf_iter_tcp_resume_established(struct seq_file *seq)
+{
+	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
+	struct bpf_tcp_iter_state *iter = seq->private;
+	struct tcp_iter_state *st = &iter->state;
+	unsigned int find_cookie = iter->cur_sk;
+	unsigned int end_cookie = iter->end_sk;
+	int resume_bucket = st->bucket;
+	struct sock *sk;
+
+	if (end_cookie && find_cookie == end_cookie)
+		++st->bucket;
+
+	sk = established_get_first(seq);
+	iter->cur_sk = 0;
+	iter->end_sk = 0;
+
+	if (sk && st->bucket == resume_bucket && end_cookie) {
+		sk = bpf_iter_tcp_resume_bucket(sk, &iter->batch[find_cookie],
+						end_cookie - find_cookie);
+		if (!sk) {
+			spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket));
+			++st->bucket;
+			sk = established_get_first(seq);
+		}
+	}
+
+	return sk;
+}
+
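+/* Pick iteration back up from where the last batch left off, moving from
+ * the listening hash table to the established one once the former is
+ * exhausted.
+ */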
+static struct sock *bpf_iter_tcp_resume(struct seq_file *seq)
+{
+	struct bpf_tcp_iter_state *iter = seq->private;
+	struct tcp_iter_state *st = &iter->state;
+	struct sock *sk = NULL;
+
+	switch (st->state) {
+	case TCP_SEQ_STATE_LISTENING:
+		sk = bpf_iter_tcp_resume_listening(seq);
+		if (sk)
+			break;
+		st->bucket = 0;
+		st->state = TCP_SEQ_STATE_ESTABLISHED;
+		fallthrough;
+	case TCP_SEQ_STATE_ESTABLISHED:
+		sk = bpf_iter_tcp_resume_established(seq);
+		break;
+	}
+
+	return sk;
+}
+
 static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq,
 						 struct sock **start_sk)
 {
@@ -3154,32 +3265,12 @@ static void bpf_iter_tcp_unlock_bucket(struct seq_file *seq)
 
 static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
 {
-	struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo;
 	struct bpf_tcp_iter_state *iter = seq->private;
-	struct tcp_iter_state *st = &iter->state;
 	unsigned int expected;
 	struct sock *sk;
 	int err;
 
-	/* The st->bucket is done. Directly advance to the next
-	 * bucket instead of having the tcp_seek_last_pos() to skip
-	 * one by one in the current bucket and eventually find out
-	 * it has to advance to the next bucket.
-	 */
-	if (iter->end_sk && iter->cur_sk == iter->end_sk) {
-		st->offset = 0;
-		st->bucket++;
-		if (st->state == TCP_SEQ_STATE_LISTENING &&
-		    st->bucket > hinfo->lhash2_mask) {
-			st->state = TCP_SEQ_STATE_ESTABLISHED;
-			st->bucket = 0;
-		}
-	}
-
-	iter->cur_sk = 0;
-	iter->end_sk = 0;
-
-	sk = tcp_seek_last_pos(seq);
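+	/* Re-establish the iteration position; this also resets cur_sk
+	 * and end_sk in preparation for the next batch.
+	 */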
+	sk = bpf_iter_tcp_resume(seq);
 	if (!sk)
 		return NULL; /* Done */
 
@@ -3195,10 +3286,7 @@ static struct sock *bpf_iter_tcp_batch(struct seq_file *seq)
 	if (err)
 		return ERR_PTR(err);
 
-	iter->cur_sk = 0;
-	iter->end_sk = 0;
-
-	sk = tcp_seek_last_pos(seq);
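+	/* Growing the batch released the current one, so find our place
+	 * in the bucket again via the saved cookies.
+	 */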
+	sk = bpf_iter_tcp_resume(seq);
 	if (!sk)
 		return NULL; /* Done */
 
@@ -3250,11 +3338,6 @@ static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		 * meta.seq_num is used instead.
 		 */
 		st->num++;
-		/* Move st->offset to the next sk in the bucket such that
-		 * the future start() will resume at st->offset in
-		 * st->bucket. See tcp_seek_last_pos().
-		 */
-		st->offset++;
 		sock_gen_put(iter->batch[iter->cur_sk++].sk);
 	}
 