@@ -34,6 +34,8 @@
 #include "blk-ioprio.h"
 #include "blk-throttle.h"
 
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu);
+
 /*
  * blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
  * blkcg_pol_register_mutex nests outside of it and synchronizes entire
@@ -56,6 +58,8 @@ static LIST_HEAD(all_blkcgs);        /* protected by blkcg_pol_mutex */
 
 bool blkcg_debug_stats = false;
 
+static DEFINE_RAW_SPINLOCK(blkg_stat_lock);
+
 #define BLKG_DESTROY_BATCH_SIZE  64
 
 /*
@@ -163,10 +167,20 @@ static void blkg_free(struct blkcg_gq *blkg)
 static void __blkg_release(struct rcu_head *rcu)
 {
         struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+        struct blkcg *blkcg = blkg->blkcg;
+        int cpu;
 
 #ifdef CONFIG_BLK_CGROUP_PUNT_BIO
         WARN_ON(!bio_list_empty(&blkg->async_bios));
 #endif
+        /*
+         * Flush all the non-empty percpu lockless lists before releasing
+         * us, given these stats belong to us.
+         *
+         * blkg_stat_lock is used to serialize blkg stat updates.
+         */
+        for_each_possible_cpu(cpu)
+                __blkcg_rstat_flush(blkcg, cpu);
 
         /* release the blkcg and parent blkg refs this blkg has been holding */
         css_put(&blkg->blkcg->css);
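
Note on the hunk above: __blkg_release() now drains every per-CPU lockless list that may still carry queued stat deltas before the blkg drops its blkcg and parent references; the matching removal of the per-entry reference get/put appears in the later hunks. As a rough userspace analogy (hypothetical names, plain arrays rather than the kernel's per-CPU llists), the ordering rule is simply "fold every slot into the aggregate before freeing the owner":

    /* Userspace analogy (hypothetical names, plain arrays instead of per-CPU
     * lockless lists): every slot is folded into the aggregate before the
     * object that owns the aggregate is freed, so no queued update is lost
     * or left pointing at freed memory. */
    #include <stdio.h>
    #include <stdlib.h>

    #define NR_SLOTS 4  /* stands in for the number of possible CPUs */

    struct counter {
        long pending[NR_SLOTS];  /* per-slot deltas, lockless in the real code */
        long total;              /* aggregate owned by this object */
    };

    static void flush_slot(struct counter *c, int slot)
    {
        c->total += c->pending[slot];
        c->pending[slot] = 0;
    }

    static void counter_release(struct counter *c)
    {
        /* mirrors __blkg_release(): drain every slot, then drop the object */
        for (int slot = 0; slot < NR_SLOTS; slot++)
            flush_slot(c, slot);
        printf("final total: %ld\n", c->total);
        free(c);
    }

    int main(void)
    {
        struct counter *c = calloc(1, sizeof(*c));

        c->pending[1] = 10;  /* updates queued on "CPU" 1 and 3 */
        c->pending[3] = 5;
        counter_release(c);  /* prints "final total: 15" */
        return 0;
    }

The idea, as the in-diff comment puts it, is that the queued per-CPU data belongs to this blkg, so it must not outlive the structures it points back to.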
@@ -951,23 +965,26 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
         u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
 }
 
-static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
 {
-        struct blkcg *blkcg = css_to_blkcg(css);
         struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
         struct llist_node *lnode;
         struct blkg_iostat_set *bisc, *next_bisc;
 
-        /* Root-level stats are sourced from system-wide IO stats */
-        if (!cgroup_parent(css->cgroup))
-                return;
-
         rcu_read_lock();
 
         lnode = llist_del_all(lhead);
         if (!lnode)
                 goto out;
 
+        /*
+         * Cover concurrent parent blkg updates from blkg_release().
+         *
+         * When flushing from cgroup, cgroup_rstat_lock is always held, so
+         * this lock won't cause contention most of the time.
+         */
+        raw_spin_lock(&blkg_stat_lock);
+
         /*
          * Iterate only the iostat_cpu's queued in the lockless list.
          */
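
The flush path above combines two mechanisms: the per-CPU list is consumed lock-free with llist_del_all(), and the new blkg_stat_lock only serializes the fold into the shared counters against a concurrent flush from blkg_release(). A minimal userspace sketch of that shape, using C11 atomics and a pthread mutex rather than the kernel's llist and raw spinlock (all names here are hypothetical):

    /* Userspace sketch (hypothetical names, not the kernel llist API):
     * producers push onto a lock-free list, the flusher detaches the whole
     * list with one atomic exchange, and the lock only serializes the fold
     * into the shared total. */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    struct entry {
        long delta;
        struct entry *next;
    };

    static _Atomic(struct entry *) head;    /* stands in for the per-CPU llist */
    static pthread_mutex_t fold_lock = PTHREAD_MUTEX_INITIALIZER;
    static long total;                      /* shared aggregate ("parent" stats) */

    static void push(struct entry *e)       /* shaped like llist_add() */
    {
        e->next = atomic_load(&head);
        while (!atomic_compare_exchange_weak(&head, &e->next, e))
            ;                               /* retry; e->next holds the new head */
    }

    static void flush(void)                 /* shaped like __blkcg_rstat_flush() */
    {
        struct entry *e = atomic_exchange(&head, NULL);  /* like llist_del_all() */

        if (!e)
            return;

        pthread_mutex_lock(&fold_lock);     /* serialize concurrent flushers */
        for (; e; e = e->next)
            total += e->delta;
        pthread_mutex_unlock(&fold_lock);
    }

    int main(void)
    {
        struct entry a = { .delta = 3 }, b = { .delta = 4 };

        push(&a);
        push(&b);
        flush();
        printf("total = %ld\n", total);     /* prints "total = 7" */
        return 0;
    }

Detaching the whole list in one atomic exchange keeps the producer side lock-free, and the lock is taken at most once per flush, which matches the comment above that contention on blkg_stat_lock is expected to be low.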
@@ -991,13 +1008,19 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
                 if (parent && parent->parent)
                         blkcg_iostat_update(parent, &blkg->iostat.cur,
                                             &blkg->iostat.last);
-                percpu_ref_put(&blkg->refcnt);
         }
-
+        raw_spin_unlock(&blkg_stat_lock);
 out:
         rcu_read_unlock();
 }
 
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+        /* Root-level stats are sourced from system-wide IO stats */
+        if (cgroup_parent(css->cgroup))
+                __blkcg_rstat_flush(css_to_blkcg(css), cpu);
+}
+
 /*
  * We source root cgroup stats from the system-wide stats to avoid
  * tracking the same information twice and incurring overhead when no
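
The callback/helper split above lets the release path call __blkcg_rstat_flush() with a blkcg and CPU directly, while the rstat callback keeps the existing root-cgroup filter (root-level stats are sourced from system-wide IO stats, per the comment retained in the wrapper). A compact sketch of the same shape, with hypothetical names:

    /* Sketch of the callback/worker split (hypothetical names, not kernel
     * code): the callback keeps the policy check, the worker takes the
     * object directly so another caller (the release path) can reuse it. */
    #include <stdio.h>

    struct node { struct node *parent; const char *name; };

    static void do_flush(struct node *n)        /* like __blkcg_rstat_flush() */
    {
        printf("flushing %s\n", n->name);
    }

    static void callback_flush(struct node *n)  /* like blkcg_rstat_flush() */
    {
        if (n->parent)                          /* root is skipped, as before */
            do_flush(n);
    }

    int main(void)
    {
        struct node root = { NULL, "root" }, child = { &root, "child" };

        callback_flush(&root);   /* no output: root stats come from elsewhere */
        callback_flush(&child);  /* prints "flushing child" */
        do_flush(&child);        /* release path bypasses the filter */
        return 0;
    }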
@@ -2075,7 +2098,6 @@ void blk_cgroup_bio_start(struct bio *bio)
 
                 llist_add(&bis->lnode, lhead);
                 WRITE_ONCE(bis->lqueued, true);
-                percpu_ref_get(&bis->blkg->refcnt);
         }
 
         u64_stats_update_end_irqrestore(&bis->sync, flags);
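
The removal above is the counterpart of the flush added to __blkg_release(): the producer no longer pins the blkg with percpu_ref_get() for every stat set it queues, because the release path now drains the per-CPU lists itself (the matching percpu_ref_put() was dropped from the flush loop earlier in this diff). A small userspace sketch of that lifetime rule, with hypothetical names:

    /* Lifetime sketch (hypothetical, userspace): instead of taking a
     * reference for every queued update and dropping it when the update is
     * folded in (the old scheme), the owner drains its queue in its release
     * path, so queueing itself no longer needs to pin the owner. */
    #include <stdio.h>
    #include <stdlib.h>

    struct owner {
        int refs;
        long pending;   /* queued but not yet folded into total */
        long total;
    };

    static void queue_update(struct owner *o, long delta)
    {
        o->pending += delta;    /* note: no owner_get() here any more */
    }

    static void owner_put(struct owner *o)
    {
        if (--o->refs)
            return;
        o->total += o->pending; /* drain before freeing, as in __blkg_release() */
        printf("freed with total %ld\n", o->total);
        free(o);
    }

    int main(void)
    {
        struct owner *o = calloc(1, sizeof(*o));

        o->refs = 1;
        queue_update(o, 8);
        owner_put(o);           /* prints "freed with total 8" */
        return 0;
    }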