@@ -466,7 +466,7 @@ struct ioc_gq {
 	 */
 	atomic64_t			vtime;
 	atomic64_t			done_vtime;
-	atomic64_t			abs_vdebt;
+	u64				abs_vdebt;
 	u64				last_vtime;
 
 	/*
@@ -1142,7 +1142,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
 	struct iocg_wake_ctx ctx = { .iocg = iocg };
 	u64 margin_ns = (u64)(ioc->period_us *
 			      WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
-	u64 abs_vdebt, vdebt, vshortage, expires, oexpires;
+	u64 vdebt, vshortage, expires, oexpires;
 	s64 vbudget;
 	u32 hw_inuse;
 
@@ -1152,18 +1152,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
 	vbudget = now->vnow - atomic64_read(&iocg->vtime);
 
 	/* pay off debt */
-	abs_vdebt = atomic64_read(&iocg->abs_vdebt);
-	vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse);
+	vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
 	if (vdebt && vbudget > 0) {
 		u64 delta = min_t(u64, vbudget, vdebt);
 		u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
-				    abs_vdebt);
+				    iocg->abs_vdebt);
 
 		atomic64_add(delta, &iocg->vtime);
 		atomic64_add(delta, &iocg->done_vtime);
-		atomic64_sub(abs_delta, &iocg->abs_vdebt);
-		if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0))
-			atomic64_set(&iocg->abs_vdebt, 0);
+		iocg->abs_vdebt -= abs_delta;
 	}
 
 	/*
@@ -1219,12 +1216,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
 	u64 expires, oexpires;
 	u32 hw_inuse;
 
+	lockdep_assert_held(&iocg->waitq.lock);
+
 	/* debt-adjust vtime */
 	current_hweight(iocg, NULL, &hw_inuse);
-	vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse);
+	vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
 
-	/* clear or maintain depending on the overage */
-	if (time_before_eq64(vtime, now->vnow)) {
+	/*
+	 * Clear or maintain depending on the overage. Non-zero vdebt is what
+	 * guarantees that @iocg is online and future iocg_kick_delay() will
+	 * clear use_delay. Don't leave it on when there's no vdebt.
+	 */
+	if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
 		blkcg_clear_delay(blkg);
 		return false;
 	}
@@ -1258,9 +1261,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
 {
 	struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
 	struct ioc_now now;
+	unsigned long flags;
 
+	spin_lock_irqsave(&iocg->waitq.lock, flags);
 	ioc_now(iocg->ioc, &now);
 	iocg_kick_delay(iocg, &now, 0);
+	spin_unlock_irqrestore(&iocg->waitq.lock, flags);
 
 	return HRTIMER_NORESTART;
 }
@@ -1368,14 +1374,13 @@ static void ioc_timer_fn(struct timer_list *timer)
 	 * should have woken up in the last period and expire idle iocgs.
 	 */
 	list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
-		if (!waitqueue_active(&iocg->waitq) &&
-		    !atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg))
+		if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
+		    !iocg_is_idle(iocg))
 			continue;
 
 		spin_lock(&iocg->waitq.lock);
 
-		if (waitqueue_active(&iocg->waitq) ||
-		    atomic64_read(&iocg->abs_vdebt)) {
+		if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
 			/* might be oversleeping vtime / hweight changes, kick */
 			iocg_kick_waitq(iocg, &now);
 			iocg_kick_delay(iocg, &now, 0);
@@ -1718,28 +1723,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 	 * tests are racy but the races aren't systemic - we only miss once
 	 * in a while which is fine.
 	 */
-	if (!waitqueue_active(&iocg->waitq) &&
-	    !atomic64_read(&iocg->abs_vdebt) &&
+	if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
 	    time_before_eq64(vtime + cost, now.vnow)) {
 		iocg_commit_bio(iocg, bio, cost);
 		return;
 	}
 
 	/*
-	 * We're over budget. If @bio has to be issued regardless,
-	 * remember the abs_cost instead of advancing vtime.
-	 * iocg_kick_waitq() will pay off the debt before waking more IOs.
+	 * We activated above but w/o any synchronization. Deactivation is
+	 * synchronized with waitq.lock and we won't get deactivated as long
+	 * as we're waiting or has debt, so we're good if we're activated
+	 * here. In the unlikely case that we aren't, just issue the IO.
+	 */
+	spin_lock_irq(&iocg->waitq.lock);
+
+	if (unlikely(list_empty(&iocg->active_list))) {
+		spin_unlock_irq(&iocg->waitq.lock);
+		iocg_commit_bio(iocg, bio, cost);
+		return;
+	}
+
+	/*
+	 * We're over budget. If @bio has to be issued regardless, remember
+	 * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
+	 * off the debt before waking more IOs.
+	 *
 	 * This way, the debt is continuously paid off each period with the
-	 * actual budget available to the cgroup. If we just wound vtime,
-	 * we would incorrectly use the current hw_inuse for the entire
-	 * amount which, for example, can lead to the cgroup staying
-	 * blocked for a long time even with substantially raised hw_inuse.
+	 * actual budget available to the cgroup. If we just wound vtime, we
+	 * would incorrectly use the current hw_inuse for the entire amount
+	 * which, for example, can lead to the cgroup staying blocked for a
+	 * long time even with substantially raised hw_inuse.
+	 *
+	 * An iocg with vdebt should stay online so that the timer can keep
+	 * deducting its vdebt and [de]activate use_delay mechanism
+	 * accordingly. We don't want to race against the timer trying to
+	 * clear them and leave @iocg inactive w/ dangling use_delay heavily
+	 * penalizing the cgroup and its descendants.
 	 */
 	if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
-		atomic64_add(abs_cost, &iocg->abs_vdebt);
+		iocg->abs_vdebt += abs_cost;
 		if (iocg_kick_delay(iocg, &now, cost))
 			blkcg_schedule_throttle(rqos->q,
 						(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
+		spin_unlock_irq(&iocg->waitq.lock);
 		return;
 	}
 
@@ -1756,20 +1782,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
 	 * All waiters are on iocg->waitq and the wait states are
 	 * synchronized using waitq.lock.
 	 */
-	spin_lock_irq(&iocg->waitq.lock);
-
-	/*
-	 * We activated above but w/o any synchronization. Deactivation is
-	 * synchronized with waitq.lock and we won't get deactivated as
-	 * long as we're waiting, so we're good if we're activated here.
-	 * In the unlikely case that we are deactivated, just issue the IO.
-	 */
-	if (unlikely(list_empty(&iocg->active_list))) {
-		spin_unlock_irq(&iocg->waitq.lock);
-		iocg_commit_bio(iocg, bio, cost);
-		return;
-	}
-
 	init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
 	wait.wait.private = current;
 	wait.bio = bio;
@@ -1801,6 +1813,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
 	struct ioc_now now;
 	u32 hw_inuse;
 	u64 abs_cost, cost;
+	unsigned long flags;
 
 	/* bypass if disabled or for root cgroup */
 	if (!ioc->enabled || !iocg->level)
@@ -1820,15 +1833,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
 	iocg->cursor = bio_end;
 
 	/*
-	 * Charge if there's enough vtime budget and the existing request
-	 * has cost assigned. Otherwise, account it as debt. See debt
-	 * handling in ioc_rqos_throttle() for details.
+	 * Charge if there's enough vtime budget and the existing request has
+	 * cost assigned.
 	 */
 	if (rq->bio && rq->bio->bi_iocost_cost &&
-	    time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow))
+	    time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
 		iocg_commit_bio(iocg, bio, cost);
-	else
-		atomic64_add(abs_cost, &iocg->abs_vdebt);
+		return;
+	}
+
+	/*
+	 * Otherwise, account it as debt if @iocg is online, which it should
+	 * be for the vast majority of cases. See debt handling in
+	 * ioc_rqos_throttle() for details.
+	 */
+	spin_lock_irqsave(&iocg->waitq.lock, flags);
+	if (likely(!list_empty(&iocg->active_list))) {
+		iocg->abs_vdebt += abs_cost;
+		iocg_kick_delay(iocg, &now, cost);
+	} else {
+		iocg_commit_bio(iocg, bio, cost);
+	}
+	spin_unlock_irqrestore(&iocg->waitq.lock, flags);
 }
 
 static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
@@ -1998,7 +2024,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
 	iocg->ioc = ioc;
 	atomic64_set(&iocg->vtime, now.vnow);
 	atomic64_set(&iocg->done_vtime, now.vnow);
-	atomic64_set(&iocg->abs_vdebt, 0);
 	atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
 	INIT_LIST_HEAD(&iocg->active_list);
 	iocg->hweight_active = HWEIGHT_WHOLE;
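
Reviewer note: the core of this patch is converting iocg->abs_vdebt from an
atomic64_t into a plain u64 that may only be read or written while holding
iocg->waitq.lock. The old code's debt payoff was a multi-step atomic sequence
(atomic64_sub() followed by a separate WARN_ON_ONCE() underflow check) that
was not atomic as a unit and could race against the issue path adding debt.
Below is a minimal userspace sketch of the resulting locking rule; it is not
kernel code, every name in it (fake_iocg, charge_debt, pay_debt) is
hypothetical, and a pthread mutex merely stands in for the waitq spinlock:

/*
 * Userspace model of "abs_vdebt is a plain integer guarded by waitq.lock":
 * charge and pay are read-modify-write sequences that stay consistent
 * because both run under the same lock.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct fake_iocg {
	pthread_mutex_t lock;		/* stands in for iocg->waitq.lock */
	uint64_t abs_vdebt;		/* only touched while holding ->lock */
};

/* issue-path analogue: remember the cost as debt instead of advancing vtime */
static void charge_debt(struct fake_iocg *iocg, uint64_t abs_cost)
{
	pthread_mutex_lock(&iocg->lock);
	iocg->abs_vdebt += abs_cost;
	pthread_mutex_unlock(&iocg->lock);
}

/* timer-path analogue: pay debt from the available budget, clamped */
static uint64_t pay_debt(struct fake_iocg *iocg, uint64_t budget)
{
	uint64_t paid;

	pthread_mutex_lock(&iocg->lock);
	paid = budget < iocg->abs_vdebt ? budget : iocg->abs_vdebt;
	iocg->abs_vdebt -= paid;	/* paid <= abs_vdebt, so no underflow */
	pthread_mutex_unlock(&iocg->lock);
	return paid;
}

int main(void)
{
	struct fake_iocg iocg = { .lock = PTHREAD_MUTEX_INITIALIZER };

	charge_debt(&iocg, 100);
	/* prints "paid 60, remaining 40" */
	printf("paid %llu, remaining %llu\n",
	       (unsigned long long)pay_debt(&iocg, 60),
	       (unsigned long long)iocg.abs_vdebt);
	return 0;
}

Because pay_debt() clamps the payment to the outstanding debt inside the same
critical section, no separate underflow check is needed afterwards, which is
exactly why the iocg_kick_waitq() hunk above can drop the WARN_ON_ONCE()
repair logic in favor of min() plus a plain subtraction under waitq.lock.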