@@ -443,7 +443,7 @@ xfs_inodegc_queue_all(
 	int			cpu;
 	bool			ret = false;
 
-	for_each_online_cpu(cpu) {
+	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
 		gc = per_cpu_ptr(mp->m_inodegc, cpu);
 		if (!llist_empty(&gc->list)) {
 			mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
@@ -463,7 +463,7 @@ xfs_inodegc_wait_all(
 	int			error = 0;
 
 	flush_workqueue(mp->m_inodegc_wq);
-	for_each_online_cpu(cpu) {
+	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
 		struct xfs_inodegc	*gc;
 
 		gc = per_cpu_ptr(mp->m_inodegc, cpu);
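
The two hunks above switch from walking every online CPU to walking only the CPUs recorded in mp->m_inodegc_cpumask, i.e. the CPUs that currently have inodes queued. A minimal userspace sketch of that iteration pattern, assuming a plain 64-bit mask and illustrative names (pending_mask, NQUEUES, poke_pending_queues) rather than the kernel cpumask API:

#include <stdint.h>
#include <stdio.h>

#define NQUEUES 64                       /* stand-in for the number of CPUs */

static uint64_t pending_mask;            /* bit n set => queue n has work */
static int queue_items[NQUEUES];

/* Visit only the queues whose bit is set, like for_each_cpu(). */
static void poke_pending_queues(void)
{
	uint64_t mask = pending_mask;

	while (mask) {
		int q = __builtin_ctzll(mask);   /* index of lowest set bit */

		printf("queue %d holds %d items\n", q, queue_items[q]);
		mask &= mask - 1;                /* clear that bit, move on */
	}
}

int main(void)
{
	queue_items[3] = 2;
	pending_mask |= UINT64_C(1) << 3;
	poke_pending_queues();
	return 0;
}

for_each_cpu() performs the equivalent find-next-set-bit walk over the real cpumask, so idle CPUs are never touched.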
@@ -1845,9 +1845,17 @@ xfs_inodegc_worker(
 						struct xfs_inodegc, work);
 	struct llist_node	*node = llist_del_all(&gc->list);
 	struct xfs_inode	*ip, *n;
+	struct xfs_mount	*mp = gc->mp;
 	unsigned int		nofs_flag;
 
-	ASSERT(gc->cpu == smp_processor_id());
+	/*
+	 * Clear the cpu mask bit and ensure that we have seen the latest
+	 * update of the gc structure associated with this CPU. This matches
+	 * with the release semantics used when setting the cpumask bit in
+	 * xfs_inodegc_queue.
+	 */
+	cpumask_clear_cpu(gc->cpu, &mp->m_inodegc_cpumask);
+	smp_mb__after_atomic();
 
 	WRITE_ONCE(gc->items, 0);
 
@@ -1862,7 +1870,7 @@ xfs_inodegc_worker(
 	nofs_flag = memalloc_nofs_save();
 
 	ip = llist_entry(node, struct xfs_inode, i_gclist);
-	trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits));
+	trace_xfs_inodegc_worker(mp, READ_ONCE(gc->shrinker_hits));
 
 	WRITE_ONCE(gc->shrinker_hits, 0);
 	llist_for_each_entry_safe(ip, n, node, i_gclist) {
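
The worker now clears this CPU's bit and issues a full barrier before it relies on the rest of the gc state; the matching barrier sits in xfs_inodegc_queue below, before the bit is set. A rough userspace analogy of that pairing using C11 atomics, with illustrative names (pending_mask, items, queue_side, worker_side) standing in for the kernel cpumask and smp_mb__*() primitives:

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t pending_mask;
static _Atomic int items[64];

/* Producer: publish the item, then fence, then advertise the bit. */
static void queue_side(unsigned int cpu)
{
	atomic_fetch_add_explicit(&items[cpu], 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);   /* cf. smp_mb__before_atomic() */
	atomic_fetch_or_explicit(&pending_mask, UINT64_C(1) << cpu,
				 memory_order_relaxed);
}

/* Consumer: drop the bit, then fence, then read the published state. */
static int worker_side(unsigned int cpu)
{
	atomic_fetch_and_explicit(&pending_mask, ~(UINT64_C(1) << cpu),
				  memory_order_relaxed);
	atomic_thread_fence(memory_order_acquire);   /* cf. smp_mb__after_atomic() */
	return atomic_load_explicit(&items[cpu], memory_order_relaxed);
}

int main(void)
{
	queue_side(3);
	return worker_side(3) == 1 ? 0 : 1;
}

The kernel side uses full barriers around the cpumask bit operations; the sketch narrows them to release/acquire fences, which is enough to show the publish-then-set / clear-then-read pairing the in-diff comments describe.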
@@ -2057,25 +2065,36 @@ xfs_inodegc_queue(
 	struct xfs_inodegc	*gc;
 	int			items;
 	unsigned int		shrinker_hits;
+	unsigned int		cpu_nr;
 	unsigned long		queue_delay = 1;
 
 	trace_xfs_inode_set_need_inactive(ip);
 	spin_lock(&ip->i_flags_lock);
 	ip->i_flags |= XFS_NEED_INACTIVE;
 	spin_unlock(&ip->i_flags_lock);
 
-	gc = get_cpu_ptr(mp->m_inodegc);
+	cpu_nr = get_cpu();
+	gc = this_cpu_ptr(mp->m_inodegc);
 	llist_add(&ip->i_gclist, &gc->list);
 	items = READ_ONCE(gc->items);
 	WRITE_ONCE(gc->items, items + 1);
 	shrinker_hits = READ_ONCE(gc->shrinker_hits);
 
+	/*
+	 * Ensure the list add is always seen by anyone who finds the cpumask
+	 * bit set. This effectively gives the cpumask bit set operation
+	 * release ordering semantics.
+	 */
+	smp_mb__before_atomic();
+	if (!cpumask_test_cpu(cpu_nr, &mp->m_inodegc_cpumask))
+		cpumask_test_and_set_cpu(cpu_nr, &mp->m_inodegc_cpumask);
+
 	/*
 	 * We queue the work while holding the current CPU so that the work
 	 * is scheduled to run on this CPU.
 	 */
 	if (!xfs_is_inodegc_enabled(mp)) {
-		put_cpu_ptr(gc);
+		put_cpu();
 		return;
 	}
 
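
One detail in the hunk above: the cpumask bit is only set after a plain test, so in the common case where this CPU already has its bit set the queueing path avoids another atomic RMW on the shared mask. A small C11 sketch of that test-before-test-and-set pattern, with mask and mark_pending() as illustrative names rather than the kernel cpumask API:

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t mask;

/*
 * Skip the atomic RMW when the bit is already visible as set, so
 * steady-state callers do not keep dirtying the shared cacheline.
 */
static void mark_pending(unsigned int cpu)
{
	uint64_t bit = UINT64_C(1) << cpu;

	if (!(atomic_load_explicit(&mask, memory_order_relaxed) & bit))
		atomic_fetch_or_explicit(&mask, bit, memory_order_relaxed);
}

int main(void)
{
	mark_pending(5);
	mark_pending(5);       /* second call takes the cheap read-only path */
	return 0;
}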
@@ -2085,55 +2104,14 @@ xfs_inodegc_queue(
 	trace_xfs_inodegc_queue(mp, __return_address);
 	mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
 			queue_delay);
-	put_cpu_ptr(gc);
+	put_cpu();
 
 	if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
 		trace_xfs_inodegc_throttle(mp, __return_address);
 		flush_delayed_work(&gc->work);
 	}
 }
 
-/*
- * Fold the dead CPU inodegc queue into the current CPUs queue.
- */
-void
-xfs_inodegc_cpu_dead(
-	struct xfs_mount	*mp,
-	unsigned int		dead_cpu)
-{
-	struct xfs_inodegc	*dead_gc, *gc;
-	struct llist_node	*first, *last;
-	unsigned int		count = 0;
-
-	dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu);
-	cancel_delayed_work_sync(&dead_gc->work);
-
-	if (llist_empty(&dead_gc->list))
-		return;
-
-	first = dead_gc->list.first;
-	last = first;
-	while (last->next) {
-		last = last->next;
-		count++;
-	}
-	dead_gc->list.first = NULL;
-	dead_gc->items = 0;
-
-	/* Add pending work to current CPU */
-	gc = get_cpu_ptr(mp->m_inodegc);
-	llist_add_batch(first, last, &gc->list);
-	count += READ_ONCE(gc->items);
-	WRITE_ONCE(gc->items, count);
-
-	if (xfs_is_inodegc_enabled(mp)) {
-		trace_xfs_inodegc_queue(mp, __return_address);
-		mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
-				0);
-	}
-	put_cpu_ptr(gc);
-}
-
 /*
  * We set the inode flag atomically with the radix tree tag. Once we get tag
  * lookups on the radix tree, this inode flag can go away.
@@ -2195,7 +2173,7 @@ xfs_inodegc_shrinker_count(
 	if (!xfs_is_inodegc_enabled(mp))
 		return 0;
 
-	for_each_online_cpu(cpu) {
+	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
 		gc = per_cpu_ptr(mp->m_inodegc, cpu);
 		if (!llist_empty(&gc->list))
 			return XFS_INODEGC_SHRINKER_COUNT;
@@ -2220,7 +2198,7 @@ xfs_inodegc_shrinker_scan(
 
 	trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address);
 
-	for_each_online_cpu(cpu) {
+	for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
 		gc = per_cpu_ptr(mp->m_inodegc, cpu);
 		if (!llist_empty(&gc->list)) {
 			unsigned int h = READ_ONCE(gc->shrinker_hits);