@@ -1028,7 +1028,14 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
         return ret;
 }
 
-static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_to_scan)
+struct btrfs_em_shrink_ctx {
+        long nr_to_scan;
+        long scanned;
+        u64 last_ino;
+        u64 last_root;
+};
+
+static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_ctx *ctx)
 {
         const u64 cur_fs_gen = btrfs_get_fs_generation(inode->root->fs_info);
         struct extent_map_tree *tree = &inode->extent_tree;
@@ -1075,7 +1082,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
 
                 em = rb_entry(node, struct extent_map, rb_node);
                 node = rb_next(node);
-                (*scanned)++;
+                ctx->scanned++;
 
                 if (em->flags & EXTENT_FLAG_PINNED)
                         goto next;
@@ -1096,7 +1103,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
                 free_extent_map(em);
                 nr_dropped++;
 next:
-                if (*scanned >= nr_to_scan)
+                if (ctx->scanned >= ctx->nr_to_scan)
                         break;
 
                 /*
@@ -1115,22 +1122,21 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, long *scanned, long nr_t
         return nr_dropped;
 }
 
-static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_scan)
+static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx)
 {
-        struct btrfs_fs_info *fs_info = root->fs_info;
         struct btrfs_inode *inode;
         long nr_dropped = 0;
-        u64 min_ino = fs_info->extent_map_shrinker_last_ino + 1;
+        u64 min_ino = ctx->last_ino + 1;
 
         inode = btrfs_find_first_inode(root, min_ino);
         while (inode) {
-                nr_dropped += btrfs_scan_inode(inode, scanned, nr_to_scan);
+                nr_dropped += btrfs_scan_inode(inode, ctx);
 
                 min_ino = btrfs_ino(inode) + 1;
-                fs_info->extent_map_shrinker_last_ino = btrfs_ino(inode);
+                ctx->last_ino = btrfs_ino(inode);
                 btrfs_add_delayed_iput(inode);
 
-                if (*scanned >= nr_to_scan)
+                if (ctx->scanned >= ctx->nr_to_scan)
                         break;
 
                 /*
@@ -1151,38 +1157,56 @@ static long btrfs_scan_root(struct btrfs_root *root, long *scanned, long nr_to_s
                  * inode if there is one or we will find out this was the last
                  * one and move to the next root.
                  */
-                fs_info->extent_map_shrinker_last_root = btrfs_root_id(root);
+                ctx->last_root = btrfs_root_id(root);
         } else {
                 /*
                  * No more inodes in this root, set extent_map_shrinker_last_ino to 0 so
                  * that when processing the next root we start from its first inode.
                  */
-                fs_info->extent_map_shrinker_last_ino = 0;
-                fs_info->extent_map_shrinker_last_root = btrfs_root_id(root) + 1;
+                ctx->last_ino = 0;
+                ctx->last_root = btrfs_root_id(root) + 1;
         }
 
         return nr_dropped;
 }
 
 long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
 {
-        const u64 start_root_id = fs_info->extent_map_shrinker_last_root;
-        u64 next_root_id = start_root_id;
+        struct btrfs_em_shrink_ctx ctx;
+        u64 start_root_id;
+        u64 next_root_id;
         bool cycled = false;
         long nr_dropped = 0;
-        long scanned = 0;
+
+        ctx.scanned = 0;
+        ctx.nr_to_scan = nr_to_scan;
+
+        /*
+         * In case we have multiple tasks running this shrinker, make the next
+         * one start from the next inode in case it starts before we finish.
+         */
+        spin_lock(&fs_info->extent_map_shrinker_lock);
+        ctx.last_ino = fs_info->extent_map_shrinker_last_ino;
+        fs_info->extent_map_shrinker_last_ino++;
+        ctx.last_root = fs_info->extent_map_shrinker_last_root;
+        spin_unlock(&fs_info->extent_map_shrinker_lock);
+
+        start_root_id = ctx.last_root;
+        next_root_id = ctx.last_root;
 
         if (trace_btrfs_extent_map_shrinker_scan_enter_enabled()) {
                 s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);
 
-                trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan, nr);
+                trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan,
+                                                           nr, ctx.last_root,
+                                                           ctx.last_ino);
         }
 
         /*
          * We may be called from memory allocation paths, so we don't want to
          * take too much time and slowdown tasks, so stop if we need reschedule.
          */
-        while (scanned < nr_to_scan && !need_resched()) {
+        while (ctx.scanned < ctx.nr_to_scan && !need_resched()) {
                 struct btrfs_root *root;
                 unsigned long count;
 
@@ -1194,8 +1218,8 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
                         spin_unlock(&fs_info->fs_roots_radix_lock);
                         if (start_root_id > 0 && !cycled) {
                                 next_root_id = 0;
-                                fs_info->extent_map_shrinker_last_root = 0;
-                                fs_info->extent_map_shrinker_last_ino = 0;
+                                ctx.last_root = 0;
+                                ctx.last_ino = 0;
                                 cycled = true;
                                 continue;
                         }
@@ -1209,15 +1233,33 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
                         continue;
 
                 if (is_fstree(btrfs_root_id(root)))
-                        nr_dropped += btrfs_scan_root(root, &scanned, nr_to_scan);
+                        nr_dropped += btrfs_scan_root(root, &ctx);
 
                 btrfs_put_root(root);
         }
 
+        /*
+         * In case of multiple tasks running this extent map shrinking code this
+         * isn't perfect but it's simple and silences things like KCSAN. It's
+         * not possible to know which task made more progress because we can
+         * cycle back to the first root and first inode if it's not the first
+         * time the shrinker ran, see the above logic. Also a task that started
+         * later may finish earlier than another task and made less progress. So
+         * make this simple and update to the progress of the last task that
+         * finished, with the occasional possibility of having two consecutive
+         * runs of the shrinker process the same inodes.
+         */
+        spin_lock(&fs_info->extent_map_shrinker_lock);
+        fs_info->extent_map_shrinker_last_ino = ctx.last_ino;
+        fs_info->extent_map_shrinker_last_root = ctx.last_root;
+        spin_unlock(&fs_info->extent_map_shrinker_lock);
+
         if (trace_btrfs_extent_map_shrinker_scan_exit_enabled()) {
                 s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);
 
-                trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped, nr);
+                trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped,
+                                                          nr, ctx.last_root,
+                                                          ctx.last_ino);
         }
 
         return nr_dropped;
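The shape of the change is easier to see outside the kernel. Below is a minimal userspace sketch of the same progress-tracking pattern, not btrfs code: all names (shrink_cursor, scan_ctx, scan_one_inode, run_shrinker) are made up for illustration. Each call snapshots the shared cursor into a private context under a lock and bumps it so a concurrent caller starts past it, scans using only the private copy, and publishes its final position back under the same lock.

/*
 * Illustrative userspace model of the shrinker progress handoff.
 * Shared state is touched only under the lock, before and after the scan.
 */
#include <pthread.h>
#include <stdio.h>

struct shrink_cursor {
        pthread_mutex_t lock;
        unsigned long long last_ino;
        unsigned long long last_root;
};

struct scan_ctx {
        long nr_to_scan;
        long scanned;
        unsigned long long last_ino;
        unsigned long long last_root;
};

static struct shrink_cursor cursor = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
};

/* Pretend to scan one inode: count it and remember how far we got. */
static long scan_one_inode(struct scan_ctx *ctx)
{
        ctx->scanned++;
        ctx->last_ino++;
        return 1;       /* pretend one extent map was dropped */
}

static long run_shrinker(long nr_to_scan)
{
        struct scan_ctx ctx = { .nr_to_scan = nr_to_scan };
        long dropped = 0;

        /* Snapshot shared progress and advance it for concurrent callers. */
        pthread_mutex_lock(&cursor.lock);
        ctx.last_ino = cursor.last_ino;
        cursor.last_ino++;
        ctx.last_root = cursor.last_root;
        pthread_mutex_unlock(&cursor.lock);

        /* The scan loop only ever touches the private context. */
        while (ctx.scanned < ctx.nr_to_scan)
                dropped += scan_one_inode(&ctx);

        /* Publish the progress of the last finisher; imprecise but race-free. */
        pthread_mutex_lock(&cursor.lock);
        cursor.last_ino = ctx.last_ino;
        cursor.last_root = ctx.last_root;
        pthread_mutex_unlock(&cursor.lock);

        return dropped;
}

int main(void)
{
        long dropped = run_shrinker(8);

        printf("dropped %ld, cursor now at ino %llu\n", dropped, cursor.last_ino);
        return 0;
}

As in the patch, the shared cursor is read-modified twice per call, once to snapshot and bump it and once to publish the final position, so the scan loop itself works on private state and concurrent runs never race on the progress fields.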