@@ -6493,6 +6493,9 @@ void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u6
64936493 unpin_extent_range (fs_info , start , end , false);
64946494}
64956495
6496+ /* Per-batch trim budget: caps the length of each free range processed and, once the accumulated length reaches it, makes btrfs_trim_free_extents_throttle() return -EAGAIN so its caller can release device_list_mutex between batches. */
6497+ #define BTRFS_MAX_TRIM_LENGTH SZ_2G
6498+
64966499/*
64976500 * It used to be that old block groups would be left around forever.
64986501 * Iterating over them would be enough to trim unused space. Since we
@@ -6513,10 +6516,12 @@ void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u6
65136516 * it while performing the free space search since we have already
65146517 * held back allocations.
65156518 */
6516- static int btrfs_trim_free_extents (struct btrfs_device * device , u64 * trimmed )
6519+ static int btrfs_trim_free_extents_throttle (struct btrfs_device * device ,
6520+ u64 * trimmed , u64 pos , u64 * ret_next_pos )
65176521{
6518- u64 start = BTRFS_DEVICE_RANGE_RESERVED , len = 0 , end = 0 ;
65196522 int ret ;
6523+ u64 start = pos ;
6524+ u64 trim_len = 0 ;
65206525
65216526 * trimmed = 0 ;
65226527
@@ -6536,15 +6541,20 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
65366541
65376542 while (1 ) {
65386543 struct btrfs_fs_info * fs_info = device -> fs_info ;
6544+ u64 cur_start ;
6545+ u64 end ;
6546+ u64 len ;
65396547 u64 bytes ;
65406548
65416549 ret = mutex_lock_interruptible (& fs_info -> chunk_mutex );
65426550 if (ret )
65436551 break ;
65446552
6553+ cur_start = start ;
65456554 btrfs_find_first_clear_extent_bit (& device -> alloc_state , start ,
65466555 & start , & end ,
65476556 CHUNK_TRIMMED | CHUNK_ALLOCATED );
6557+ start = max (start , cur_start );
65486558
65496559 /* Check if there are any CHUNK_* bits left */
65506560 if (start > device -> total_bytes ) {
@@ -6570,6 +6580,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
65706580 end = min (end , device -> total_bytes - 1 );
65716581
65726582 len = end - start + 1 ;
6583+ len = min (len , BTRFS_MAX_TRIM_LENGTH );
65736584
65746585 /* We didn't find any extents */
65756586 if (!len ) {
@@ -6590,6 +6601,12 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
65906601
65916602 start += len ;
65926603 * trimmed += bytes ;
6604+ trim_len += len ;
6605+ if (trim_len >= BTRFS_MAX_TRIM_LENGTH ) {
6606+ * ret_next_pos = start ;
6607+ ret = - EAGAIN ;
6608+ break ;
6609+ }
65936610
65946611 if (btrfs_trim_interrupted ()) {
65956612 ret = - ERESTARTSYS ;
@@ -6602,6 +6619,122 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
66026619 return ret ;
66036620}
66046621
/*
 * Trim the free extents of every non-missing device of the filesystem, one
 * throttled batch at a time.
 *
 * Devices are visited in ascending UUID order. The UUID of the device being
 * worked on is copied into a local buffer, and the search for the next device
 * compares against that copy rather than against working_dev. Between batches
 * device_list_mutex is released and cond_resched() is called; the current
 * device is then looked up again by pointer under the lock before it is used.
 *
 * @fs_info:    filesystem whose devices are trimmed
 * @trimmed:    set to the total number of bytes reported trimmed by
 *              btrfs_trim_free_extents_throttle()
 * @dev_failed: set to the number of devices that returned an error
 * @dev_ret:    set to the first per-device error code, or 0 if none
 *
 * Returns -ERESTARTSYS or -EINTR if the helper reports an interruption,
 * otherwise 0; per-device failures are reported only through @dev_failed and
 * @dev_ret.
 */
6622+ static int btrfs_trim_free_extents (struct btrfs_fs_info * fs_info , u64 * trimmed ,
6623+ u64 * dev_failed , int * dev_ret )
6624+ {
6625+ struct btrfs_device * dev ;
6626+ struct btrfs_device * working_dev = NULL ;
6627+ struct btrfs_fs_devices * fs_devices = fs_info -> fs_devices ;
6628+ u8 uuid [BTRFS_UUID_SIZE ];
6629+ u64 start = BTRFS_DEVICE_RANGE_RESERVED ;
6630+
6631+ * trimmed = 0 ;
6632+ * dev_failed = 0 ;
6633+ * dev_ret = 0 ;
6634+
6635+ /* Start from the non-missing device with the smallest UUID; later devices are picked in ascending UUID order. */
6636+ mutex_lock (& fs_devices -> device_list_mutex );
6637+ list_for_each_entry (dev , & fs_devices -> devices , dev_list ) {
6638+ if (test_bit (BTRFS_DEV_STATE_MISSING , & dev -> dev_state ))
6639+ continue ;
6640+ if (!working_dev ||
6641+ memcmp (dev -> uuid , working_dev -> uuid , BTRFS_UUID_SIZE ) < 0 )
6642+ working_dev = dev ;
6643+ }
6644+ if (working_dev )
6645+ memcpy (uuid , working_dev -> uuid , BTRFS_UUID_SIZE );
6646+ mutex_unlock (& fs_devices -> device_list_mutex );
6647+
6648+ if (!working_dev )
6649+ return 0 ;
6650+
6651+ while (1 ) {
6652+ u64 group_trimmed = 0 ;
6653+ u64 next_pos = 0 ;
6654+ int ret = 0 ;
6655+
6656+ mutex_lock (& fs_devices -> device_list_mutex );
6657+
6658+ /* Look up working_dev by pointer under the lock and trim one throttled batch from 'start'; ret stays 0 if it is no longer in the list. */
6659+ list_for_each_entry (dev , & fs_devices -> devices , dev_list ) {
6660+ if (test_bit (BTRFS_DEV_STATE_MISSING , & dev -> dev_state ))
6661+ continue ;
6662+ if (dev == working_dev ) {
6663+ ret = btrfs_trim_free_extents_throttle (working_dev ,
6664+ & group_trimmed , start , & next_pos );
6665+ break ;
6666+ }
6667+ }
6668+
6669+ /* Throttle: drop the lock, account the bytes trimmed and resume the same device from next_pos. */
6670+ if (ret == - EAGAIN && next_pos > start ) {
6671+ mutex_unlock (& fs_devices -> device_list_mutex );
6672+ * trimmed += group_trimmed ;
6673+ start = next_pos ;
6674+ cond_resched ();
6675+ continue ;
6676+ }
6677+
6678+ /* Interrupted (-ERESTARTSYS or -EINTR): account what was trimmed so far and stop. */
6679+ if (ret == - ERESTARTSYS || ret == - EINTR ) {
6680+ mutex_unlock (& fs_devices -> device_list_mutex );
6681+ * trimmed += group_trimmed ;
6682+ return ret ;
6683+ }
6684+
6685+ /*
6686+ * Device completed (ret == 0), failed, or returned -EAGAIN without advancing.
6687+ * Record the first error, if any, then move to the next device.
6688+ */
6689+ if (ret == - EAGAIN ) {
6690+ /* -EAGAIN but next_pos did not move past start: warn and count the device as failed. */
6691+ btrfs_warn (fs_info ,
6692+ "trim throttle: no progress, offset=%llu device %s, skipping" ,
6693+ start , btrfs_dev_name (working_dev ));
6694+ (* dev_failed )++ ;
6695+ if (!* dev_ret )
6696+ * dev_ret = ret ;
6697+ } else if (ret ) {
6698+ /* Any other error: count the device and keep only the first error code. */
6699+ (* dev_failed )++ ;
6700+ if (!* dev_ret )
6701+ * dev_ret = ret ;
6702+ }
6703+
6704+ /*
6705+ * Find the next device: smallest UUID strictly larger than the saved one.
6706+ * Devices added during the trim with a smaller UUID are skipped.
6707+ */
6708+ working_dev = NULL ;
6709+ list_for_each_entry (dev , & fs_devices -> devices , dev_list ) {
6710+ if (test_bit (BTRFS_DEV_STATE_MISSING , & dev -> dev_state ))
6711+ continue ;
6712+ /* Must be larger than the current UUID. */
6713+ if (memcmp (dev -> uuid , uuid , BTRFS_UUID_SIZE ) <= 0 )
6714+ continue ;
6715+ /* Keep the smallest such candidate. */
6716+ if (!working_dev ||
6717+ memcmp (dev -> uuid , working_dev -> uuid , BTRFS_UUID_SIZE ) < 0 )
6718+ working_dev = dev ;
6719+ }
6720+ if (working_dev )
6721+ memcpy (uuid , working_dev -> uuid , BTRFS_UUID_SIZE );
6722+
6723+ mutex_unlock (& fs_devices -> device_list_mutex );
6724+
6725+ * trimmed += group_trimmed ;
6726+ start = BTRFS_DEVICE_RANGE_RESERVED ;
6727+
6728+ /* No device with a larger UUID remains. */
6729+ if (!working_dev )
6730+ break ;
6731+
6732+ cond_resched ();
6733+ }
6734+
6735+ return 0 ;
6736+ }
6737+
66056738/*
66066739 * Trim the whole filesystem by:
66076740 * 1) trimming the free space in each block group
@@ -6613,9 +6746,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
66136746 */
66146747int btrfs_trim_fs (struct btrfs_fs_info * fs_info , struct fstrim_range * range )
66156748{
6616- struct btrfs_fs_devices * fs_devices = fs_info -> fs_devices ;
66176749 struct btrfs_block_group * cache = NULL ;
6618- struct btrfs_device * device ;
66196750 u64 group_trimmed ;
66206751 u64 range_end = U64_MAX ;
66216752 u64 start ;
@@ -6686,24 +6817,8 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
66866817 if (ret == - ERESTARTSYS || ret == - EINTR )
66876818 return ret ;
66886819
6689- mutex_lock (& fs_devices -> device_list_mutex );
6690- list_for_each_entry (device , & fs_devices -> devices , dev_list ) {
6691- if (test_bit (BTRFS_DEV_STATE_MISSING , & device -> dev_state ))
6692- continue ;
6693-
6694- ret = btrfs_trim_free_extents (device , & group_trimmed );
6695-
6696- trimmed += group_trimmed ;
6697- if (ret == - ERESTARTSYS || ret == - EINTR )
6698- break ;
6699- if (ret ) {
6700- dev_failed ++ ;
6701- if (!dev_ret )
6702- dev_ret = ret ;
6703- continue ;
6704- }
6705- }
6706- mutex_unlock (& fs_devices -> device_list_mutex );
6820+ ret = btrfs_trim_free_extents (fs_info , & group_trimmed , & dev_failed , & dev_ret );
6821+ trimmed += group_trimmed ;
67076822
67086823 if (dev_failed )
67096824 btrfs_warn (fs_info ,
0 commit comments