@@ -1193,7 +1193,8 @@ dbuf_verify(dmu_buf_impl_t *db)
1193
1193
if ((db -> db_blkptr == NULL || BP_IS_HOLE (db -> db_blkptr )) &&
1194
1194
(db -> db_buf == NULL || db -> db_buf -> b_data ) &&
1195
1195
db -> db .db_data && db -> db_blkid != DMU_BONUS_BLKID &&
1196
- db -> db_state != DB_FILL && (dn == NULL || !dn -> dn_free_txg )) {
1196
+ db -> db_state != DB_FILL && (dn == NULL || !dn -> dn_free_txg ) &&
1197
+ RW_LOCK_HELD (& db -> db_rwlock )) {
1197
1198
/*
1198
1199
* If the blkptr isn't set but they have nonzero data,
1199
1200
* it had better be dirty, otherwise we'll lose that
@@ -1697,7 +1698,9 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
1697
1698
int bonuslen = DN_SLOTS_TO_BONUSLEN (dn -> dn_num_slots );
1698
1699
dr -> dt .dl .dr_data = kmem_alloc (bonuslen , KM_SLEEP );
1699
1700
arc_space_consume (bonuslen , ARC_SPACE_BONUS );
1701
+ rw_enter (& db -> db_rwlock , RW_READER );
1700
1702
memcpy (dr -> dt .dl .dr_data , db -> db .db_data , bonuslen );
1703
+ rw_exit (& db -> db_rwlock );
1701
1704
} else if (zfs_refcount_count (& db -> db_holds ) > db -> db_dirtycnt ) {
1702
1705
dnode_t * dn = DB_DNODE (db );
1703
1706
int size = arc_buf_size (db -> db_buf );
@@ -1727,7 +1730,9 @@ dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
1727
1730
} else {
1728
1731
dr -> dt .dl .dr_data = arc_alloc_buf (spa , db , type , size );
1729
1732
}
1733
+ rw_enter (& db -> db_rwlock , RW_READER );
1730
1734
memcpy (dr -> dt .dl .dr_data -> b_data , db -> db .db_data , size );
1735
+ rw_exit (& db -> db_rwlock );
1731
1736
} else {
1732
1737
db -> db_buf = NULL ;
1733
1738
dbuf_clear_data (db );
@@ -2999,7 +3004,9 @@ dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed)
2999
3004
ASSERT (db -> db_blkid != DMU_BONUS_BLKID );
3000
3005
/* we were freed while filling */
3001
3006
/* XXX dbuf_undirty? */
3007
+ rw_enter (& db -> db_rwlock , RW_WRITER );
3002
3008
memset (db -> db .db_data , 0 , db -> db .db_size );
3009
+ rw_exit (& db -> db_rwlock );
3003
3010
db -> db_freed_in_flight = FALSE;
3004
3011
db -> db_state = DB_CACHED ;
3005
3012
DTRACE_SET_STATE (db ,
@@ -3374,8 +3381,10 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
3374
3381
* parentp = NULL ;
3375
3382
return (err );
3376
3383
}
3384
+ mutex_enter (& (* parentp )-> db_mtx );
3377
3385
* bpp = ((blkptr_t * )(* parentp )-> db .db_data ) +
3378
3386
(blkid & ((1ULL << epbs ) - 1 ));
3387
+ mutex_exit (& (* parentp )-> db_mtx );
3379
3388
return (0 );
3380
3389
} else {
3381
3390
/* the block is referenced from the dnode */
@@ -4560,10 +4569,12 @@ dbuf_lightweight_bp(dbuf_dirty_record_t *dr)
4560
4569
return (& dn -> dn_phys -> dn_blkptr [dr -> dt .dll .dr_blkid ]);
4561
4570
} else {
4562
4571
dmu_buf_impl_t * parent_db = dr -> dr_parent -> dr_dbuf ;
4572
+ ASSERT (MUTEX_HELD (& parent_db -> db_mtx ));
4563
4573
int epbs = dn -> dn_indblkshift - SPA_BLKPTRSHIFT ;
4564
4574
VERIFY3U (parent_db -> db_level , = = , 1 );
4565
4575
VERIFY3P (DB_DNODE (parent_db ), = = , dn );
4566
4576
VERIFY3U (dr -> dt .dll .dr_blkid >> epbs , = = , parent_db -> db_blkid );
4577
+ ASSERT (RW_LOCK_HELD (& parent_db -> db_rwlock ));
4567
4578
blkptr_t * bp = parent_db -> db .db_data ;
4568
4579
return (& bp [dr -> dt .dll .dr_blkid & ((1 << epbs ) - 1 )]);
4569
4580
}
@@ -4574,12 +4585,22 @@ dbuf_lightweight_ready(zio_t *zio)
4574
4585
{
4575
4586
dbuf_dirty_record_t * dr = zio -> io_private ;
4576
4587
blkptr_t * bp = zio -> io_bp ;
4588
+ dmu_buf_impl_t * parent_db = NULL ;
4577
4589
4578
4590
if (zio -> io_error != 0 )
4579
4591
return ;
4580
4592
4581
4593
dnode_t * dn = dr -> dr_dnode ;
4582
4594
4595
+ EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4596
+ if (dr -> dr_parent == NULL ) {
4597
+ parent_db = dn -> dn_dbuf ;
4598
+ } else {
4599
+ parent_db = dr -> dr_parent -> dr_dbuf ;
4600
+ }
4601
+ mutex_enter (& parent_db -> db_mtx );
4602
+
4603
+ rw_enter (& parent_db -> db_rwlock , RW_READER );
4583
4604
blkptr_t * bp_orig = dbuf_lightweight_bp (dr );
4584
4605
spa_t * spa = dmu_objset_spa (dn -> dn_objset );
4585
4606
int64_t delta = bp_get_dsize_sync (spa , bp ) -
@@ -4599,16 +4620,13 @@ dbuf_lightweight_ready(zio_t *zio)
4599
4620
BP_SET_FILL (bp , fill );
4600
4621
}
4601
4622
4602
- dmu_buf_impl_t * parent_db ;
4603
- EQUIV (dr -> dr_parent == NULL , dn -> dn_phys -> dn_nlevels == 1 );
4604
- if (dr -> dr_parent == NULL ) {
4605
- parent_db = dn -> dn_dbuf ;
4606
- } else {
4607
- parent_db = dr -> dr_parent -> dr_dbuf ;
4623
+ if (!rw_tryupgrade (& parent_db -> db_rwlock )) {
4624
+ rw_exit (& parent_db -> db_rwlock );
4625
+ rw_enter (& parent_db -> db_rwlock , RW_WRITER );
4608
4626
}
4609
- rw_enter (& parent_db -> db_rwlock , RW_WRITER );
4610
4627
* bp_orig = * bp ;
4611
4628
rw_exit (& parent_db -> db_rwlock );
4629
+ mutex_exit (& parent_db -> db_mtx );
4612
4630
}
4613
4631
4614
4632
static void
@@ -4640,6 +4658,7 @@ noinline static void
4640
4658
dbuf_sync_lightweight (dbuf_dirty_record_t * dr , dmu_tx_t * tx )
4641
4659
{
4642
4660
dnode_t * dn = dr -> dr_dnode ;
4661
+ dmu_buf_impl_t * parent_db = NULL ;
4643
4662
zio_t * pio ;
4644
4663
if (dn -> dn_phys -> dn_nlevels == 1 ) {
4645
4664
pio = dn -> dn_zio ;
@@ -4658,6 +4677,11 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
4658
4677
* See comment in dbuf_write(). This is so that zio->io_bp_orig
4659
4678
* will have the old BP in dbuf_lightweight_done().
4660
4679
*/
4680
+ if (dr -> dr_dnode -> dn_phys -> dn_nlevels != 1 ) {
4681
+ parent_db = dr -> dr_parent -> dr_dbuf ;
4682
+ mutex_enter (& parent_db -> db_mtx );
4683
+ rw_enter (& parent_db -> db_rwlock , RW_READER );
4684
+ }
4661
4685
dr -> dr_bp_copy = * dbuf_lightweight_bp (dr );
4662
4686
4663
4687
dr -> dr_zio = zio_write (pio , dmu_objset_spa (dn -> dn_objset ),
@@ -4667,6 +4691,11 @@ dbuf_sync_lightweight(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
4667
4691
dbuf_lightweight_done , dr , ZIO_PRIORITY_ASYNC_WRITE ,
4668
4692
ZIO_FLAG_MUSTSUCCEED | dr -> dt .dll .dr_flags , & zb );
4669
4693
4694
+ if (parent_db ) {
4695
+ rw_exit (& parent_db -> db_rwlock );
4696
+ mutex_exit (& parent_db -> db_mtx );
4697
+ }
4698
+
4670
4699
zio_nowait (dr -> dr_zio );
4671
4700
}
4672
4701
@@ -4823,7 +4852,9 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
4823
4852
} else {
4824
4853
* datap = arc_alloc_buf (os -> os_spa , db , type , psize );
4825
4854
}
4855
+ rw_enter (& db -> db_rwlock , RW_READER );
4826
4856
memcpy ((* datap )-> b_data , db -> db .db_data , psize );
4857
+ rw_exit (& db -> db_rwlock );
4827
4858
}
4828
4859
db -> db_data_pending = dr ;
4829
4860
@@ -4929,6 +4960,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
4929
4960
4930
4961
if (dn -> dn_type == DMU_OT_DNODE ) {
4931
4962
i = 0 ;
4963
+ rw_enter (& db -> db_rwlock , RW_READER );
4932
4964
while (i < db -> db .db_size ) {
4933
4965
dnode_phys_t * dnp =
4934
4966
(void * )(((char * )db -> db .db_data ) + i );
@@ -4954,6 +4986,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
4954
4986
DNODE_MIN_SIZE ;
4955
4987
}
4956
4988
}
4989
+ rw_exit (& db -> db_rwlock );
4957
4990
} else {
4958
4991
if (BP_IS_HOLE (bp )) {
4959
4992
fill = 0 ;
@@ -4962,6 +4995,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
4962
4995
}
4963
4996
}
4964
4997
} else {
4998
+ rw_enter (& db -> db_rwlock , RW_READER );
4965
4999
blkptr_t * ibp = db -> db .db_data ;
4966
5000
ASSERT3U (db -> db .db_size , = = , 1 <<dn -> dn_phys -> dn_indblkshift );
4967
5001
for (i = db -> db .db_size >> SPA_BLKPTRSHIFT ; i > 0 ; i -- , ibp ++ ) {
@@ -4971,6 +5005,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
4971
5005
BLK_CONFIG_SKIP , BLK_VERIFY_HALT );
4972
5006
fill += BP_GET_FILL (ibp );
4973
5007
}
5008
+ rw_exit (& db -> db_rwlock );
4974
5009
}
4975
5010
DB_DNODE_EXIT (db );
4976
5011
@@ -5005,6 +5040,8 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
5005
5040
DB_DNODE_EXIT (db );
5006
5041
ASSERT3U (epbs , < , 31 );
5007
5042
5043
+ mutex_enter (& db -> db_mtx );
5044
+ rw_enter (& db -> db_rwlock , RW_READER );
5008
5045
/* Determine if all our children are holes */
5009
5046
for (i = 0 , bp = db -> db .db_data ; i < 1ULL << epbs ; i ++ , bp ++ ) {
5010
5047
if (!BP_IS_HOLE (bp ))
@@ -5021,10 +5058,14 @@ dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
5021
5058
* anybody from reading the blocks we're about to
5022
5059
* zero out.
5023
5060
*/
5024
- rw_enter (& db -> db_rwlock , RW_WRITER );
5061
+ if (!rw_tryupgrade (& db -> db_rwlock )) {
5062
+ rw_exit (& db -> db_rwlock );
5063
+ rw_enter (& db -> db_rwlock , RW_WRITER );
5064
+ }
5025
5065
memset (db -> db .db_data , 0 , db -> db .db_size );
5026
- rw_exit (& db -> db_rwlock );
5027
5066
}
5067
+ rw_exit (& db -> db_rwlock );
5068
+ mutex_exit (& db -> db_mtx );
5028
5069
}
5029
5070
5030
5071
static void
@@ -5220,11 +5261,11 @@ dbuf_remap_impl(dnode_t *dn, blkptr_t *bp, krwlock_t *rw, dmu_tx_t *tx)
5220
5261
* avoid lock contention, only grab it when we are actually
5221
5262
* changing the BP.
5222
5263
*/
5223
- if (rw != NULL )
5264
+ if (rw != NULL && !rw_tryupgrade (rw )) {
5265
+ rw_exit (rw );
5224
5266
rw_enter (rw , RW_WRITER );
5267
+ }
5225
5268
* bp = bp_copy ;
5226
- if (rw != NULL )
5227
- rw_exit (rw );
5228
5269
}
5229
5270
}
5230
5271
@@ -5240,6 +5281,8 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
5240
5281
if (!spa_feature_is_active (spa , SPA_FEATURE_DEVICE_REMOVAL ))
5241
5282
return ;
5242
5283
5284
+ mutex_enter (& db -> db_mtx );
5285
+ rw_enter (& db -> db_rwlock , RW_READER );
5243
5286
if (db -> db_level > 0 ) {
5244
5287
blkptr_t * bp = db -> db .db_data ;
5245
5288
for (int i = 0 ; i < db -> db .db_size >> SPA_BLKPTRSHIFT ; i ++ ) {
@@ -5258,6 +5301,8 @@ dbuf_remap(dnode_t *dn, dmu_buf_impl_t *db, dmu_tx_t *tx)
5258
5301
}
5259
5302
}
5260
5303
}
5304
+ rw_exit (& db -> db_rwlock );
5305
+ mutex_exit (& db -> db_mtx );
5261
5306
}
5262
5307
5263
5308
0 commit comments