@@ -992,9 +992,14 @@ func open(dir string, l *slog.Logger, r prometheus.Registerer, opts *Options, rn
992992 db .metrics .maxBytes .Set (float64 (maxBytes ))
993993 db .metrics .retentionDuration .Set ((time .Duration (opts .RetentionDuration ) * time .Millisecond ).Seconds ())
994994
995+ // Calling db.reload() calls db.reloadBlocks() which requires cmtx to be locked.
996+ db .cmtx .Lock ()
995997 if err := db .reload (); err != nil {
998+ db .cmtx .Unlock ()
996999 return nil , err
9971000 }
1001+ db .cmtx .Unlock ()
1002+
9981003 // Set the min valid time for the ingested samples
9991004 // to be no lower than the maxt of the last block.
10001005 minValidTime := int64 (math .MinInt64 )
@@ -1363,6 +1368,7 @@ func (db *DB) CompactOOOHead(ctx context.Context) error {
13631368// Callback for testing.
13641369var compactOOOHeadTestingCallback func ()
13651370
1371+ // The db.cmtx mutex should be held before calling this method.
13661372func (db * DB ) compactOOOHead (ctx context.Context ) error {
13671373 if ! db .oooWasEnabled .Load () {
13681374 return nil
@@ -1417,6 +1423,7 @@ func (db *DB) compactOOOHead(ctx context.Context) error {
14171423
14181424// compactOOO creates a new block per possible block range in the compactor's directory from the OOO Head given.
14191425// Each ULID in the result corresponds to a block in a unique time range.
1426+ // The db.cmtx mutex should be held before calling this method.
14201427func (db * DB ) compactOOO (dest string , oooHead * OOOCompactionHead ) (_ []ulid.ULID , err error ) {
14211428 start := time .Now ()
14221429
@@ -1461,7 +1468,7 @@ func (db *DB) compactOOO(dest string, oooHead *OOOCompactionHead) (_ []ulid.ULID
14611468}
14621469
14631470// compactHead compacts the given RangeHead.
1464- // The compaction mutex should be held before calling this method.
1471+ // The db.cmtx mutex should be held before calling this method.
14651472func (db * DB ) compactHead (head * RangeHead ) error {
14661473 uids , err := db .compactor .Write (db .dir , head , head .MinTime (), head .BlockMaxTime (), nil )
14671474 if err != nil {
@@ -1487,7 +1494,7 @@ func (db *DB) compactHead(head *RangeHead) error {
14871494}
14881495
14891496// compactBlocks compacts all the eligible on-disk blocks.
1490- // The compaction mutex should be held before calling this method.
1497+ // The db.cmtx mutex should be held before calling this method.
14911498func (db * DB ) compactBlocks () (err error ) {
14921499 // Check for compactions of multiple blocks.
14931500 for {
@@ -1544,6 +1551,7 @@ func getBlock(allBlocks []*Block, id ulid.ULID) (*Block, bool) {
15441551}
15451552
15461553// reload reloads blocks and truncates the head and its WAL.
1554+ // The db.cmtx mutex should be held before calling this method.
15471555func (db * DB ) reload () error {
15481556 if err := db .reloadBlocks (); err != nil {
15491557 return fmt .Errorf ("reloadBlocks: %w" , err )
@@ -1560,6 +1568,7 @@ func (db *DB) reload() error {
15601568
15611569// reloadBlocks reloads blocks without touching head.
15621570// Blocks that are obsolete due to replacement or retention will be deleted.
1571+ // The db.cmtx mutex should be held before calling this method.
15631572func (db * DB ) reloadBlocks () (err error ) {
15641573 defer func () {
15651574 if err != nil {
@@ -1568,13 +1577,9 @@ func (db *DB) reloadBlocks() (err error) {
15681577 db .metrics .reloads .Inc ()
15691578 }()
15701579
1571- // Now that we reload TSDB every minute, there is a high chance for a race condition with a reload
1572- // triggered by CleanTombstones(). We need to lock the reload to avoid the situation where
1573- // a normal reload and CleanTombstones try to delete the same block.
1574- db .mtx .Lock ()
1575- defer db .mtx .Unlock ()
1576-
1580+ db .mtx .RLock ()
15771581 loadable , corrupted , err := openBlocks (db .logger , db .dir , db .blocks , db .chunkPool , db .opts .PostingsDecoderFactory )
1582+ db .mtx .RUnlock ()
15781583 if err != nil {
15791584 return err
15801585 }
@@ -1600,11 +1605,13 @@ func (db *DB) reloadBlocks() (err error) {
16001605 if len (corrupted ) > 0 {
16011606 // Corrupted but no child loaded for it.
16021607 // Close all new blocks to release the lock for windows.
1608+ db .mtx .RLock ()
16031609 for _ , block := range loadable {
16041610 if _ , open := getBlock (db .blocks , block .Meta ().ULID ); ! open {
16051611 block .Close ()
16061612 }
16071613 }
1614+ db .mtx .RUnlock ()
16081615 errs := tsdb_errors .NewMulti ()
16091616 for ulid , err := range corrupted {
16101617 if err != nil {
@@ -1643,8 +1650,10 @@ func (db *DB) reloadBlocks() (err error) {
16431650 })
16441651
16451652 // Swap new blocks first for subsequently created readers to be seen.
1653+ db .mtx .Lock ()
16461654 oldBlocks := db .blocks
16471655 db .blocks = toLoad
1656+ db .mtx .Unlock ()
16481657
16491658 // Only check overlapping blocks when overlapping compaction is enabled.
16501659 if db .opts .EnableOverlappingCompaction {
0 commit comments