diff --git a/pkg/receive/multitsdb.go b/pkg/receive/multitsdb.go index 5c593900b8..985bbe3a7a 100644 --- a/pkg/receive/multitsdb.go +++ b/pkg/receive/multitsdb.go @@ -403,6 +403,17 @@ func (m *MultiTSDB) initTSDBIfNeeded(tenantID string, t *tenant) error { const compactionDelayPercentBlockLength = 10 +// lostFoundDir is the directory name that ext4 (and some other filesystems) +// creates automatically at the root of every partition. When a receiver's +// --tsdb.path points directly at a mount point, the directory scan in Open() +// and RemoveLockFilesIfAny() would otherwise treat it as a tenant name and +// attempt to open or clean a TSDB for it, producing spurious errors on +// startup. A name-based skip is the simplest cross-platform fix; checking the +// inode or filesystem type would require platform-specific syscalls and add +// complexity without meaningful safety benefit, since a tenant legitimately +// named "lost+found" is not a realistic concern. +const lostFoundDir = "lost+found" + // generateCompactionDelay() generates a time.Duration of up to compactionDelayPercentBlockLength% of the block range. Used to stagger compactions & uploads. 
func (t *tenant) generateCompactionDelay() time.Duration { return time.Duration(rand.Int63n((t.maxBlockDuration*compactionDelayPercentBlockLength)/100)) * time.Millisecond @@ -626,6 +637,9 @@ func (t *MultiTSDB) Open() error { if !f.IsDir() { continue } + if f.Name() == lostFoundDir { + continue + } g.Go(func() error { _, err := t.getOrLoadTenant(f.Name()) @@ -810,6 +824,9 @@ func (t *MultiTSDB) RemoveLockFilesIfAny() error { if !fi.IsDir() { continue } + if fi.Name() == lostFoundDir { + continue + } if err := os.Remove(filepath.Join(t.defaultTenantDataDir(fi.Name()), "lock")); err != nil { if os.IsNotExist(err) { continue diff --git a/pkg/receive/multitsdb_test.go b/pkg/receive/multitsdb_test.go index 93ce1f78e1..03f193ed87 100644 --- a/pkg/receive/multitsdb_test.go +++ b/pkg/receive/multitsdb_test.go @@ -170,6 +170,22 @@ func TestMultiTSDB(t *testing.T) { testMulitTSDBSeries(t, m) }) + t.Run("open ignores lost+found directory", func(t *testing.T) { + lostFound := filepath.Join(dir, "lost+found") + testutil.Ok(t, os.MkdirAll(lostFound, 0750)) + + m := NewMultiTSDB(dir, logger, prometheus.NewRegistry(), &tsdb.Options{ + MinBlockDuration: (2 * time.Hour).Milliseconds(), + MaxBlockDuration: (2 * time.Hour).Milliseconds(), + RetentionDuration: (6 * time.Hour).Milliseconds(), + NoLockfile: true, + }, labels.FromStrings("replica", "01"), "tenant_id", nil, false, false, metadata.NoneFunc, WithGCImmediately()) + defer m.Close() + + testutil.Ok(t, m.Open()) + testutil.Equals(t, (*tenant)(nil), m.testGetTenant("lost+found")) + }) + t.Run("flush with one sample produces a block", func(t *testing.T) { const testTenant = "test_tenant" m := NewMultiTSDB(dir, logger, prometheus.NewRegistry(), &tsdb.Options{