Skip to content

Commit 364d86d

Browse files
committed
storage: avoid excessive walking of the auxiliary directory
Pebble incrementally accounts its disk usage and this accounting is used to cheaply service Capacity calls. The auxiliary directory is a subdirectory of the data directory, and various Cockroach subsystems write to it directly without any incremental accounting of their disk usage. As a result, Capacity calculations walk the auxiliary directory to calculate the directory's disk usage anew each time. Previously, this calculation occurred during every call to Capacity. This commit adapts Pebble.Capacity to cache the computed capacity and recalculate it at most every minute. Ideally, we would incrementally account disk usage within the auxiliary directory (#96344). This change is a stopgap, avoiding the bulk of the CPU usage incurred by Capacity recalculations during IMPORTs. Epic: none Release note: none
1 parent a3789d3 commit 364d86d

File tree

2 files changed

+66
-34
lines changed

2 files changed

+66
-34
lines changed

pkg/storage/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ go_library(
9191
"//pkg/util/timeutil",
9292
"//pkg/util/tracing",
9393
"//pkg/util/uuid",
94+
"@com_github_cockroachdb_crlib//crtime",
9495
"@com_github_cockroachdb_crlib//fifo",
9596
"@com_github_cockroachdb_errors//:errors",
9697
"@com_github_cockroachdb_errors//oserror",

pkg/storage/pebble.go

Lines changed: 65 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ import (
4343
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
4444
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
4545
"github.com/cockroachdb/cockroach/pkg/util/tracing"
46+
"github.com/cockroachdb/crlib/crtime"
4647
"github.com/cockroachdb/crlib/fifo"
4748
"github.com/cockroachdb/errors"
4849
"github.com/cockroachdb/errors/oserror"
@@ -803,10 +804,15 @@ type engineConfig struct {
803804

804805
// Pebble is a wrapper around a Pebble database instance.
805806
type Pebble struct {
806-
cfg engineConfig
807-
db *pebble.DB
808-
closed atomic.Bool
809-
auxDir string
807+
cfg engineConfig
808+
db *pebble.DB
809+
closed atomic.Bool
810+
auxDir string
811+
auxiliarySize struct {
812+
mu syncutil.Mutex
813+
computedAt crtime.Mono
814+
size int64
815+
}
810816
ballastPath string
811817
properties roachpb.StoreProperties
812818

@@ -1919,37 +1925,11 @@ func (p *Pebble) Capacity() (roachpb.StoreCapacity, error) {
19191925
m := p.db.Metrics()
19201926
totalUsedBytes := int64(m.DiskSpaceUsage())
19211927

1922-
// We don't have incremental accounting of the disk space usage of files
1923-
// in the auxiliary directory. Walk the auxiliary directory and all its
1924-
// subdirectories, adding to the total used bytes.
1925-
if errOuter := filepath.Walk(p.auxDir, func(path string, info os.FileInfo, err error) error {
1926-
if err != nil {
1927-
// This can happen if CockroachDB removes files out from under us -
1928-
// just keep going to get the best estimate we can.
1929-
if oserror.IsNotExist(err) {
1930-
return nil
1931-
}
1932-
// Special-case: if the store-dir is configured using the root of some fs,
1933-
// e.g. "/mnt/db", we might have special fs-created files like lost+found
1934-
// that we can't read, so just ignore them rather than crashing.
1935-
if oserror.IsPermission(err) && filepath.Base(path) == "lost+found" {
1936-
return nil
1937-
}
1938-
return err
1939-
}
1940-
if path == p.ballastPath {
1941-
// Skip the ballast. Counting it as used is likely to confuse
1942-
// users, and it's more akin to space that is just unavailable
1943-
// like disk space often restricted to a root user.
1944-
return nil
1945-
}
1946-
if info.Mode().IsRegular() {
1947-
totalUsedBytes += info.Size()
1948-
}
1949-
return nil
1950-
}); errOuter != nil {
1951-
return roachpb.StoreCapacity{}, errOuter
1928+
auxiliarySize, err := p.auxiliaryDirSize()
1929+
if err != nil {
1930+
return roachpb.StoreCapacity{}, err
19521931
}
1932+
totalUsedBytes += auxiliarySize
19531933

19541934
// If no size limitation have been placed on the store size or if the
19551935
// limitation is greater than what's available, just return the actual
@@ -1982,6 +1962,57 @@ func (p *Pebble) Capacity() (roachpb.StoreCapacity, error) {
19821962
}, nil
19831963
}
19841964

1965+
// auxiliaryDirSize computes the size of the auxiliary directory. There are
1966+
// multiple Cockroach subsystems that write into the auxiliary directory, and
1967+
// they don't incrementally account for their disk space usage. This function
1968+
// walks the auxiliary directory and all its subdirectories, summing the file
1969+
// sizes. This walk can be expensive, so we cache the result and only recompute
1970+
// if it's over 1 minute stale.
1971+
//
1972+
// TODO(jackson): Eventually we should update the various subsystems writing
1973+
// into the auxiliary directory to incrementally account for their disk space
1974+
// usage. See #96344.
1975+
func (p *Pebble) auxiliaryDirSize() (int64, error) {
1976+
p.auxiliarySize.mu.Lock()
1977+
defer p.auxiliarySize.mu.Unlock()
1978+
if crtime.NowMono().Sub(p.auxiliarySize.computedAt) < time.Minute {
1979+
return p.auxiliarySize.size, nil
1980+
}
1981+
1982+
p.auxiliarySize.size = 0
1983+
err := filepath.Walk(p.auxDir, func(path string, info os.FileInfo, err error) error {
1984+
if err != nil {
1985+
// This can happen if CockroachDB removes files out from under us -
1986+
// just keep going to get the best estimate we can.
1987+
if oserror.IsNotExist(err) {
1988+
return nil
1989+
}
1990+
// Special-case: if the store-dir is configured using the root of some fs,
1991+
// e.g. "/mnt/db", we might have special fs-created files like lost+found
1992+
// that we can't read, so just ignore them rather than erroring out.
1993+
if oserror.IsPermission(err) && filepath.Base(path) == "lost+found" {
1994+
return nil
1995+
}
1996+
return err
1997+
}
1998+
if path == p.ballastPath {
1999+
// Skip the ballast. Counting it as used is likely to confuse
2000+
// users, and it's more akin to space that is just unavailable
2001+
// like disk space often restricted to a root user.
2002+
return nil
2003+
}
2004+
if info.Mode().IsRegular() {
2005+
p.auxiliarySize.size += info.Size()
2006+
}
2007+
return nil
2008+
})
2009+
if err != nil {
2010+
return 0, err
2011+
}
2012+
p.auxiliarySize.computedAt = crtime.NowMono()
2013+
return p.auxiliarySize.size, err
2014+
}
2015+
19852016
// Flush implements the Engine interface.
19862017
func (p *Pebble) Flush() error {
19872018
return p.db.Flush()

0 commit comments

Comments
 (0)