Skip to content

Commit da59cfb

Browse files
authored
Merge pull request #2156 from motiejus/btrfs-read_ahead_kb
[btrfs] add btrfs-specific `bdi/read_ahead_kb`
2 parents 28c34d4 + f6c8051 commit da59cfb

File tree

4 files changed

+88
-66
lines changed

4 files changed

+88
-66
lines changed

README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,13 @@ Controller-level and node-level deployments will both have priorityClassName set
9898
As noted in [GCP PD documentation](https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/gce-pd-csi-driver), `ext4` and `xfs` are officially supported. `btrfs` support is experimental:
9999
- As of writing, Ubuntu VM images support btrfs, but [COS does not](https://cloud.google.com/container-optimized-os/docs/concepts/supported-filesystems).
100100

101-
`btrfs` filesystem accepts two "special" mount options:
101+
`btrfs` filesystem accepts the following "special" mount options and the sysfs paths they target:
102102

103-
- `btrfs-data-bg_reclaim_threshold`
104-
- `btrfs-metadata-bg_reclaim_threshold`
103+
- `btrfs-allocation-data-bg_reclaim_threshold`: `/sys/fs/btrfs/FS-UUID/allocation/data/bg_reclaim_threshold`.
104+
- `btrfs-allocation-metadata-bg_reclaim_threshold`: `/sys/fs/btrfs/FS-UUID/allocation/metadata/bg_reclaim_threshold`.
105+
- `btrfs-bdi-read_ahead_kb`: `/sys/fs/btrfs/FS-UUID/bdi/read_ahead_kb`.
105106

106-
Which writes to `/sys/fs/btrfs/FS-UUID/allocation/{,meta}data/bg_reclaim_threshold`, as documented [in btrfs docs](https://btrfs.readthedocs.io/en/latest/ch-sysfs.html#uuid-allocations-data-metadata-system).
107+
See more [in btrfs docs](https://btrfs.readthedocs.io/en/latest/ch-sysfs.html#uuid-allocations-data-metadata-system).
107108

108109
## Further Documentation
109110

pkg/gce-pd-csi-driver/node.go

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,14 @@ const (
124124
readAheadKBMountFlagRegexPattern = "^read_ahead_kb=(.+)$"
125125
btrfsReclaimDataRegexPattern = "^btrfs-allocation-data-bg_reclaim_threshold=(\\d{1,2})$" // 0-99 are valid, incl. 00
126126
btrfsReclaimMetadataRegexPattern = "^btrfs-allocation-metadata-bg_reclaim_threshold=(\\d{1,2})$" // ditto ^
127+
btrfsReadAheadKBRegexPattern = "^btrfs-bdi-read_ahead_kb=(\\d+)$"
127128
)
128129

129130
var (
130131
readAheadKBMountFlagRegex = regexp.MustCompile(readAheadKBMountFlagRegexPattern)
131132
btrfsReclaimDataRegex = regexp.MustCompile(btrfsReclaimDataRegexPattern)
132133
btrfsReclaimMetadataRegex = regexp.MustCompile(btrfsReclaimMetadataRegexPattern)
134+
btrfsReadAheadKBRegex = regexp.MustCompile(btrfsReadAheadKBRegexPattern)
133135
)
134136

135137
func getDefaultFsType() string {
@@ -402,7 +404,7 @@ func (ns *GCENodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStage
402404
// Part 3: Mount device to stagingTargetPath
403405
fstype := getDefaultFsType()
404406

405-
var btrfsReclaimData, btrfsReclaimMetadata string
407+
var btrfsReclaimData, btrfsReclaimMetadata, btrfsReadAheadKb string
406408
shouldUpdateReadAhead := false
407409
var readAheadKB int64
408410
options := []string{}
@@ -418,7 +420,7 @@ func (ns *GCENodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStage
418420
}
419421

420422
if mnt.FsType == fsTypeBtrfs {
421-
btrfsReclaimData, btrfsReclaimMetadata = extractBtrfsReclaimFlags(mnt.MountFlags)
423+
btrfsReclaimData, btrfsReclaimMetadata, btrfsReadAheadKb = extractBtrfsFlags(mnt.MountFlags)
422424
}
423425
} else if blk := volumeCapability.GetBlock(); blk != nil {
424426
// Noop for Block NodeStageVolume
@@ -465,47 +467,52 @@ func (ns *GCENodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStage
465467
}
466468
}
467469

468-
// Part 5: Update read_ahead
470+
// Part 5: Update read_ahead for the block device
469471
if shouldUpdateReadAhead {
470472
if err := ns.updateReadAhead(devicePath, readAheadKB); err != nil {
471473
return nil, status.Errorf(codes.Internal, "failure updating readahead for %s to %dKB: %v", devicePath, readAheadKB, err.Error())
472474
}
473475
}
474476

475-
// Part 6: if configured, write sysfs values
477+
btrfsSysfs := map[string]string{}
478+
479+
if btrfsReadAheadKb != "" {
480+
btrfsSysfs["bdi/read_ahead_kb"] = btrfsReadAheadKb
481+
}
482+
476483
if !readonly {
477-
sysfs := map[string]string{}
478484
if btrfsReclaimData != "" {
479-
sysfs["allocation/data/bg_reclaim_threshold"] = btrfsReclaimData
485+
btrfsSysfs["allocation/data/bg_reclaim_threshold"] = btrfsReclaimData
480486
}
481487
if btrfsReclaimMetadata != "" {
482-
sysfs["allocation/metadata/bg_reclaim_threshold"] = btrfsReclaimMetadata
483-
}
484-
485-
if len(sysfs) > 0 {
486-
args := []string{"--match-tag", "UUID", "--output", "value", stagingTargetPath}
487-
cmd := ns.Mounter.Exec.Command("blkid", args...)
488-
var stderr bytes.Buffer
489-
cmd.SetStderr(&stderr)
490-
klog.V(4).Infof(
491-
"running %q for volume %s",
492-
strings.Join(append([]string{"blkid"}, args...), " "),
493-
volumeID,
494-
)
495-
uuid, err := cmd.Output()
496-
if err != nil {
497-
klog.Errorf("blkid failed for %s. stderr:\n%s", volumeID, stderr.String())
498-
return nil, status.Errorf(codes.Internal, "blkid failed: %v", err)
499-
}
500-
uuid = bytes.TrimRight(uuid, "\n")
488+
btrfsSysfs["allocation/metadata/bg_reclaim_threshold"] = btrfsReclaimMetadata
489+
}
490+
}
501491

502-
for key, value := range sysfs {
503-
path := fmt.Sprintf("%s/fs/btrfs/%s/%s", ns.SysfsPath, uuid, key)
504-
if err := writeSysfs(path, value); err != nil {
505-
return nil, status.Error(codes.Internal, err.Error())
506-
}
507-
klog.V(4).Infof("NodeStageVolume set %s %s=%s", volumeID, key, value)
492+
// Part 6: if configured, write sysfs values
493+
if len(btrfsSysfs) > 0 {
494+
args := []string{"--match-tag", "UUID", "--output", "value", stagingTargetPath}
495+
cmd := ns.Mounter.Exec.Command("blkid", args...)
496+
var stderr bytes.Buffer
497+
cmd.SetStderr(&stderr)
498+
klog.V(4).Infof(
499+
"running %q for volume %s",
500+
strings.Join(append([]string{"blkid"}, args...), " "),
501+
volumeID,
502+
)
503+
uuid, err := cmd.Output()
504+
if err != nil {
505+
klog.Errorf("blkid failed for %s. stderr:\n%s", volumeID, stderr.String())
506+
return nil, status.Errorf(codes.Internal, "blkid failed: %v", err)
507+
}
508+
uuid = bytes.TrimRight(uuid, "\n")
509+
510+
for key, value := range btrfsSysfs {
511+
path := fmt.Sprintf("%s/fs/btrfs/%s/%s", ns.SysfsPath, uuid, key)
512+
if err := writeSysfs(path, value); err != nil {
513+
return nil, status.Error(codes.Internal, err.Error())
508514
}
515+
klog.V(4).Infof("NodeStageVolume set %s %s=%s", volumeID, key, value)
509516
}
510517
}
511518

@@ -526,7 +533,6 @@ func writeSysfs(path, value string) (_err error) {
526533
if _, err := f.Write([]byte(value)); err != nil {
527534
return err
528535
}
529-
530536
return nil
531537
}
532538

@@ -546,16 +552,18 @@ func (ns *GCENodeServer) updateReadAhead(devicePath string, readAheadKB int64) e
546552
return nil
547553
}
548554

549-
func extractBtrfsReclaimFlags(mountFlags []string) (string, string) {
550-
var reclaimData, reclaimMetadata string
555+
func extractBtrfsFlags(mountFlags []string) (string, string, string) {
556+
var reclaimData, reclaimMetadata, readAheadKb string
551557
for _, mountFlag := range mountFlags {
552558
if got := btrfsReclaimDataRegex.FindStringSubmatch(mountFlag); len(got) == 2 {
553559
reclaimData = got[1]
554560
} else if got := btrfsReclaimMetadataRegex.FindStringSubmatch(mountFlag); len(got) == 2 {
555561
reclaimMetadata = got[1]
562+
} else if got := btrfsReadAheadKBRegex.FindStringSubmatch(mountFlag); len(got) == 2 {
563+
readAheadKb = got[1]
556564
}
557565
}
558-
return reclaimData, reclaimMetadata
566+
return reclaimData, reclaimMetadata, readAheadKb
559567
}
560568

561569
func extractReadAheadKBMountFlag(mountFlags []string) (int64, bool, error) {

pkg/gce-pd-csi-driver/node_test.go

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"context"
2020
"fmt"
2121
"os"
22+
"path"
2223
"path/filepath"
2324
"strings"
2425
"testing"
@@ -625,17 +626,24 @@ func TestNodeStageVolume(t *testing.T) {
625626
defer os.RemoveAll(tempDir)
626627
stagingPath := filepath.Join(tempDir, defaultStagingPath)
627628

628-
btrfsUUID := "00000000-0000-0000-0000-000000000001"
629-
btrfsPrefix := fmt.Sprintf("%s/sys/fs/btrfs/%s/allocation", tempDir, btrfsUUID)
629+
var (
630+
btrfsUUID = "00000000-0000-0000-0000-000000000001"
631+
btrfsPrefix = fmt.Sprintf("%s/sys/fs/btrfs/%s", tempDir, btrfsUUID)
632+
btrfsFixtures = map[string]string{
633+
"allocation/data/bg_reclaim_threshold": "0\n",
634+
"allocation/metadata/bg_reclaim_threshold": "0\n",
635+
"bdi/read_ahead_kb": "4096\n",
636+
}
637+
)
630638

631-
for _, suffix := range []string{"data", "metadata"} {
632-
dir := btrfsPrefix + "/" + suffix
639+
for fname, contents := range btrfsFixtures {
640+
fullPath := btrfsPrefix + "/" + fname
641+
dir := path.Dir(fullPath)
633642
if err := os.MkdirAll(dir, 0755); err != nil {
634643
t.Fatalf("Failed to set up fake sysfs dir %q: %v", dir, err)
635644
}
636-
fname := dir + "/bg_reclaim_threshold"
637-
if err := os.WriteFile(fname, []byte("0\n"), 0644); err != nil {
638-
t.Fatalf("write %q: %v", fname, err)
645+
if err := os.WriteFile(fullPath, []byte(contents), 0644); err != nil {
646+
t.Fatalf("write %q: %v", fullPath, err)
639647
}
640648
}
641649

@@ -653,6 +661,7 @@ func TestNodeStageVolume(t *testing.T) {
653661
readAheadSectors string
654662
btrfsReclaimData string
655663
btrfsReclaimMetadata string
664+
btrfsReadAheadKb string
656665
sectorSizeInBytes int
657666
expErrCode codes.Code
658667
}{
@@ -907,7 +916,7 @@ func TestNodeStageVolume(t *testing.T) {
907916
},
908917
},
909918
{
910-
name: "Valid request, set btrfs-allocation-{,meta}data-bg_reclaim_threshold",
919+
name: "Valid request, set btrfs props",
911920
req: &csi.NodeStageVolumeRequest{
912921
VolumeId: volumeID,
913922
StagingTargetPath: stagingPath,
@@ -918,6 +927,7 @@ func TestNodeStageVolume(t *testing.T) {
918927
MountFlags: []string{
919928
"btrfs-allocation-data-bg_reclaim_threshold=90",
920929
"btrfs-allocation-metadata-bg_reclaim_threshold=91",
930+
"btrfs-bdi-read_ahead_kb=128",
921931
},
922932
},
923933
},
@@ -931,6 +941,7 @@ func TestNodeStageVolume(t *testing.T) {
931941
readonlyBit: "0",
932942
btrfsReclaimData: "90",
933943
btrfsReclaimMetadata: "91",
944+
btrfsReadAheadKb: "128",
934945
expCommandList: []fakeCmd{
935946
{
936947
cmd: "blkid",
@@ -1256,29 +1267,29 @@ func TestNodeStageVolume(t *testing.T) {
12561267
if tc.expReadAheadUpdate == false && readAheadUpdateCalled == true {
12571268
t.Fatalf("Test updated read ahead, but it was not expected.")
12581269
}
1259-
if tc.btrfsReclaimData == "" && tc.btrfsReclaimMetadata == "" && blkidCalled {
1270+
if tc.btrfsReclaimData == "" && tc.btrfsReclaimMetadata == "" && tc.btrfsReadAheadKb == "" && blkidCalled {
12601271
t.Fatalf("blkid was called, but was not expected.")
12611272
}
12621273

1263-
if tc.btrfsReclaimData != "" {
1264-
fname := btrfsPrefix + "/data/bg_reclaim_threshold"
1265-
got, err := os.ReadFile(fname)
1266-
if err != nil {
1267-
t.Fatalf("read %q: %v", fname, err)
1268-
}
1269-
if s := strings.TrimSpace(string(got)); s != tc.btrfsReclaimData {
1270-
t.Fatalf("%q: expected %q, got %q", fname, tc.btrfsReclaimData, s)
1271-
}
1274+
btrfsProps := map[string]string{
1275+
"/allocation/data/bg_reclaim_threshold": tc.btrfsReclaimData,
1276+
"/allocation/metadata/bg_reclaim_threshold": tc.btrfsReclaimMetadata,
1277+
"/bdi/read_ahead_kb": tc.btrfsReadAheadKb,
12721278
}
1273-
if tc.btrfsReclaimMetadata != "" {
1274-
fname := btrfsPrefix + "/metadata/bg_reclaim_threshold"
1275-
got, err := os.ReadFile(fname)
1279+
1280+
for fname, prop := range btrfsProps {
1281+
if prop == "" {
1282+
continue
1283+
}
1284+
1285+
got, err := os.ReadFile(btrfsPrefix + fname)
12761286
if err != nil {
1277-
t.Fatalf("read %q: %v", fname, err)
1287+
t.Fatalf("read %q: %v", btrfsPrefix+fname, err)
12781288
}
1279-
if s := strings.TrimSpace(string(got)); s != tc.btrfsReclaimMetadata {
1280-
t.Fatalf("%q: expected %q, got %q", fname, tc.btrfsReclaimMetadata, s)
1289+
if s := strings.TrimSpace(string(got)); s != prop {
1290+
t.Fatalf("%q: expected %q, got %q", btrfsPrefix+fname, prop, s)
12811291
}
1292+
12821293
}
12831294
})
12841295
}

pkg/gce-pd-csi-driver/utils.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -306,18 +306,20 @@ func collectMountOptions(fsType string, mntFlags []string) []string {
306306
var options []string
307307

308308
for _, opt := range mntFlags {
309+
// The flags below are special flags that aren't
310+
// passed directly as an options to the mount command.
309311
if readAheadKBMountFlagRegex.FindString(opt) != "" {
310-
// The read_ahead_kb flag is a special flag that isn't
311-
// passed directly as an option to the mount command.
312312
continue
313313
}
314-
315314
if btrfsReclaimDataRegex.FindString(opt) != "" {
316315
continue
317316
}
318317
if btrfsReclaimMetadataRegex.FindString(opt) != "" {
319318
continue
320319
}
320+
if btrfsReadAheadKBRegex.FindString(opt) != "" {
321+
continue
322+
}
321323

322324
options = append(options, opt)
323325
}

0 commit comments

Comments
 (0)