Skip to content

Commit 056b059

Browse files
committed
block: improve adaptive compression profiles
We allow different adaptive ratios for data and value blocks and improve the profiles.
1 parent 392f102 commit 056b059

File tree

4 files changed

+67
-49
lines changed

4 files changed

+67
-49
lines changed

sstable/block/compression.go

Lines changed: 42 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,20 @@ type CompressionProfile struct {
2929
//
3030
// Note that MinLZ is only supported with table formats v6+. Older formats
3131
// fall back to Snappy.
32-
DataBlocks compression.Setting
33-
ValueBlocks compression.Setting
32+
DataBlocks CompressionSetting
33+
ValueBlocks CompressionSetting
3434
OtherBlocks compression.Setting
3535

3636
// Blocks that are reduced by less than this percentage are stored
3737
// uncompressed.
3838
MinReductionPercent uint8
39+
}
40+
41+
// CompressionSetting is a compression setting for value or data blocks. It
42+
// contains a compression.Setting and an optional percentage which enables
43+
// adaptive compression.
44+
type CompressionSetting struct {
45+
compression.Setting
3946

4047
// AdaptiveReductionCutoffPercent (when set to a non-zero value) enables
4148
// adaptive compressors for data and value blocks which fall back to the
@@ -45,6 +52,20 @@ type CompressionProfile struct {
4552
AdaptiveReductionCutoffPercent uint8
4653
}
4754

55+
// SimpleCompressionSetting returns a CompressionSetting that always uses the
56+
// given compression.
57+
func SimpleCompressionSetting(s compression.Setting) CompressionSetting {
58+
return CompressionSetting{Setting: s}
59+
}
60+
61+
// AdaptiveCompressionSetting returns a CompressionSetting that adaptively
62+
// chooses between the enclosed setting and the "other blocks" setting.
63+
func AdaptiveCompressionSetting(
64+
s compression.Setting, reductionCutoffPercent uint8,
65+
) CompressionSetting {
66+
return CompressionSetting{Setting: s, AdaptiveReductionCutoffPercent: reductionCutoffPercent}
67+
}
68+
4869
// UsesMinLZ returns true if the profile uses the MinLZ compression algorithm
4970
// (for any block kind).
5071
func (p *CompressionProfile) UsesMinLZ() bool {
@@ -68,33 +89,32 @@ var (
6889
// FastCompression automatically chooses between Snappy/MinLZ1 and Zstd1 for
6990
// sstable and blob file value blocks.
7091
FastCompression = registerCompressionProfile(CompressionProfile{
71-
Name: "Fast",
72-
DataBlocks: fastestCompression,
73-
ValueBlocks: compression.ZstdLevel1,
74-
OtherBlocks: fastestCompression,
75-
MinReductionPercent: 10,
76-
AdaptiveReductionCutoffPercent: 30,
92+
Name: "Fast",
93+
DataBlocks: SimpleCompressionSetting(fastestCompression),
94+
ValueBlocks: AdaptiveCompressionSetting(compression.ZstdLevel1, 30),
95+
OtherBlocks: fastestCompression,
96+
MinReductionPercent: 10,
7797
})
7898

7999
// BalancedCompression automatically chooses between Snappy/MinLZ1 and Zstd1
80100
// for data and value blocks.
81101
BalancedCompression = registerCompressionProfile(CompressionProfile{
82-
Name: "Balanced",
83-
DataBlocks: compression.ZstdLevel1,
84-
ValueBlocks: compression.ZstdLevel1,
85-
OtherBlocks: fastestCompression,
86-
MinReductionPercent: 5,
87-
AdaptiveReductionCutoffPercent: 15,
102+
Name: "Balanced",
103+
DataBlocks: AdaptiveCompressionSetting(compression.ZstdLevel1, 30),
104+
ValueBlocks: AdaptiveCompressionSetting(compression.ZstdLevel1, 15),
105+
OtherBlocks: fastestCompression,
106+
MinReductionPercent: 5,
88107
})
89108

90109
// GoodCompression uses Zstd1 for data and value blocks.
110+
//
111+
// Note: in practice, we have observed very little size benefit to using
112+
// higher zstd levels like ZstdLevel3 (while paying a significant compression
113+
// performance cost).
91114
GoodCompression = registerCompressionProfile(CompressionProfile{
92-
Name: "Good",
93-
// In practice, we have observed very little size benefit to using higher
94-
// zstd levels like ZstdLevel3 while paying a significant compression
95-
// performance cost.
96-
DataBlocks: compression.ZstdLevel1,
97-
ValueBlocks: compression.ZstdLevel1,
115+
Name: "Good",
116+
DataBlocks: SimpleCompressionSetting(compression.ZstdLevel1),
117+
ValueBlocks: SimpleCompressionSetting(compression.ZstdLevel1),
98118
OtherBlocks: fastestCompression,
99119
MinReductionPercent: 3,
100120
})
@@ -119,8 +139,8 @@ var fastestCompression = func() compression.Setting {
119139
func simpleCompressionProfile(name string, setting compression.Setting) *CompressionProfile {
120140
return registerCompressionProfile(CompressionProfile{
121141
Name: name,
122-
DataBlocks: setting,
123-
ValueBlocks: setting,
142+
DataBlocks: SimpleCompressionSetting(setting),
143+
ValueBlocks: SimpleCompressionSetting(setting),
124144
OtherBlocks: setting,
125145
MinReductionPercent: 12,
126146
})

sstable/block/compressor.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,20 +40,20 @@ func MakeCompressor(profile *CompressionProfile) Compressor {
4040
}
4141

4242
func maybeAdaptiveCompressor(
43-
profile *CompressionProfile, setting compression.Setting,
43+
profile *CompressionProfile, setting CompressionSetting,
4444
) compression.Compressor {
45-
if profile.AdaptiveReductionCutoffPercent != 0 && setting != profile.OtherBlocks {
45+
if setting.AdaptiveReductionCutoffPercent != 0 && setting.Setting != profile.OtherBlocks {
4646
params := compression.AdaptiveCompressorParams{
47-
Slow: setting,
47+
Slow: setting.Setting,
4848
Fast: profile.OtherBlocks,
49-
ReductionCutoff: float64(profile.AdaptiveReductionCutoffPercent) * 0.01,
49+
ReductionCutoff: float64(setting.AdaptiveReductionCutoffPercent) * 0.01,
5050
SampleEvery: 10,
5151
SampleHalfLife: 256 * 1024, // 256 KB
5252
SamplingSeed: rand.Uint64(),
5353
}
5454
return compression.NewAdaptiveCompressor(params)
5555
}
56-
return compression.GetCompressor(setting)
56+
return compression.GetCompressor(setting.Setting)
5757
}
5858

5959
// Close must be called when the Compressor is no longer needed.

sstable/block/compressor_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ func TestCompressor(t *testing.T) {
2525
dst := make([]byte, 0, 1024)
2626
for runs := 0; runs < 100; runs++ {
2727
profile := &CompressionProfile{
28-
DataBlocks: settings[rand.IntN(len(settings))],
29-
ValueBlocks: settings[rand.IntN(len(settings))],
28+
DataBlocks: SimpleCompressionSetting(settings[rand.IntN(len(settings))]),
29+
ValueBlocks: SimpleCompressionSetting(settings[rand.IntN(len(settings))]),
3030
OtherBlocks: settings[rand.IntN(len(settings))],
3131
MinReductionPercent: 0,
3232
}

sstable/compressionanalyzer/buckets.go

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -99,49 +99,47 @@ func (c Compressibility) String() string {
9999
var Profiles = [...]*block.CompressionProfile{
100100
{
101101
Name: "Snappy",
102-
DataBlocks: compression.SnappySetting,
103-
ValueBlocks: compression.SnappySetting,
102+
DataBlocks: block.SimpleCompressionSetting(compression.SnappySetting),
103+
ValueBlocks: block.SimpleCompressionSetting(compression.SnappySetting),
104104
OtherBlocks: compression.SnappySetting,
105105
MinReductionPercent: 0,
106106
},
107107

108108
{
109109
Name: "MinLZ1",
110-
DataBlocks: compression.MinLZFastest,
111-
ValueBlocks: compression.MinLZFastest,
110+
DataBlocks: block.SimpleCompressionSetting(compression.MinLZFastest),
111+
ValueBlocks: block.SimpleCompressionSetting(compression.MinLZFastest),
112112
OtherBlocks: compression.MinLZFastest,
113113
MinReductionPercent: 0,
114114
},
115115
{
116116
Name: "Zstd1",
117-
DataBlocks: compression.ZstdLevel1,
118-
ValueBlocks: compression.ZstdLevel1,
117+
DataBlocks: block.SimpleCompressionSetting(compression.ZstdLevel1),
118+
ValueBlocks: block.SimpleCompressionSetting(compression.ZstdLevel1),
119119
OtherBlocks: compression.ZstdLevel1,
120120
MinReductionPercent: 0,
121121
},
122122

123123
{
124-
Name: "Auto1/30",
125-
DataBlocks: compression.ZstdLevel1,
126-
ValueBlocks: compression.ZstdLevel1,
127-
OtherBlocks: compression.MinLZFastest,
128-
AdaptiveReductionCutoffPercent: 30,
129-
MinReductionPercent: 0,
124+
Name: "Auto1/30",
125+
DataBlocks: block.AdaptiveCompressionSetting(compression.ZstdLevel1, 30),
126+
ValueBlocks: block.AdaptiveCompressionSetting(compression.ZstdLevel1, 30),
127+
OtherBlocks: compression.MinLZFastest,
128+
MinReductionPercent: 0,
130129
},
131130

132131
{
133-
Name: "Auto1/15",
134-
DataBlocks: compression.ZstdLevel1,
135-
ValueBlocks: compression.ZstdLevel1,
136-
OtherBlocks: compression.MinLZFastest,
137-
AdaptiveReductionCutoffPercent: 15,
138-
MinReductionPercent: 0,
132+
Name: "Auto1/15",
133+
DataBlocks: block.AdaptiveCompressionSetting(compression.ZstdLevel1, 15),
134+
ValueBlocks: block.AdaptiveCompressionSetting(compression.ZstdLevel1, 15),
135+
OtherBlocks: compression.MinLZFastest,
136+
MinReductionPercent: 0,
139137
},
140138

141139
{
142140
Name: "Zstd3",
143-
DataBlocks: compression.ZstdLevel3,
144-
ValueBlocks: compression.ZstdLevel3,
141+
DataBlocks: block.SimpleCompressionSetting(compression.ZstdLevel3),
142+
ValueBlocks: block.SimpleCompressionSetting(compression.ZstdLevel3),
145143
OtherBlocks: compression.ZstdLevel3,
146144
MinReductionPercent: 0,
147145
},

0 commit comments

Comments
 (0)