Skip to content

Commit 272f42d

Browse files
authored
fix: fallback to normal delete when deleting a small ranges (#644)
### Motivation In the current implementation, we will get terrible performance for iterator-related operations when we have many small `DeleteRange` ranges. FYI: https://rocksdb.org/blog/2018/11/21/delete-range.html It will make our `putWithSequence` method execute very slowly. (based on the current memtable size) The reason is that the implementation of pebble(rocksDB) will create the fragmented range deletion tombstones for range deletion. Even Pebble will cache the tombstones, but it will invalidate when you get a new delete range operation. Therefore, we need to rebuild the deletion iterator after the new `DeleteRange` operation. And it will get worse along with more `DeleteRange` operations. FYI: https://github.com/cockroachdb/pebble/blob/dbc1c128682f7efcdb76352432249780b12447f7/mem_table.go#L248-L251 Also, rocksdb [article](https://rocksdb.org/blog/2018/11/21/delete-range.html) also mentioned that in `Future Work` section. ### Modification - To support atomic operation for the user side, we can fallback the `DeleteRange` operation back to the normal delete if the actual keys are lower than `DeleteRangeThreshold(default 100)`. ### Next 1. Introduce policies to make `DeleteRangeThreshold` configurable. 2. Introduce policies to configure the memtable size. because the smaller size can trigger flush and compact the deletion tombstones. 3. other small improvements.
1 parent f4a877c commit 272f42d

File tree

2 files changed

+85
-4
lines changed

2 files changed

+85
-4
lines changed

server/kv/db.go

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -593,17 +593,41 @@ func (d *db) applyDelete(batch WriteBatch, notifications *notifications, delReq
593593
}
594594
}
595595

596+
const DeleteRangeThreshold = 100
597+
596598
func (d *db) applyDeleteRange(batch WriteBatch, notifications *notifications, delReq *proto.DeleteRangeRequest, updateOperationCallback UpdateOperationCallback) (*proto.DeleteRangeResponse, error) {
597599
if notifications != nil {
598600
notifications.DeletedRange(delReq.StartInclusive, delReq.EndExclusive)
599601
}
600602

601-
if err := updateOperationCallback.OnDeleteRange(batch, delReq.StartInclusive, delReq.EndExclusive); err != nil {
603+
it, err := batch.RangeScan(delReq.StartInclusive, delReq.EndExclusive)
604+
if err != nil {
602605
return nil, err
603606
}
604-
605-
if err := batch.DeleteRange(delReq.StartInclusive, delReq.EndExclusive); err != nil {
606-
return nil, errors.Wrap(err, "oxia db: failed to delete range")
607+
var validKeys []string
608+
var validKeysNum = 0
609+
for ; it.Valid(); it.Next() {
610+
validKeysNum++
611+
if validKeysNum <= DeleteRangeThreshold {
612+
validKeys = append(validKeys, it.Key())
613+
}
614+
if err = updateOperationCallback.OnDelete(batch, it.Key()); err != nil {
615+
return nil, errors.Wrap(multierr.Combine(err, it.Close()), "oxia db: failed to callback on delete range")
616+
}
617+
}
618+
if err := it.Close(); err != nil {
619+
return nil, errors.Wrap(err, "oxia db: failed to close iterator on delete range")
620+
}
621+
if validKeysNum > DeleteRangeThreshold {
622+
if err := batch.DeleteRange(delReq.StartInclusive, delReq.EndExclusive); err != nil {
623+
return nil, errors.Wrap(err, "oxia db: failed to delete range")
624+
}
625+
} else {
626+
for _, key := range validKeys {
627+
if err := batch.Delete(key); err != nil {
628+
return nil, errors.Wrap(err, "oxia db: failed to delete range")
629+
}
630+
}
607631
}
608632

609633
d.log.Debug(
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Copyright 2025 StreamNative, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package kv
16+
17+
import (
18+
"fmt"
19+
"testing"
20+
"time"
21+
22+
"github.com/stretchr/testify/assert"
23+
"k8s.io/utils/ptr"
24+
25+
"github.com/streamnative/oxia/common"
26+
"github.com/streamnative/oxia/proto"
27+
)
28+
29+
func BenchmarkDeleteRange(b *testing.B) {
30+
factory, err := NewPebbleKVFactory(&FactoryOptions{
31+
InMemory: true,
32+
CacheSizeMB: 1024,
33+
})
34+
assert.NoError(b, err)
35+
db, err := NewDB(common.DefaultNamespace, 1, factory, 0, common.SystemClock)
36+
assert.NoError(b, err)
37+
defer db.Close()
38+
for i := range b.N {
39+
_, err := db.ProcessWrite(&proto.WriteRequest{
40+
Puts: []*proto.PutRequest{
41+
{
42+
Key: "00000000000000000001",
43+
PartitionKey: ptr.To("00000000000000000001"),
44+
Value: []byte("00000000000000000000"),
45+
SequenceKeyDelta: []uint64{1},
46+
},
47+
},
48+
DeleteRanges: []*proto.DeleteRangeRequest{
49+
{
50+
StartInclusive: "00000000000000000001-00000000000000000000",
51+
EndExclusive: fmt.Sprintf("00000000000000000001-%020d", i),
52+
},
53+
},
54+
}, int64(i), uint64(time.Now().UnixMilli()), NoOpCallback)
55+
assert.NoError(b, err)
56+
}
57+
}

0 commit comments

Comments
 (0)