Skip to content

Commit 353aceb

Browse files
meta/sql: merge statement in batch unlink (#6537)
1 parent 10a8366 commit 353aceb

File tree

2 files changed

+82
-30
lines changed

2 files changed

+82
-30
lines changed

pkg/meta/sql.go

Lines changed: 80 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2631,13 +2631,14 @@ func (m *dbMeta) doBatchUnlink(ctx Context, parent Ino, entries []Entry, length
26312631
trash Ino
26322632
n *node // n edges : 1 inode
26332633
opened bool // node is opened
2634-
trashName string
2634+
trashName string // cached trash entry name when hard links go to trash
26352635
}
26362636
var entryInfos []entryInfo
26372637
var totalLength, totalSpace, totalInodes int64
26382638
if userGroupQuotas != nil {
26392639
*userGroupQuotas = make([]userGroupQuotaDelta, 0, len(entries))
26402640
}
2641+
// main transaction: validate, collect metadata, update inode/link counts, and prepare DB mutations
26412642
err := m.txn(func(s *xorm.Session) error {
26422643
pn := node{Inode: parent}
26432644
ok, err := s.Get(&pn)
@@ -2661,6 +2662,7 @@ func (m *dbMeta) doBatchUnlink(ctx Context, parent Ino, entries []Entry, length
26612662
entryInfos = make([]entryInfo, 0, len(entries))
26622663
now := time.Now().UnixNano()
26632664

2665+
// collect unique inode ids from entries (avoid operating N times on same inode for hard links)
26642666
inodes := make([]Ino, 0, len(entries))
26652667
inodeM := make(map[Ino]struct{}) // filter hardlinks
26662668
for _, entry := range entries {
@@ -2680,12 +2682,13 @@ func (m *dbMeta) doBatchUnlink(ctx Context, parent Ino, entries []Entry, length
26802682
if err := s.ForUpdate().In("inode", inodes).Find(&nodes); err != nil {
26812683
return err
26822684
}
2683-
// some inodes may not exist
26842685
nodeMap := make(map[Ino]*node, len(nodes))
2686+
// build quick lookup map from inode to *node
26852687
for i := range nodes {
26862688
nodeMap[nodes[i].Inode] = &nodes[i]
26872689
}
26882690

2691+
// iterate all target entries, apply basic checks and build info for each edge
26892692
dumpNode := &node{}
26902693
for i := range entryInfos {
26912694
info := &entryInfos[i]
@@ -2748,53 +2751,57 @@ func (m *dbMeta) doBatchUnlink(ctx Context, parent Ino, entries []Entry, length
27482751
nowUnix := time.Now().Unix()
27492752
visited := make(map[Ino]bool)
27502753
visited[0] = true // skip dummyNode
2754+
2755+
// buffers for batched operations
2756+
edgesDel := make([]edge, 0)
2757+
sustainedIns := make([]interface{}, 0)
2758+
delfilesIns := make([]interface{}, 0)
2759+
nodesDel := make([]Ino, 0)
2760+
symlinksDel := make([]Ino, 0)
2761+
xattrsDel := make([]Ino, 0)
2762+
edgesIns := make([]interface{}, 0)
2763+
2764+
// walk each edge to decide whether to move to trash, decrement nlink or delete inode & xattrs
27512765
for _, info := range entryInfos {
27522766
if info.n.Type == TypeDirectory {
27532767
continue
27542768
}
2755-
e := edge{Parent: parent, Name: info.e.Name}
2756-
if _, err := s.Delete(&e); err != nil {
2757-
return err
2758-
}
2769+
2770+
edgesDel = append(edgesDel, edge{Parent: parent, Name: info.e.Name})
27592771
if !visited[info.n.Inode] {
27602772
if info.n.Nlink > 0 {
2773+
// inode still referenced somewhere: only update metadata
27612774
if _, err := s.Cols("nlink", "ctime", "ctimensec", "parent").Update(info.n, &node{Inode: info.n.Inode}); err != nil {
27622775
return err
27632776
}
27642777
if info.n.Type == TypeFile {
27652778
recordUserGroupDeletionStats(info.n, 0, userGroupQuotas, parent.IsTrash())
27662779
}
27672780
} else {
2781+
// last link removed: prepare to delete inode and related rows
27682782
var entrySpace int64
27692783
needRecordStats := false
27702784
switch info.n.Type {
27712785
case TypeFile:
27722786
entrySpace = align4K(info.n.Length)
27732787
needRecordStats = true
27742788
if info.opened {
2775-
if err := mustInsert(s, &sustained{Sid: m.sid, Inode: info.e.Inode}); err != nil {
2776-
return err
2777-
}
2789+
sustainedIns = append(sustainedIns, &sustained{Sid: m.sid, Inode: info.e.Inode})
27782790
if _, err := s.Cols("nlink", "ctime", "ctimensec").Update(info.n, &node{Inode: info.n.Inode}); err != nil {
27792791
return err
27802792
}
27812793
} else {
2782-
if err := mustInsert(s, &delfile{info.e.Inode, info.n.Length, nowUnix}); err != nil {
2783-
return err
2784-
}
2785-
if _, err := s.Delete(&node{Inode: info.e.Inode}); err != nil {
2786-
return err
2787-
}
2794+
// regular, un-opened file: add to delfile and delete inode later
2795+
delfilesIns = append(delfilesIns, &delfile{info.e.Inode, info.n.Length, nowUnix})
2796+
nodesDel = append(nodesDel, info.e.Inode)
27882797
}
27892798
case TypeSymlink:
2790-
if _, err := s.Delete(&symlink{Inode: info.e.Inode}); err != nil {
2791-
return err
2792-
}
2799+
// symlink: record for batched delete from symlink table
2800+
symlinksDel = append(symlinksDel, info.e.Inode)
27932801
fallthrough
27942802
default:
2795-
if _, err := s.Delete(&node{Inode: info.e.Inode}); err != nil {
2796-
return err
2797-
}
2803+
// other non-file types: record for direct inode deletion
2804+
nodesDel = append(nodesDel, info.e.Inode)
27982805
if info.n.Type != TypeFile {
27992806
entrySpace = align4K(0)
28002807
needRecordStats = true
@@ -2804,27 +2811,71 @@ func (m *dbMeta) doBatchUnlink(ctx Context, parent Ino, entries []Entry, length
28042811
recordGlobalDeletionStats(info.n, entrySpace, &totalLength, &totalSpace, &totalInodes)
28052812
recordUserGroupDeletionStats(info.n, entrySpace, userGroupQuotas, parent.IsTrash())
28062813
}
2807-
if _, err := s.Delete(&xattr{Inode: info.e.Inode}); err != nil {
2808-
return err
2809-
}
2814+
xattrsDel = append(xattrsDel, info.e.Inode)
28102815
}
28112816
m.of.InvalidateChunk(info.e.Inode, invalidateAttrOnly)
28122817
}
28132818
if info.n.Nlink > 0 && info.trash > 0 {
2819+
// still has links and should be moved to trash; create new trash edge
28142820
if info.trashName == "" {
28152821
info.trashName = m.trashEntry(parent, info.e.Inode, string(info.e.Name))
28162822
}
2817-
if err = mustInsert(s, &edge{
2823+
edgesIns = append(edgesIns, &edge{
28182824
Parent: info.trash,
28192825
Name: []byte(info.trashName),
28202826
Inode: info.n.Inode,
2821-
Type: info.n.Type}); err != nil {
2822-
return err
2823-
}
2827+
Type: info.n.Type})
28242828
}
28252829
visited[info.n.Inode] = true
28262830
}
28272831

2832+
if len(edgesDel) > 0 {
2833+
query := s.Table(&edge{})
2834+
for j, e := range edgesDel {
2835+
if j == 0 {
2836+
query = query.Where("parent = ? AND name = ?", e.Parent, e.Name)
2837+
} else {
2838+
query = query.Or("parent = ? AND name = ?", e.Parent, e.Name)
2839+
}
2840+
}
2841+
if _, err := query.Delete(&edge{}); err != nil {
2842+
return err
2843+
}
2844+
}
2845+
2846+
// execute SQL statements in batches
2847+
if len(sustainedIns) > 0 {
2848+
if err := mustInsert(s, sustainedIns...); err != nil {
2849+
return err
2850+
}
2851+
}
2852+
if len(delfilesIns) > 0 {
2853+
if err := mustInsert(s, delfilesIns...); err != nil {
2854+
return err
2855+
}
2856+
}
2857+
if len(nodesDel) > 0 {
2858+
if _, err := s.In("inode", nodesDel).Delete(&node{}); err != nil {
2859+
return err
2860+
}
2861+
}
2862+
if len(symlinksDel) > 0 {
2863+
if _, err := s.In("inode", symlinksDel).Delete(&symlink{}); err != nil {
2864+
return err
2865+
}
2866+
}
2867+
if len(xattrsDel) > 0 {
2868+
if _, err := s.In("inode", xattrsDel).Delete(&xattr{}); err != nil {
2869+
return err
2870+
}
2871+
}
2872+
if len(edgesIns) > 0 {
2873+
if err := mustInsert(s, edgesIns...); err != nil {
2874+
return err
2875+
}
2876+
}
2877+
2878+
// optionally update parent directory timestamps
28282879
if updateParent {
28292880
var _n int64
28302881
if _n, err = s.Cols("mtime", "ctime", "mtimensec", "ctimensec").Update(&pn, &node{Inode: pn.Inode}); err != nil || _n == 0 {
@@ -2849,6 +2900,7 @@ func (m *dbMeta) doBatchUnlink(ctx Context, parent Ino, entries []Entry, length
28492900
return errno(err)
28502901
}
28512902

2903+
// outside of transaction: update global stats and trigger data deletion callbacks
28522904
visited := make(map[Ino]bool)
28532905
visited[0] = true // skip dummyNode
28542906
for _, info := range entryInfos {
@@ -2861,7 +2913,6 @@ func (m *dbMeta) doBatchUnlink(ctx Context, parent Ino, entries []Entry, length
28612913
}
28622914
}
28632915
m.updateStats(totalSpace, totalInodes)
2864-
28652916
*length = totalLength
28662917
*space = totalSpace
28672918
*inodes = totalInodes

pkg/meta/utils.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,8 @@ func updateLocks(ls []plockRecord, nl plockRecord) []plockRecord {
285285
func (m *baseMeta) emptyDir(ctx Context, inode Ino, skipCheckTrash bool, count *uint64, concurrent chan int) syscall.Errno {
286286
for {
287287
var entries []*Entry
288-
if st := m.en.doReaddir(ctx, inode, 0, &entries, 10000); st != 0 && st != syscall.ENOENT {
288+
// Process entries in batches of 500, which gives the best performance.
289+
if st := m.en.doReaddir(ctx, inode, 0, &entries, 500); st != 0 && st != syscall.ENOENT {
289290
return st
290291
}
291292
if len(entries) == 0 {

0 commit comments

Comments
 (0)