Commit 994fdd7
Don't create new scratch VHD per image for CimFS
CimFS layers don't need to create a new scratch VHD per image. The scratch VHDs used with CimFS are empty, so we can create just one base VHD and one differencing VHD and copy the differencing VHD for every scratch snapshot. (Note that UVM VHDs are still unique per image because the VHD information is embedded in the UVM BCD during import.)

Signed-off-by: Amit Barve <[email protected]>
1 parent d9a867a commit 994fdd7
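As a rough sketch of the idea in the commit message (not the snapshotter's actual helper; the real per-snapshot copy goes through copyScratchDisk, which is called in the diff below but defined elsewhere), the per-snapshot scratch work reduces to copying the pre-created template VHD into the snapshot directory. Paths here are illustrative:

package main

import (
	"fmt"
	"io"
	"os"
	"path/filepath"
)

// copyTemplateScratch copies the empty, pre-formatted differencing VHD
// ("blank.vhdx", created once at snapshotter startup) into a snapshot
// directory as that snapshot's sandbox.vhdx. Because CimFS scratch VHDs
// carry no per-image data, a plain file copy is sufficient.
func copyTemplateScratch(rootDir, snapshotDir string) error {
	src, err := os.Open(filepath.Join(rootDir, "blank.vhdx"))
	if err != nil {
		return fmt.Errorf("open template scratch VHD: %w", err)
	}
	defer src.Close()

	dst, err := os.Create(filepath.Join(snapshotDir, "sandbox.vhdx"))
	if err != nil {
		return fmt.Errorf("create snapshot scratch VHD: %w", err)
	}
	defer dst.Close()

	_, err = io.Copy(dst, src)
	return err
}

func main() {
	// Illustrative paths; the snapshotter derives these from its root and the snapshot ID.
	if err := copyTemplateScratch(`C:\containerd\root`, `C:\containerd\root\snapshots\42`); err != nil {
		fmt.Println(err)
	}
}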

File tree

3 files changed: +409, -184 lines changed


plugins/snapshots/windows/cimfs.go

Lines changed: 227 additions & 16 deletions
@@ -21,12 +21,17 @@ package windows
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
+	"syscall"
 
+	"github.com/Microsoft/go-winio/pkg/security"
+	"github.com/Microsoft/go-winio/vhd"
 	"github.com/Microsoft/hcsshim"
+	"github.com/Microsoft/hcsshim/computestorage"
 	"github.com/Microsoft/hcsshim/pkg/cimfs"
 	cimlayer "github.com/Microsoft/hcsshim/pkg/ociwclayer/cim"
 	"github.com/containerd/containerd/v2/core/mount"
@@ -39,6 +44,14 @@ import (
 	"github.com/containerd/plugin"
 	"github.com/containerd/plugin/registry"
 	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
+	"golang.org/x/sys/windows"
+)
+
+const (
+	baseVHDName                = "blank-base.vhdx"
+	templateVHDName            = "blank.vhdx"
+	vhdMaxSizeInBytes   uint64 = 10 * 1024 * 1024 * 1024 // 10 GB
+	vhdBlockSizeInBytes uint32 = 1 * 1024 * 1024         // 1 MB
 )
 
 // Composite image FileSystem (CimFS) is a new read-only filesystem (similar to overlayFS on Linux) created
@@ -78,6 +91,10 @@ func NewCimFSSnapshotter(root string) (snapshots.Snapshotter, error) {
 		return nil, err
 	}
 
+	if err = createScratchVHDs(context.Background(), baseSn.root); err != nil {
+		return nil, fmt.Errorf("failed to init base scratch VHD: %w", err)
+	}
+
 	return &cimFSSnapshotter{
 		windowsBaseSnapshotter: baseSn,
 		cimDir:                 filepath.Join(baseSn.info.HomeDir, "cim-layers"),
@@ -139,30 +156,28 @@ func (s *cimFSSnapshotter) Usage(ctx context.Context, key string) (snapshots.Usa
 }
 
 func (s *cimFSSnapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
-	m, err := s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
-	if err != nil {
-		return m, err
-	}
-	m[0].Type = "CimFS"
-	return m, nil
+	return s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
 }
 
 func (s *cimFSSnapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
-	m, err := s.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
-	if err != nil {
-		return m, err
-	}
-	m[0].Type = "CimFS"
-	return m, nil
+	return s.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
 }
 
-func (s *cimFSSnapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) {
-	mounts, err := s.windowsBaseSnapshotter.Mounts(ctx, key)
+func (s *cimFSSnapshotter) Mounts(ctx context.Context, key string) (_ []mount.Mount, err error) {
+	var snapshot storage.Snapshot
+	err = s.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
+		snapshot, err = storage.GetSnapshot(ctx, key)
+		if err != nil {
+			return fmt.Errorf("failed to get snapshot mount: %w", err)
+		}
+
+		return nil
+	})
 	if err != nil {
 		return nil, err
 	}
-	mounts[0].Type = "CimFS"
-	return mounts, nil
+
+	return s.mounts(snapshot, key), nil
 }
 
 func (s *cimFSSnapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
@@ -215,3 +230,199 @@ func (s *cimFSSnapshotter) Remove(ctx context.Context, key string) error {
 	}
 	return nil
 }
+
+func (s *cimFSSnapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
+	var newSnapshot storage.Snapshot
+	err = s.ms.WithTransaction(ctx, true, func(ctx context.Context) (retErr error) {
+		newSnapshot, err = storage.CreateSnapshot(ctx, kind, key, parent, opts...)
+		if err != nil {
+			return fmt.Errorf("failed to create snapshot: %w", err)
+		}
+
+		log.G(ctx).Debug("createSnapshot")
+		// Create the new snapshot dir
+		snDir := s.getSnapshotDir(newSnapshot.ID)
+		if err = os.MkdirAll(snDir, 0700); err != nil {
+			return fmt.Errorf("failed to create snapshot dir %s: %w", snDir, err)
+		}
+		defer func() {
+			if retErr != nil {
+				os.RemoveAll(snDir)
+			}
+		}()
+
+		if strings.Contains(key, snapshots.UnpackKeyPrefix) {
+			// IO/disk space optimization: Do nothing
+			//
+			// We only need one sandbox.vhdx for the container. Skip making one for this
+			// snapshot if this isn't the snapshot that just houses the final sandbox.vhd
+			// that will be mounted as the containers scratch. Currently the key for a snapshot
+			// where a layer will be extracted to will have the string `extract-` in it.
+			return nil
+		}
+
+		if len(newSnapshot.ParentIDs) == 0 {
+			return fmt.Errorf("scratch snapshot without any parents isn't supported")
+		}
+
+		parentLayerPaths := s.parentIDsToParentPaths(newSnapshot.ParentIDs)
+		var snapshotInfo snapshots.Info
+		for _, o := range opts {
+			o(&snapshotInfo)
+		}
+
+		sizeInBytes, err := getRequestedScratchSize(ctx, snapshotInfo)
+		if err != nil {
+			return err
+		}
+
+		var makeUVMScratch bool
+		if _, ok := snapshotInfo.Labels[uvmScratchLabel]; ok {
+			makeUVMScratch = true
+		}
+
+		// This has to be run first to avoid clashing with the containers sandbox.vhdx.
+		if makeUVMScratch {
+			if err = s.createUVMScratchLayer(ctx, snDir, parentLayerPaths); err != nil {
+				return fmt.Errorf("failed to make UVM's scratch layer: %w", err)
+			}
+		}
+		if err = s.createScratchLayer(ctx, snDir, sizeInBytes); err != nil {
+			return fmt.Errorf("failed to create scratch layer: %w", err)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	return s.mounts(newSnapshot, key), nil
+}
+
+// In case of CimFS layers, the scratch VHDs are fully empty (WCIFS layers have reparse points in scratch VHDs, hence those VHDs are unique per image), so we create only one scratch VHD and then copy & expand it for every scratch layer creation.
+func (s *cimFSSnapshotter) createScratchLayer(ctx context.Context, snDir string, sizeInBytes uint64) error {
+	dest := filepath.Join(snDir, "sandbox.vhdx")
+	if err := copyScratchDisk(filepath.Join(s.root, templateVHDName), dest); err != nil {
+		return err
+	}
+
+	if sizeInBytes != 0 {
+		if err := hcsshim.ExpandSandboxSize(s.info, filepath.Base(snDir), sizeInBytes); err != nil {
+			return fmt.Errorf("failed to expand sandbox vhdx size to %d bytes: %w", sizeInBytes, err)
+		}
+	}
+	return nil
+}
+
+func (s *cimFSSnapshotter) mounts(sn storage.Snapshot, key string) []mount.Mount {
+	var (
+		roFlag string
+	)
+
+	if sn.Kind == snapshots.KindView {
+		roFlag = "ro"
+	} else {
+		roFlag = "rw"
+	}
+
+	source := s.getSnapshotDir(sn.ID)
+	parentLayerPaths := s.parentIDsToParentPaths(sn.ParentIDs)
+
+	mountType := "CimFS"
+
+	// error is not checked here, as a string array will never fail to Marshal
+	parentLayersJSON, _ := json.Marshal(parentLayerPaths)
+	parentLayersOption := mount.ParentLayerPathsFlag + string(parentLayersJSON)
+
+	options := []string{
+		roFlag,
+	}
+	if len(sn.ParentIDs) != 0 {
+		options = append(options, parentLayersOption)
+	}
+	mounts := []mount.Mount{
+		{
+			Source:  source,
+			Type:    mountType,
+			Options: options,
+		},
+	}
+
+	return mounts
+}
+
+// creates a base scratch VHD and a differencing VHD from that base VHD inside the given `path`
+// directory. Once these VHDs are created, every scratch snapshot will make a copy of the differencing VHD to
+// be used as the scratch for that snapshot. We could ideally just have a base VHD and no differencing VHD and
+// copy the base VHD for every scratch snapshot. However, base VHDs are slightly bigger in size and so take
+// longer to copy so we keep a differencing VHD and copy that.
+func createScratchVHDs(ctx context.Context, path string) (err error) {
+	baseVHDPath := filepath.Join(path, baseVHDName)
+	diffVHDPath := filepath.Join(path, templateVHDName)
+	baseVHDExists := false
+	diffVHDExists := false
+
+	if _, err = os.Stat(baseVHDPath); err == nil {
+		baseVHDExists = true
+	} else if !os.IsNotExist(err) {
+		return fmt.Errorf("failed to stat base VHD: %w", err)
+	}
+
+	_, err = os.Stat(diffVHDPath)
+	if err != nil && !os.IsNotExist(err) {
+		return fmt.Errorf("failed to stat diff VHD: %w", err)
+	} else if baseVHDExists && err == nil {
+		diffVHDExists = true
+	} else {
+		// remove this diff VHD, it must be recreated with the new base VHD.
+		os.RemoveAll(diffVHDPath)
+	}
+
+	defer func() {
+		if err != nil {
+			os.RemoveAll(baseVHDPath)
+			os.RemoveAll(diffVHDPath)
+		}
+	}()
+
+	if !baseVHDExists {
+		var baseVHDHandle syscall.Handle
+		createParams := &vhd.CreateVirtualDiskParameters{
+			Version: 2,
+			Version2: vhd.CreateVersion2{
+				MaximumSize:      vhdMaxSizeInBytes,
+				BlockSizeInBytes: vhdBlockSizeInBytes,
+			},
+		}
+		baseVHDHandle, err = vhd.CreateVirtualDisk(baseVHDPath, vhd.VirtualDiskAccessNone, vhd.CreateVirtualDiskFlagNone, createParams)
+		if err != nil {
+			return fmt.Errorf("failed to create base vhd: %w", err)
+		}
+
+		err = computestorage.FormatWritableLayerVhd(ctx, windows.Handle(baseVHDHandle))
+		// we always wanna close the handle whether format succeeds for not.
+		closeErr := syscall.CloseHandle(baseVHDHandle)
+		if err != nil {
+			return err
+		} else if closeErr != nil {
+			return fmt.Errorf("failed to close vhdx handle: %w", closeErr)
+		}
+	}
+
+	if !diffVHDExists {
+		// Create the differencing disk that will be what's copied for the final rw layer
+		// for a container.
+		if err = vhd.CreateDiffVhd(diffVHDPath, baseVHDPath, vhdBlockSizeInBytes); err != nil {
+			return fmt.Errorf("failed to create differencing disk: %w", err)
+		}
+	}
+
+	// re assigning group access even if we didn't create the VHD shouldn't throw an error
+	if err = security.GrantVmGroupAccess(baseVHDPath); err != nil {
+		return fmt.Errorf("failed to grant vm group access to %s: %w", baseVHDPath, err)
+	}
+	if err = security.GrantVmGroupAccess(diffVHDPath); err != nil {
+		return fmt.Errorf("failed to grant vm group access to %s: %w", diffVHDPath, err)
+	}
+	return nil
+}

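For reference, the mounts() helper added above returns a single CimFS mount whose source is the snapshot directory and whose options carry the read/write flag plus the parent layer paths encoded as JSON behind mount.ParentLayerPathsFlag. A minimal sketch with illustrative paths (the real values come from the snapshotter root and the IDs in the metadata store):

package main

import (
	"encoding/json"
	"fmt"

	"github.com/containerd/containerd/v2/core/mount"
)

func main() {
	// Two illustrative parent (committed) layer directories.
	parents, _ := json.Marshal([]string{
		`C:\containerd\root\snapshots\7`,
		`C:\containerd\root\snapshots\3`,
	})
	// Roughly what cimFSSnapshotter.mounts returns for an active (rw) snapshot.
	m := mount.Mount{
		Source:  `C:\containerd\root\snapshots\42`,
		Type:    "CimFS",
		Options: []string{"rw", mount.ParentLayerPathsFlag + string(parents)},
	}
	fmt.Printf("%+v\n", m)
}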