@@ -21,12 +21,17 @@ package windows
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strings"
+	"syscall"
 
+	"github.com/Microsoft/go-winio/pkg/security"
+	"github.com/Microsoft/go-winio/vhd"
 	"github.com/Microsoft/hcsshim"
+	"github.com/Microsoft/hcsshim/computestorage"
 	"github.com/Microsoft/hcsshim/pkg/cimfs"
 	cimlayer "github.com/Microsoft/hcsshim/pkg/ociwclayer/cim"
 	"github.com/containerd/containerd/v2/core/mount"
@@ -39,6 +44,14 @@ import (
 	"github.com/containerd/plugin"
 	"github.com/containerd/plugin/registry"
 	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
+	"golang.org/x/sys/windows"
+)
+
+const (
+	baseVHDName                = "blank-base.vhdx"
+	templateVHDName            = "blank.vhdx"
+	vhdMaxSizeInBytes   uint64 = 10 * 1024 * 1024 * 1024 // 10 GB
+	vhdBlockSizeInBytes uint32 = 1 * 1024 * 1024          // 1 MB
 )
 
 // Composite image FileSystem (CimFS) is a new read-only filesystem (similar to overlayFS on Linux) created
@@ -78,6 +91,10 @@ func NewCimFSSnapshotter(root string) (snapshots.Snapshotter, error) {
 		return nil, err
 	}
 
+	if err = createScratchVHDs(context.Background(), baseSn.root); err != nil {
+		return nil, fmt.Errorf("failed to init base scratch VHD: %w", err)
+	}
+
 	return &cimFSSnapshotter{
 		windowsBaseSnapshotter: baseSn,
 		cimDir:                 filepath.Join(baseSn.info.HomeDir, "cim-layers"),
@@ -139,30 +156,28 @@ func (s *cimFSSnapshotter) Usage(ctx context.Context, key string) (snapshots.Usa
 }
 
 func (s *cimFSSnapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
-	m, err := s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
-	if err != nil {
-		return m, err
-	}
-	m[0].Type = "CimFS"
-	return m, nil
+	return s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
 }
 
 func (s *cimFSSnapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
-	m, err := s.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
-	if err != nil {
-		return m, err
-	}
-	m[0].Type = "CimFS"
-	return m, nil
+	return s.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
 }
 
-func (s *cimFSSnapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) {
-	mounts, err := s.windowsBaseSnapshotter.Mounts(ctx, key)
+func (s *cimFSSnapshotter) Mounts(ctx context.Context, key string) (_ []mount.Mount, err error) {
+	var snapshot storage.Snapshot
+	err = s.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
+		snapshot, err = storage.GetSnapshot(ctx, key)
+		if err != nil {
+			return fmt.Errorf("failed to get snapshot mount: %w", err)
+		}
+
+		return nil
+	})
 	if err != nil {
 		return nil, err
 	}
-	mounts[0].Type = "CimFS"
-	return mounts, nil
+
+	return s.mounts(snapshot, key), nil
 }
 
 func (s *cimFSSnapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
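Aside (not part of the diff): with Prepare, View, and Mounts now delegating to createSnapshot and the new mounts helper, the "CimFS" mount type is set in one place instead of being patched onto the first mount after the fact. Below is a minimal sketch of the mount shape for an active snapshot with one parent layer; the paths are made up, and mount.ParentLayerPathsFlag is the parent-layer-paths option prefix from containerd's Windows mount package that this change uses.

package main

import (
	"fmt"

	"github.com/containerd/containerd/v2/core/mount"
)

func main() {
	// Illustrative values only: the snapshot directories are hypothetical.
	// Active snapshots get "rw", views get "ro"; when the snapshot has
	// parents, their layer paths are attached as a JSON array behind
	// mount.ParentLayerPathsFlag.
	m := mount.Mount{
		Source: `C:\ProgramData\containerd\root\io.containerd.snapshotter.v1.cimfs\snapshots\42`,
		Type:   "CimFS",
		Options: []string{
			"rw",
			mount.ParentLayerPathsFlag + `["C:\\ProgramData\\containerd\\root\\io.containerd.snapshotter.v1.cimfs\\snapshots\\7"]`,
		},
	}
	fmt.Printf("%+v\n", m)
}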
@@ -215,3 +230,199 @@ func (s *cimFSSnapshotter) Remove(ctx context.Context, key string) error {
 	}
 	return nil
 }
+
+func (s *cimFSSnapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
+	var newSnapshot storage.Snapshot
+	err = s.ms.WithTransaction(ctx, true, func(ctx context.Context) (retErr error) {
+		newSnapshot, err = storage.CreateSnapshot(ctx, kind, key, parent, opts...)
+		if err != nil {
+			return fmt.Errorf("failed to create snapshot: %w", err)
+		}
+
+		log.G(ctx).Debug("createSnapshot")
+		// Create the new snapshot dir
+		snDir := s.getSnapshotDir(newSnapshot.ID)
+		if err = os.MkdirAll(snDir, 0700); err != nil {
+			return fmt.Errorf("failed to create snapshot dir %s: %w", snDir, err)
+		}
+		defer func() {
+			if retErr != nil {
+				os.RemoveAll(snDir)
+			}
+		}()
+
+		if strings.Contains(key, snapshots.UnpackKeyPrefix) {
+			// IO/disk space optimization: Do nothing
+			//
+			// We only need one sandbox.vhdx for the container. Skip making one for this
+			// snapshot if this isn't the snapshot that just houses the final sandbox.vhd
+			// that will be mounted as the container's scratch. Currently the key for a snapshot
+			// where a layer will be extracted to will have the string `extract-` in it.
+			return nil
+		}
+
+		if len(newSnapshot.ParentIDs) == 0 {
+			return fmt.Errorf("scratch snapshot without any parents isn't supported")
+		}
+
+		parentLayerPaths := s.parentIDsToParentPaths(newSnapshot.ParentIDs)
+		var snapshotInfo snapshots.Info
+		for _, o := range opts {
+			o(&snapshotInfo)
+		}
+
+		sizeInBytes, err := getRequestedScratchSize(ctx, snapshotInfo)
+		if err != nil {
+			return err
+		}
+
+		var makeUVMScratch bool
+		if _, ok := snapshotInfo.Labels[uvmScratchLabel]; ok {
+			makeUVMScratch = true
+		}
+
+		// This has to be run first to avoid clashing with the container's sandbox.vhdx.
+		if makeUVMScratch {
+			if err = s.createUVMScratchLayer(ctx, snDir, parentLayerPaths); err != nil {
+				return fmt.Errorf("failed to make UVM's scratch layer: %w", err)
+			}
+		}
+		if err = s.createScratchLayer(ctx, snDir, sizeInBytes); err != nil {
+			return fmt.Errorf("failed to create scratch layer: %w", err)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	return s.mounts(newSnapshot, key), nil
+}
+
+// In case of CimFS layers, the scratch VHDs are fully empty (WCIFS layers have reparse points in scratch VHDs, hence those VHDs are unique per image), so we create only one scratch VHD and then copy & expand it for every scratch layer creation.
+func (s *cimFSSnapshotter) createScratchLayer(ctx context.Context, snDir string, sizeInBytes uint64) error {
+	dest := filepath.Join(snDir, "sandbox.vhdx")
+	if err := copyScratchDisk(filepath.Join(s.root, templateVHDName), dest); err != nil {
+		return err
+	}
+
+	if sizeInBytes != 0 {
+		if err := hcsshim.ExpandSandboxSize(s.info, filepath.Base(snDir), sizeInBytes); err != nil {
+			return fmt.Errorf("failed to expand sandbox vhdx size to %d bytes: %w", sizeInBytes, err)
+		}
+	}
+	return nil
+}
+
+func (s *cimFSSnapshotter) mounts(sn storage.Snapshot, key string) []mount.Mount {
+	var (
+		roFlag string
+	)
+
+	if sn.Kind == snapshots.KindView {
+		roFlag = "ro"
+	} else {
+		roFlag = "rw"
+	}
+
+	source := s.getSnapshotDir(sn.ID)
+	parentLayerPaths := s.parentIDsToParentPaths(sn.ParentIDs)
+
+	mountType := "CimFS"
+
+	// error is not checked here, as a string array will never fail to Marshal
+	parentLayersJSON, _ := json.Marshal(parentLayerPaths)
+	parentLayersOption := mount.ParentLayerPathsFlag + string(parentLayersJSON)
+
+	options := []string{
+		roFlag,
+	}
+	if len(sn.ParentIDs) != 0 {
+		options = append(options, parentLayersOption)
+	}
+	mounts := []mount.Mount{
+		{
+			Source:  source,
+			Type:    mountType,
+			Options: options,
+		},
+	}
+
+	return mounts
+}
+
+// creates a base scratch VHD and a differencing VHD from that base VHD inside the given `path`
+// directory. Once these VHDs are created, every scratch snapshot will make a copy of the differencing VHD to
+// be used as the scratch for that snapshot. We could ideally just have a base VHD and no differencing VHD and
+// copy the base VHD for every scratch snapshot. However, base VHDs are slightly bigger in size and so take
+// longer to copy so we keep a differencing VHD and copy that.
+func createScratchVHDs(ctx context.Context, path string) (err error) {
+	baseVHDPath := filepath.Join(path, baseVHDName)
+	diffVHDPath := filepath.Join(path, templateVHDName)
+	baseVHDExists := false
+	diffVHDExists := false
+
+	if _, err = os.Stat(baseVHDPath); err == nil {
+		baseVHDExists = true
+	} else if !os.IsNotExist(err) {
+		return fmt.Errorf("failed to stat base VHD: %w", err)
+	}
+
+	_, err = os.Stat(diffVHDPath)
+	if err != nil && !os.IsNotExist(err) {
+		return fmt.Errorf("failed to stat diff VHD: %w", err)
+	} else if baseVHDExists && err == nil {
+		diffVHDExists = true
+	} else {
+		// remove this diff VHD, it must be recreated with the new base VHD.
+		os.RemoveAll(diffVHDPath)
+	}
+
+	defer func() {
+		if err != nil {
+			os.RemoveAll(baseVHDPath)
+			os.RemoveAll(diffVHDPath)
+		}
+	}()
+
+	if !baseVHDExists {
+		var baseVHDHandle syscall.Handle
+		createParams := &vhd.CreateVirtualDiskParameters{
+			Version: 2,
+			Version2: vhd.CreateVersion2{
+				MaximumSize:      vhdMaxSizeInBytes,
+				BlockSizeInBytes: vhdBlockSizeInBytes,
+			},
+		}
+		baseVHDHandle, err = vhd.CreateVirtualDisk(baseVHDPath, vhd.VirtualDiskAccessNone, vhd.CreateVirtualDiskFlagNone, createParams)
+		if err != nil {
+			return fmt.Errorf("failed to create base vhd: %w", err)
+		}
+
+		err = computestorage.FormatWritableLayerVhd(ctx, windows.Handle(baseVHDHandle))
+		// we always want to close the handle whether format succeeds or not.
+		closeErr := syscall.CloseHandle(baseVHDHandle)
+		if err != nil {
+			return err
+		} else if closeErr != nil {
+			return fmt.Errorf("failed to close vhdx handle: %w", closeErr)
+		}
+	}
+
+	if !diffVHDExists {
+		// Create the differencing disk that will be what's copied for the final rw layer
+		// for a container.
+		if err = vhd.CreateDiffVhd(diffVHDPath, baseVHDPath, vhdBlockSizeInBytes); err != nil {
+			return fmt.Errorf("failed to create differencing disk: %w", err)
+		}
+	}
+
+	// re-assigning group access even if we didn't create the VHD shouldn't throw an error
+	if err = security.GrantVmGroupAccess(baseVHDPath); err != nil {
+		return fmt.Errorf("failed to grant vm group access to %s: %w", baseVHDPath, err)
+	}
+	if err = security.GrantVmGroupAccess(diffVHDPath); err != nil {
+		return fmt.Errorf("failed to grant vm group access to %s: %w", diffVHDPath, err)
+	}
+	return nil
+}
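Aside (not part of the diff): a hypothetical usage sketch, e.g. from a test in this package, showing the flow once the base (blank-base.vhdx) and template (blank.vhdx) VHDs exist under the snapshotter root. The root path and snapshot keys below are placeholders.

package windows

import (
	"context"
	"fmt"
)

func exampleCimFSPrepare() error {
	ctx := context.Background()

	// NewCimFSSnapshotter also creates the base/template scratch VHDs via
	// createScratchVHDs; the root path here is made up.
	sn, err := NewCimFSSnapshotter(`C:\ProgramData\containerd\root\io.containerd.snapshotter.v1.cimfs`)
	if err != nil {
		return err
	}

	// The parent must name a committed snapshot (an unpacked image layer);
	// createSnapshot rejects a scratch snapshot without parents.
	mounts, err := sn.Prepare(ctx, "example-container-rw", "example-committed-parent")
	if err != nil {
		return err
	}
	for _, m := range mounts {
		fmt.Println(m.Type, m.Source, m.Options) // Type is "CimFS"
	}
	return nil
}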