@@ -21,6 +21,7 @@ package container
21
21
import (
22
22
"bytes"
23
23
"context"
24
+ "encoding/csv"
24
25
"fmt"
25
26
"io"
26
27
"os"
@@ -35,6 +36,7 @@ import (
35
36
"github.com/docker/docker/pkg/stdcopy"
36
37
"github.com/docker/go-connections/nat"
37
38
"github.com/pkg/errors"
39
+ "k8s.io/utils/pointer"
38
40
39
41
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
40
42
)
@@ -43,6 +45,10 @@ const (
43
45
httpProxy = "HTTP_PROXY"
44
46
httpsProxy = "HTTPS_PROXY"
45
47
noProxy = "NO_PROXY"
48
+
49
+ btrfsStorage = "btrfs"
50
+ zfsStorage = "zfs"
51
+ xfsStorage = "xfs"
46
52
)
47
53
48
54
type dockerRuntime struct {
@@ -372,8 +378,10 @@ func (d *dockerRuntime) RunContainer(ctx context.Context, runConfig *RunContaine
372
378
}
373
379
374
380
restartPolicy := runConfig .RestartPolicy
381
+ restartMaximumRetryCount := 0
375
382
if restartPolicy == "" {
376
- restartPolicy = "unless-stopped"
383
+ restartPolicy = "on-failure"
384
+ restartMaximumRetryCount = 1
377
385
}
378
386
379
387
hostConfig := dockercontainer.HostConfig {
@@ -383,11 +391,12 @@ func (d *dockerRuntime) RunContainer(ctx context.Context, runConfig *RunContaine
383
391
// including some ones docker would otherwise do by default.
384
392
// for now this is what we want. in the future we may revisit this.
385
393
Privileged : true ,
386
- SecurityOpt : []string {"seccomp=unconfined" }, // ignore seccomp
394
+ SecurityOpt : []string {"seccomp=unconfined" , "apparmor=unconfined" }, // ignore seccomp
387
395
NetworkMode : dockercontainer .NetworkMode (runConfig .Network ),
388
396
Tmpfs : runConfig .Tmpfs ,
389
397
PortBindings : nat.PortMap {},
390
- RestartPolicy : dockercontainer.RestartPolicy {Name : restartPolicy },
398
+ RestartPolicy : dockercontainer.RestartPolicy {Name : restartPolicy , MaximumRetryCount : restartMaximumRetryCount },
399
+ Init : pointer .Bool (false ),
391
400
}
392
401
networkConfig := network.NetworkingConfig {}
393
402
@@ -398,21 +407,21 @@ func (d *dockerRuntime) RunContainer(ctx context.Context, runConfig *RunContaine
398
407
}
399
408
}
400
409
401
- // mount /dev/mapper if docker storage driver if Btrfs or ZFS
402
- // https://github.com/kubernetes-sigs/kind/pull/1464
403
- needed , err := d .needsDevMapper (ctx )
410
+ info , err := d .dockerClient .Info (ctx )
404
411
if err != nil {
405
412
return errors .Wrapf (err , "unable to get Docker engine info, failed to create container %q" , runConfig .Name )
406
413
}
407
414
408
- if needed {
415
+ // mount /dev/mapper if the docker storage driver is Btrfs or ZFS
416
+ // https://github.com/kubernetes-sigs/kind/pull/1464
417
+ if d .needsDevMapper (info ) {
409
418
hostConfig .Binds = append (hostConfig .Binds , "/dev/mapper:/dev/mapper:ro" )
410
419
}
411
420
412
421
envVars := environmentVariables (runConfig )
413
422
414
423
// pass proxy environment variables to be used by node's docker daemon
415
- proxyDetails , err := d .getProxyDetails (ctx , runConfig .Network )
424
+ proxyDetails , err := d .getProxyDetails (ctx , runConfig .Network , runConfig . Name )
416
425
if err != nil {
417
426
return errors .Wrapf (err , "error getting subnets for %q" , runConfig .Network )
418
427
}
@@ -421,15 +430,30 @@ func (d *dockerRuntime) RunContainer(ctx context.Context, runConfig *RunContaine
421
430
}
422
431
containerConfig .Env = envVars
423
432
433
+ // handle Docker on Btrfs or ZFS
434
+ // https://github.com/kubernetes-sigs/kind/issues/1416#issuecomment-606514724
435
+ if d .mountDevMapper (info ) {
436
+ runConfig .Mounts = append (runConfig .Mounts , Mount {
437
+ Source : "/dev/mapper" ,
438
+ Target : "/dev/mapper" ,
439
+ })
440
+ }
441
+
424
442
configureVolumes (runConfig , & containerConfig , & hostConfig )
425
443
configurePortMappings (runConfig .PortMappings , & containerConfig , & hostConfig )
426
444
427
- if d .usernsRemap (ctx ) {
445
+ if d .usernsRemap (info ) {
428
446
// We need this argument in order to make this command work
429
447
// in systems that have userns-remap enabled on the docker daemon
430
448
hostConfig .UsernsMode = "host"
431
449
}
432
450
451
+ // enable /dev/fuse explicitly for fuse-overlayfs
452
+ // (Rootless Docker does not automatically mount /dev/fuse with --privileged)
453
+ if d .mountFuse (info ) {
454
+ hostConfig .Devices = append (hostConfig .Devices , dockercontainer.DeviceMapping {PathOnHost : "/dev/fuse" })
455
+ }
456
+
433
457
// Make sure we have the image
434
458
if err := d .PullContainerImageIfNotExists (ctx , runConfig .Image ); err != nil {
435
459
return errors .Wrapf (err , "error pulling container image %s" , runConfig .Image )
@@ -511,13 +535,8 @@ func (d *dockerRuntime) RunContainer(ctx context.Context, runConfig *RunContaine
511
535
// needsDevMapper checks whether we need to mount /dev/mapper.
512
536
// This is required when the docker storage driver is Btrfs or ZFS.
513
537
// https://github.com/kubernetes-sigs/kind/pull/1464
514
- func (d * dockerRuntime ) needsDevMapper (ctx context.Context ) (bool , error ) {
515
- info , err := d .dockerClient .Info (ctx )
516
- if err != nil {
517
- return false , err
518
- }
519
-
520
- return info .Driver == "btrfs" || info .Driver == "zfs" , nil
538
+ func (d * dockerRuntime ) needsDevMapper (info types.Info ) bool {
539
+ return info .Driver == btrfsStorage || info .Driver == zfsStorage
521
540
}
522
541
523
542
// ownerAndGroup gets the user configuration for the container (user:group).
@@ -601,7 +620,7 @@ type proxyDetails struct {
601
620
602
621
// getProxyDetails returns a struct with the host environment proxy settings
603
622
// that should be passed to the nodes.
604
- func (d * dockerRuntime ) getProxyDetails (ctx context.Context , network string ) (* proxyDetails , error ) {
623
+ func (d * dockerRuntime ) getProxyDetails (ctx context.Context , network string , nodeNames ... string ) (* proxyDetails , error ) {
605
624
var val string
606
625
details := proxyDetails {Envs : make (map [string ]string )}
607
626
proxyEnvs := []string {httpProxy , httpsProxy , noProxy }
@@ -626,21 +645,24 @@ func (d *dockerRuntime) getProxyDetails(ctx context.Context, network string) (*p
626
645
if err != nil {
627
646
return & details , err
628
647
}
629
- noProxyList := strings .Join (append (subnets , details .Envs [noProxy ]), "," )
630
- details .Envs [noProxy ] = noProxyList
631
- details .Envs [strings .ToLower (noProxy )] = noProxyList
648
+ noProxyList := append (subnets , details .Envs [noProxy ])
649
+ noProxyList = append (noProxyList , nodeNames ... )
650
+ // Add pod and service dns names to no_proxy to allow in cluster
651
+ // Note: this is best effort based on the default CoreDNS spec
652
+ // https://github.com/kubernetes/dns/blob/master/docs/specification.md
653
+ // Any user created pod/service hostnames, namespaces, custom DNS services
654
+ // are expected to be no-proxied by the user explicitly.
655
+ noProxyList = append (noProxyList , ".svc" , ".svc.cluster" , ".svc.cluster.local" )
656
+ noProxyJoined := strings .Join (noProxyList , "," )
657
+ details .Envs [noProxy ] = noProxyJoined
658
+ details .Envs [strings .ToLower (noProxy )] = noProxyJoined
632
659
}
633
660
634
661
return & details , nil
635
662
}
636
663
637
664
// usernsRemap checks if userns-remap is enabled in dockerd.
638
- func (d * dockerRuntime ) usernsRemap (ctx context.Context ) bool {
639
- info , err := d .dockerClient .Info (ctx )
640
- if err != nil {
641
- return false
642
- }
643
-
665
+ func (d * dockerRuntime ) usernsRemap (info types.Info ) bool {
644
666
for _ , secOpt := range info .SecurityOptions {
645
667
if strings .Contains (secOpt , "name=userns" ) {
646
668
return true
@@ -649,6 +671,49 @@ func (d *dockerRuntime) usernsRemap(ctx context.Context) bool {
649
671
return false
650
672
}
651
673
674
+ // mountDevMapper checks if the Docker storage driver is Btrfs or ZFS
675
+ // or if the backing filesystem is Btrfs.
676
+ func (d * dockerRuntime ) mountDevMapper (info types.Info ) bool {
677
+ storage := ""
678
+ storage = strings .ToLower (strings .TrimSpace (info .Driver ))
679
+ if storage == btrfsStorage || storage == zfsStorage || storage == "devicemapper" {
680
+ return true
681
+ }
682
+
683
+ // check the backing file system
684
+ // docker info -f '{{json .DriverStatus }}'
685
+ // [["Backing Filesystem","extfs"],["Supports d_type","true"],["Native Overlay Diff","true"]]
686
+ for _ , item := range info .DriverStatus {
687
+ if item [0 ] == "Backing Filesystem" {
688
+ storage = strings .ToLower (item [1 ])
689
+ break
690
+ }
691
+ }
692
+
693
+ return storage == btrfsStorage || storage == zfsStorage || storage == xfsStorage
694
+ }
695
+
696
+ // rootless: use fuse-overlayfs by default
697
+ // https://github.com/kubernetes-sigs/kind/issues/2275
698
+ func (d * dockerRuntime ) mountFuse (info types.Info ) bool {
699
+ for _ , o := range info .SecurityOptions {
700
+ // o is like "name=seccomp,profile=default", or "name=rootless",
701
+ csvReader := csv .NewReader (strings .NewReader (o ))
702
+ sliceSlice , err := csvReader .ReadAll ()
703
+ if err != nil {
704
+ return false
705
+ }
706
+ for _ , f := range sliceSlice {
707
+ for _ , ff := range f {
708
+ if ff == "name=rootless" {
709
+ return true
710
+ }
711
+ }
712
+ }
713
+ }
714
+ return false
715
+ }
716
+
652
717
func isSELinuxEnforcing () bool {
653
718
dat , err := os .ReadFile ("/sys/fs/selinux/enforce" )
654
719
if err != nil {
0 commit comments