diff --git a/INSTALL.md b/INSTALL.md index b5188606..d49ab271 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -23,7 +23,7 @@ curl -sfL https://raw.githubusercontent.com/portainer/kubesolo/develop/install.s # With options curl -sfL https://get.kubesolo.io | sudo sh -s -- \ - --version=v1.1.3 \ + --version=v1.1.4 \ --path=/opt/kubesolo \ --run-mode=service @@ -58,7 +58,7 @@ This script provides basic installation with minimal dependencies for the most c wget -O - https://raw.githubusercontent.com/portainer/kubesolo/develop/install-minimal.sh | sh # Or with environment variables -KUBESOLO_VERSION=v1.1.3 KUBESOLO_PATH=/opt/kubesolo sh install-minimal.sh +KUBESOLO_VERSION=v1.1.4 KUBESOLO_PATH=/opt/kubesolo sh install-minimal.sh ``` **Features:** @@ -93,7 +93,7 @@ chmod +x kubesolo-service.sh All installers support these environment variables: ```bash -export KUBESOLO_VERSION="v1.1.3" # Version to install +export KUBESOLO_VERSION="v1.1.4" # Version to install export KUBESOLO_PATH="/var/lib/kubesolo" # Installation path export KUBESOLO_PORTAINER_EDGE_ID="your-id" # Portainer Edge ID export KUBESOLO_PORTAINER_EDGE_KEY="your-key" # Portainer Edge Key @@ -138,7 +138,7 @@ For air-gapped installations: ```bash # Pre-download the binary -wget https://github.com/portainer/kubesolo/releases/download/v1.1.3/kubesolo-v1.1.3-linux-arm64.tar.gz +wget https://github.com/portainer/kubesolo/releases/download/v1.1.4/kubesolo-v1.1.4-linux-arm64.tar.gz # Extract and install manually tar -xzf kubesolo-*.tar.gz @@ -173,7 +173,7 @@ curl -sfL https://get.kubesolo.io | sudo sh - # Combined with other options curl -sfL https://get.kubesolo.io | sudo sh -s -- \ --proxy=http://proxy.company.com:8080 \ - --version=v1.1.3 \ + --version=v1.1.4 \ --path=/opt/kubesolo ``` diff --git a/cmd/kubesolo/main.go b/cmd/kubesolo/main.go index b77987ad..135055a8 100644 --- a/cmd/kubesolo/main.go +++ b/cmd/kubesolo/main.go @@ -48,6 +48,8 @@ type kubesolo struct { loadBalancer bool localStorage bool 
localStorageSharedPath string + fullMode bool + disableIPv6 bool embedded types.Embedded } @@ -74,6 +76,8 @@ func service() (*kubesolo, error) { loadBalancer: *flags.LoadBalancer, localStorage: *flags.LocalStorage, localStorageSharedPath: *flags.LocalStorageSharedPath, + fullMode: *flags.Full, + disableIPv6: *flags.DisableIPv6, }, nil } @@ -114,10 +118,16 @@ func (s *kubesolo) run() { cancel() }() + profile := "edge" + if s.fullMode { + profile = "full" + } + log.Info(). Str("version", Version). Str("build-date", BuildDate). Str("commit", Commit). + Str("profile", profile). Msg("starting kubesolo...") log.Info().Str("component", "kubesolo").Msg("ensuring all embedded dependencies are available...") @@ -131,11 +141,14 @@ func (s *kubesolo) run() { } log.Info().Str("component", "kubesolo").Msg("starting kubesolo services... this may take a few minutes...") - services := []struct { + type service struct { name string start func() readyCh chan struct{} - }{ + } + + // infraServices must be fully ready before pod masquerade is set up. + infraServices := []service{ { name: "containerd", start: func() { @@ -176,6 +189,10 @@ func (s *kubesolo) run() { }, readyCh: controllerReadyCh, }, + } + + // nodeServices start after masquerade is guaranteed to be in place. 
+ nodeServices := []service{ { name: "kubelet", start: func() { @@ -189,7 +206,7 @@ func (s *kubesolo) run() { { name: "kubeproxy", start: func() { - kubeproxyService := kubeproxy.NewService(ctx, cancel, kubeproxyReadyCh, s.embedded.AdminKubeconfigFile) + kubeproxyService := kubeproxy.NewService(ctx, cancel, kubeproxyReadyCh, s.embedded.AdminKubeconfigFile, s.embedded.FullMode) s.wg.Go(func() { kubeproxyService.Run(kubeletReadyCh) }) @@ -198,7 +215,23 @@ func (s *kubesolo) run() { }, } - for _, svc := range services { + for _, svc := range infraServices { + log.Info().Str("component", "kubesolo").Msgf("starting %s...", svc.name) + svc.start() + if !waitForService(ctx, svc.name, svc.readyCh) { + return + } + } + + // Ensure pod→external masquerade (SNAT) is in place before kubelet starts. + // kine persists cluster state across reboots, so kubelet will immediately + // reconcile existing pods — they must not start into a network with no SNAT. + log.Info().Str("component", "kubesolo").Msg("setting up pod masquerade rules...") + if err := network.EnsurePodMasquerade(types.DefaultPodCIDR); err != nil { + log.Fatal().Err(err).Msg("failed to set up pod masquerade") + } + + for _, svc := range nodeServices { log.Info().Str("component", "kubesolo").Msgf("starting %s...", svc.name) svc.start() if !waitForService(ctx, svc.name, svc.readyCh) { @@ -207,7 +240,7 @@ func (s *kubesolo) run() { } log.Info().Str("component", "kubesolo").Msg("deploying coredns...") - if err := coredns.Deploy(s.embedded.AdminKubeconfigFile); err != nil { + if err := coredns.Deploy(s.embedded.AdminKubeconfigFile, s.embedded.DisableIPv6); err != nil { log.Fatal().Err(err).Msg("failed to deploy coredns") } @@ -272,6 +305,10 @@ func cleanStaleState(basePath string) { if name == "containerd" || name == "containerd-shim-runc-v2" || name == "crun" { continue } + // Preserve registry + if name == "registry" { + continue + } target := filepath.Join(containerdDir, name) if err := os.RemoveAll(target); err == 
nil { @@ -397,12 +434,12 @@ func (s *kubesolo) bootstrap() { }, // Containerd paths - ContainerdDir: filepath.Join(basePath, types.DefaultContainerdDir), - ContainerdSocketFile: filepath.Join(basePath, types.DefaultContainerdDir, types.DefaultContainerdSocket), - ContainerdBinaryFile: filepath.Join(basePath, types.DefaultContainerdDir, "containerd"), - ContainerdImagesDir: filepath.Join(basePath, types.DefaultContainerdDir, "images"), - ContainerdShimBinaryFile: filepath.Join(basePath, types.DefaultContainerdDir, "containerd-shim-runc-v2"), - ContainerdConfigFile: filepath.Join(basePath, types.DefaultContainerdDir, "config.toml"), + ContainerdDir: filepath.Join(basePath, types.DefaultContainerdDir), + ContainerdSocketFile: filepath.Join(basePath, types.DefaultContainerdDir, types.DefaultContainerdSocket), + ContainerdBinaryFile: filepath.Join(basePath, types.DefaultContainerdDir, "containerd"), + ContainerdImagesDir: filepath.Join(basePath, types.DefaultContainerdDir, "images"), + ContainerdShimBinaryFile: filepath.Join(basePath, types.DefaultContainerdDir, "containerd-shim-runc-v2"), + ContainerdConfigFile: filepath.Join(basePath, types.DefaultContainerdDir, "config.toml"), ContainerdRootDir: filepath.Join(basePath, types.DefaultContainerdDir, "root"), ContainerdStateDir: filepath.Join(basePath, types.DefaultContainerdDir, "state"), ContainerdRegistryConfigDir: filepath.Join(basePath, types.DefaultContainerdDir, "registry"), @@ -453,5 +490,11 @@ func (s *kubesolo) bootstrap() { // Portainer Edge IsPortainerEdge: s.portainerEdgeID != "" && s.portainerEdgeKey != "", + + // Full mode + FullMode: s.fullMode, + + // IPv6 + DisableIPv6: s.disableIPv6, } } diff --git a/install-minimal.sh b/install-minimal.sh index 2ad8ae81..ab432d93 100644 --- a/install-minimal.sh +++ b/install-minimal.sh @@ -22,7 +22,7 @@ case $ARCH in esac # Configuration -KUBESOLO_VERSION="${KUBESOLO_VERSION:-v1.1.3}" +KUBESOLO_VERSION="${KUBESOLO_VERSION:-v1.1.4}" 
CONFIG_PATH="${KUBESOLO_PATH:-/var/lib/kubesolo}" INSTALL_PATH="/usr/local/bin/kubesolo" USE_MUSL="${USE_MUSL:-false}" diff --git a/install.sh b/install.sh index 15b8c1b0..1294c7d6 100644 --- a/install.sh +++ b/install.sh @@ -1060,7 +1060,7 @@ install_binary() { # ── Script entry point ──────────────────────────────────────────────────────── # Default configuration from environment variables -KUBESOLO_VERSION="${KUBESOLO_VERSION:-v1.1.3}" +KUBESOLO_VERSION="${KUBESOLO_VERSION:-v1.1.4}" CONFIG_PATH="${KUBESOLO_PATH:-/var/lib/kubesolo}" APISERVER_EXTRA_SANS="${KUBESOLO_APISERVER_EXTRA_SANS:-}" PORTAINER_EDGE_ID="${KUBESOLO_PORTAINER_EDGE_ID:-}" diff --git a/internal/config/flags/flags.go b/internal/config/flags/flags.go index 52f37c6b..cd029774 100644 --- a/internal/config/flags/flags.go +++ b/internal/config/flags/flags.go @@ -25,4 +25,6 @@ var ( LocalStorageSharedPath = Application.Flag("local-storage-shared-path", "Path to the shared file system for the local storage. Defaults to empty string.").Envar("KUBESOLO_LOCAL_STORAGE_SHARED_PATH").Default("").String() Debug = Application.Flag("debug", "Enable debug logging. Defaults to false.").Envar("KUBESOLO_DEBUG").Default("false").Bool() PprofServer = Application.Flag("pprof-server", "Enable pprof server. Defaults to false.").Envar("KUBESOLO_PPROF_SERVER").Default("false").Bool() + Full = Application.Flag("full", "Disable memory-saving overrides and use upstream Kubernetes defaults. Kubesolo still uses NodeSetter in favour of the scheduler. Recommended for CI and developer environments where memory is not constrained. Leave unset for edge deployments.").Envar("KUBESOLO_FULL").Default("false").Bool() + DisableIPv6 = Application.Flag("disable-ipv6", "Disable IPv6 support. When set, CoreDNS will not serve ip6.arpa reverse zones and kubelet will register with an explicit IPv4 node address. 
Defaults to false.").Envar("KUBESOLO_DISABLE_IPV6").Default("false").Bool() ) diff --git a/internal/core/embedded/config.go b/internal/core/embedded/config.go index 0d850a93..06a58da9 100644 --- a/internal/core/embedded/config.go +++ b/internal/core/embedded/config.go @@ -12,7 +12,7 @@ func generateCNIConfigFile() map[string]any { "type": "bridge", "bridge": "cni0", "isGateway": true, - "ipMasq": true, + "ipMasq": false, "hairpinMode": true, "capabilities": map[string]any{ "portMappings": true, diff --git a/internal/runtime/network/masquerade.go b/internal/runtime/network/masquerade.go new file mode 100644 index 00000000..5dd878a4 --- /dev/null +++ b/internal/runtime/network/masquerade.go @@ -0,0 +1,118 @@ +package network + +import ( + "fmt" + "os" + "os/exec" + "strings" + + "github.com/portainer/kubesolo/types" + "github.com/rs/zerolog/log" +) + +const masqueradeComment = "kubesolo: pod masquerade" + +// EnsurePodMasquerade programs a SNAT/masquerade rule for pod egress traffic so +// pods can reach external IPs. It is idempotent and mode-aware: iptables on +// systems that have the ip_tables kernel module, nftables otherwise. +// +// This must be called before kubelet starts (so no pod ever starts without SNAT +// in place) and after flushNftablesNat (to restore the rule after the nat table +// is flushed for kube-proxy compatibility). +func EnsurePodMasquerade(podCIDR string) error { + if _, err := os.Stat("/proc/net/ip_tables_names"); err == nil { + return ensureIPTablesMasquerade(podCIDR) + } + return ensureNftablesMasquerade(podCIDR) +} + +func ensureIPTablesMasquerade(podCIDR string) error { + // -w 5: wait up to 5 s for the xtables lock. This function is called during + // kube-proxy startup when other processes may also be modifying iptables; + // without -w, concurrent access causes "Resource temporarily unavailable". 
+ args := []string{ + "-w", "5", + "-t", "nat", "-C", "POSTROUTING", + "-s", podCIDR, "!", "-d", podCIDR, + "-m", "comment", "--comment", masqueradeComment, + "-j", "MASQUERADE", + } + + if err := exec.Command("iptables", args...).Run(); err == nil { + log.Debug().Str("component", "network").Msg("pod masquerade rule already present (iptables)") + return nil + } + + args[4] = "-A" + if out, err := exec.Command("iptables", args...).CombinedOutput(); err != nil { + return fmt.Errorf("iptables: failed to add pod masquerade rule: %v (output: %s)", err, out) + } + + log.Info().Str("component", "network"). + Str("cidr", podCIDR). + Msg("added pod masquerade rule (iptables)") + return nil +} + +func nftCombinedOutput(args ...string) ([]byte, error) { + return exec.Command("nft", args...).CombinedOutput() +} + +func nftAlreadyExists(out []byte) bool { + msg := strings.ToLower(string(out)) + return strings.Contains(msg, "file exists") || strings.Contains(msg, "already exists") +} + +// ensureNftObject ensures an nftables object (table or chain) exists. It tries +// listArgs first; if the object is absent it runs addArgs, tolerating a +// concurrent creation racing us to it. 
+func ensureNftObject(listArgs, addArgs []string) error { + if _, err := nftCombinedOutput(listArgs...); err == nil { + return nil + } + if out, err := nftCombinedOutput(addArgs...); err != nil && !nftAlreadyExists(out) { + return fmt.Errorf("nft %s: %v (output: %s)", strings.Join(addArgs, " "), err, out) + } + return nil +} + +func ensureNftablesMasquerade(podCIDR string) error { + if err := ensureNftObject( + []string{"list", "table", "ip", types.DefaultNftMasqTable}, + []string{"add", "table", "ip", types.DefaultNftMasqTable}, + ); err != nil { + return err + } + + if err := ensureNftObject( + []string{"list", "chain", "ip", types.DefaultNftMasqTable, "postrouting"}, + []string{"add", "chain", "ip", types.DefaultNftMasqTable, "postrouting", + "{ type nat hook postrouting priority srcnat; policy accept; }"}, + ); err != nil { + return err + } + + out, err := nftCombinedOutput("list", "chain", "ip", types.DefaultNftMasqTable, "postrouting") + if err != nil { + return fmt.Errorf("nft list chain ip %s postrouting: %v (output: %s)", types.DefaultNftMasqTable, err, out) + } + if strings.Contains(string(out), masqueradeComment) { + log.Debug().Str("component", "network").Msg("pod masquerade rule already present (nftables)") + return nil + } + + args := []string{ + "add", "rule", "ip", types.DefaultNftMasqTable, "postrouting", + "ip", "saddr", podCIDR, "ip", "daddr", "!=", podCIDR, + "masquerade", "comment", `"` + masqueradeComment + `"`, + } + if out, err := nftCombinedOutput(args...); err != nil { + return fmt.Errorf("nft %s: %v (output: %s)", strings.Join(args, " "), err, out) + } + + log.Info().Str("component", "network"). + Str("cidr", podCIDR). + Str("table", types.DefaultNftMasqTable). 
+ Msg("added pod masquerade rule (nftables)") + return nil +} diff --git a/pkg/components/coredns/configuration.go b/pkg/components/coredns/configuration.go index 99bb1143..92e6ac78 100644 --- a/pkg/components/coredns/configuration.go +++ b/pkg/components/coredns/configuration.go @@ -9,8 +9,25 @@ import ( "k8s.io/client-go/kubernetes" ) -// CoreDNSConfig contains minimal CoreDNS Corefile configuration -const CoreDNSConfig = `.:53 { +const coreDNSConfigIPv4Only = `.:53 { + errors + loop + cache 30 { + disable denial cluster.local + } + kubernetes cluster.local in-addr.arpa { + pods insecure + fallthrough in-addr.arpa + ttl 30 + } + forward . /etc/resolv.conf + minimal + reload + health :8080 + ready :8181 +}` + +const coreDNSConfigDualStack = `.:53 { errors loop cache 30 { @@ -28,18 +45,25 @@ const CoreDNSConfig = `.:53 { ready :8181 }` +func coreDNSConfig(disableIPv6 bool) string { + if disableIPv6 { + return coreDNSConfigIPv4Only + } + return coreDNSConfigDualStack +} + // createConfigMap creates a configMap with the bare minimum CoreDNS configuration // it creates a new configmap if it does not exist // it updates the configmap if it already exists // it returns an error if it fails -func createConfigMap(ctx context.Context, clientset *kubernetes.Clientset) error { +func createConfigMap(ctx context.Context, clientset *kubernetes.Clientset, disableIPv6 bool) error { configMap := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: coreDNSConfigMapName, Namespace: coreDNSNamespace, }, Data: map[string]string{ - "Corefile": CoreDNSConfig, + "Corefile": coreDNSConfig(disableIPv6), }, } diff --git a/pkg/components/coredns/coredns.go b/pkg/components/coredns/coredns.go index 67cb3a07..81dcae68 100644 --- a/pkg/components/coredns/coredns.go +++ b/pkg/components/coredns/coredns.go @@ -22,7 +22,7 @@ const ( ) // Deploy deploys all the necessary Kubernetes resources for CoreDNS -func Deploy(adminKubeconfig string) error { +func Deploy(adminKubeconfig string, disableIPv6 
bool) error { time.Sleep(types.DefaultComponentSleep) ctx, cancel := context.WithTimeout(context.Background(), types.DefaultContextTimeout) @@ -33,7 +33,7 @@ func Deploy(adminKubeconfig string) error { return fmt.Errorf("failed to create kubernetes client: %v", err) } - if err := createConfigMap(ctx, clientset); err != nil { + if err := createConfigMap(ctx, clientset, disableIPv6); err != nil { return fmt.Errorf("failed to create CoreDNS ConfigMap: %v", err) } diff --git a/pkg/kubernetes/apiserver/flags.go b/pkg/kubernetes/apiserver/flags.go index 20a94b50..827b696b 100644 --- a/pkg/kubernetes/apiserver/flags.go +++ b/pkg/kubernetes/apiserver/flags.go @@ -10,16 +10,11 @@ func (s *service) configureAPIServerFlags(command *cobra.Command) error { // networking settings _ = flags.Set("insecure-port", "0") - _ = flags.Set("secure-port", "6443") - _ = flags.Set("bind-address", "0.0.0.0") _ = flags.Set("advertise-address", s.nodeIP) _ = flags.Set("service-cluster-ip-range", types.DefaultServiceClusterIPRange) // etcd configuration _ = flags.Set("etcd-servers", types.DefaultKineEndpoint) - _ = flags.Set("etcd-compaction-interval", "5m") - _ = flags.Set("etcd-count-metric-poll-period", "0") - _ = flags.Set("etcd-db-metric-poll-interval", "0") // security and certificates _ = flags.Set("cert-dir", s.pkiAPIServerDir) @@ -40,33 +35,40 @@ func (s *service) configureAPIServerFlags(command *cobra.Command) error { _ = flags.Set("proxy-client-cert-file", s.requestHeaderClientCert) _ = flags.Set("proxy-client-key-file", s.requestHeaderClientKey) - // authorization and admission + // authorization _ = flags.Set("allow-privileged", "true") _ = flags.Set("authorization-mode", "Node,RBAC") - _ = flags.Set("enable-admission-plugins", "NodeRestriction,ServiceAccount,ValidatingAdmissionWebhook,MutatingAdmissionWebhook,DefaultStorageClass,CertificateApproval,CertificateSigning,CertificateSubjectRestriction,ValidatingAdmissionPolicy,MutatingAdmissionPolicy") - _ = 
flags.Set("disable-admission-plugins", "RuntimeClass,PodSecurity,ClusterTrustBundleAttest,DefaultIngressClass,TaintNodesByCondition,DefaultTolerationSeconds,StorageObjectInUseProtection,PersistentVolumeClaimResize,ResourceQuota,LimitRanger,Priority") - _ = flags.Set("enable-bootstrap-token-auth", "false") - - // performance and resource limits - _ = flags.Set("max-requests-inflight", "2000") - _ = flags.Set("max-mutating-requests-inflight", "1000") - _ = flags.Set("min-request-timeout", "180") - _ = flags.Set("request-timeout", "900s") - _ = flags.Set("kubelet-timeout", "30s") - _ = flags.Set("watch-cache", "true") - _ = flags.Set("event-ttl", "1h") - - // features and garbage collection - _ = flags.Set("enable-garbage-collector", "true") - _ = flags.Set("profiling", "false") // feature gates - disable SizeBasedListCostEstimate to suppress "Error getting keys" messages _ = flags.Set("feature-gates", "SizeBasedListCostEstimate=false") - // audit logging - _ = flags.Set("audit-log-path", "-") - _ = flags.Set("audit-log-maxage", "0") - _ = flags.Set("audit-log-maxbackup", "0") - _ = flags.Set("audit-log-maxsize", "0") + // Edge-optimised overrides — only applied when not in full mode. + // When full mode is enabled, upstream Kubernetes defaults are used instead. 
+ if !s.fullMode { + // etcd metric collection + _ = flags.Set("etcd-count-metric-poll-period", "0") + _ = flags.Set("etcd-db-metric-poll-interval", "0") + + // request throttling and timeouts + _ = flags.Set("max-requests-inflight", "2000") + _ = flags.Set("max-mutating-requests-inflight", "1000") + _ = flags.Set("min-request-timeout", "180") + _ = flags.Set("request-timeout", "900s") + _ = flags.Set("kubelet-timeout", "30s") + + // diagnostics + _ = flags.Set("profiling", "false") + + // admission control + _ = flags.Set("enable-admission-plugins", "NodeRestriction,ServiceAccount,ValidatingAdmissionWebhook,MutatingAdmissionWebhook,DefaultStorageClass,CertificateApproval,CertificateSigning,CertificateSubjectRestriction,ValidatingAdmissionPolicy,MutatingAdmissionPolicy") + _ = flags.Set("disable-admission-plugins", "RuntimeClass,PodSecurity,ClusterTrustBundleAttest,DefaultIngressClass,TaintNodesByCondition,DefaultTolerationSeconds,StorageObjectInUseProtection,PersistentVolumeClaimResize,ResourceQuota,LimitRanger,Priority") + + // audit logging + _ = flags.Set("audit-log-path", "-") + _ = flags.Set("audit-log-maxage", "0") + _ = flags.Set("audit-log-maxbackup", "0") + _ = flags.Set("audit-log-maxsize", "0") + } + return nil } diff --git a/pkg/kubernetes/apiserver/service.go b/pkg/kubernetes/apiserver/service.go index cfe26d43..951b40bc 100644 --- a/pkg/kubernetes/apiserver/service.go +++ b/pkg/kubernetes/apiserver/service.go @@ -28,6 +28,7 @@ type service struct { requestHeaderCAFile string requestHeaderClientCert string requestHeaderClientKey string + fullMode bool kubeSoloWebhook *webhook.Service } @@ -51,6 +52,7 @@ func NewService(ctx context.Context, cancel context.CancelFunc, apiServerReady c requestHeaderCAFile: embedded.RequestHeaderCerts.CACert, requestHeaderClientCert: embedded.RequestHeaderCerts.ClientCert, requestHeaderClientKey: embedded.RequestHeaderCerts.ClientKey, + fullMode: embedded.FullMode, kubeSoloWebhook: webhook.NewService(nodeName, 
embedded.NodeIP, embedded.PKIDir, embedded.AdminKubeconfigFile, embedded.LoadBalancer), } } diff --git a/pkg/kubernetes/controller/flags.go b/pkg/kubernetes/controller/flags.go index b179a269..a7eab37e 100644 --- a/pkg/kubernetes/controller/flags.go +++ b/pkg/kubernetes/controller/flags.go @@ -9,11 +9,8 @@ func (s *service) configureControllerManagerFlags(command *cobra.Command) { flags := command.Flags() // controller manager settings - _ = flags.Set("bind-address", "0.0.0.0") - _ = flags.Set("secure-port", "10257") _ = flags.Set("allocate-node-cidrs", "true") _ = flags.Set("cluster-cidr", types.DefaultPodCIDR) - _ = flags.Set("v", "0") _ = flags.Set("service-account-private-key-file", s.serviceAccountKeyFile) _ = flags.Set("kubeconfig", s.adminKubeconfigFile) _ = flags.Set("authentication-kubeconfig", s.adminKubeconfigFile) @@ -23,50 +20,51 @@ func (s *service) configureControllerManagerFlags(command *cobra.Command) { _ = flags.Set("tls-cert-file", s.controllerManagerCertFile) _ = flags.Set("tls-private-key-file", s.controllerManagerKeyFile) _ = flags.Set("leader-elect", "false") - _ = flags.Set("profiling", "false") _ = flags.Set("use-service-account-credentials", "true") - // controllers - _ = flags.Set("controllers", "deployment,replicaset,service,serviceaccount,namespace,attachdetach,endpoint,daemonset,statefulset,root-ca-certificate-publisher-controller,serviceaccount-token-controller,node-ipam-controller,endpointslice-controller,persistentvolume-binder-controller,job-controller,cronjob-controller,garbage-collector-controller,disruption,csrsigning,clusterrole-aggregation") + // Edge-optimised overrides — only applied when not in full mode. + // When full mode is enabled, upstream Kubernetes defaults are used instead. 
+ if !s.fullMode { + // controllers + _ = flags.Set("controllers", "deployment,replicaset,service,serviceaccount,namespace,attachdetach,endpoint,daemonset,statefulset,root-ca-certificate-publisher-controller,serviceaccount-token-controller,node-ipam-controller,endpointslice-controller,persistentvolume-binder-controller,job-controller,cronjob-controller,garbage-collector-controller,disruption,csrsigning,clusterrole-aggregation") - // thresholds - _ = flags.Set("terminated-pod-gc-threshold", "20") - _ = flags.Set("large-cluster-size-threshold", "10") - _ = flags.Set("unhealthy-zone-threshold", "0.7") + _ = flags.Set("profiling", "false") + _ = flags.Set("terminated-pod-gc-threshold", "20") + _ = flags.Set("large-cluster-size-threshold", "10") + _ = flags.Set("unhealthy-zone-threshold", "0.7") - // sync settings - _ = flags.Set("concurrent-deployment-syncs", "2") - _ = flags.Set("concurrent-replicaset-syncs", "2") - _ = flags.Set("concurrent-daemonset-syncs", "2") - _ = flags.Set("concurrent-job-syncs", "2") - _ = flags.Set("concurrent-endpoint-syncs", "2") - _ = flags.Set("concurrent-service-endpoint-syncs", "2") - _ = flags.Set("concurrent-gc-syncs", "2") - _ = flags.Set("concurrent-namespace-syncs", "2") - _ = flags.Set("concurrent-cron-job-syncs", "2") - _ = flags.Set("concurrent-horizontal-pod-autoscaler-syncs", "2") - _ = flags.Set("concurrent-rc-syncs", "2") - _ = flags.Set("concurrent-resource-quota-syncs", "2") - _ = flags.Set("concurrent-service-syncs", "2") - _ = flags.Set("concurrent-serviceaccount-token-syncs", "2") - _ = flags.Set("concurrent-statefulset-syncs", "2") - _ = flags.Set("concurrent-ttl-after-finished-syncs", "2") - _ = flags.Set("concurrent-ephemeralvolume-syncs", "2") - _ = flags.Set("concurrent-validating-admission-policy-status-syncs", "2") - _ = flags.Set("mirroring-concurrent-service-endpoint-syncs", "2") + // sync settings + _ = flags.Set("concurrent-deployment-syncs", "2") + _ = flags.Set("concurrent-replicaset-syncs", "2") + _ = 
flags.Set("concurrent-job-syncs", "2") + _ = flags.Set("concurrent-endpoint-syncs", "2") + _ = flags.Set("concurrent-service-endpoint-syncs", "2") + _ = flags.Set("concurrent-gc-syncs", "2") + _ = flags.Set("concurrent-namespace-syncs", "2") + _ = flags.Set("concurrent-cron-job-syncs", "2") + _ = flags.Set("concurrent-horizontal-pod-autoscaler-syncs", "2") + _ = flags.Set("concurrent-rc-syncs", "2") + _ = flags.Set("concurrent-resource-quota-syncs", "2") + _ = flags.Set("concurrent-service-syncs", "2") + _ = flags.Set("concurrent-serviceaccount-token-syncs", "2") + _ = flags.Set("concurrent-statefulset-syncs", "2") + _ = flags.Set("concurrent-ttl-after-finished-syncs", "2") + _ = flags.Set("concurrent-ephemeralvolume-syncs", "2") + _ = flags.Set("concurrent-validating-admission-policy-status-syncs", "2") + _ = flags.Set("mirroring-concurrent-service-endpoint-syncs", "2") - // sync period - _ = flags.Set("horizontal-pod-autoscaler-sync-period", "60s") - _ = flags.Set("node-monitor-period", "60s") - _ = flags.Set("pvclaimbinder-sync-period", "120s") - _ = flags.Set("resource-quota-sync-period", "15m") - _ = flags.Set("namespace-sync-period", "15m") - _ = flags.Set("route-reconciliation-period", "60s") - _ = flags.Set("attach-detach-reconcile-sync-period", "10m") - _ = flags.Set("node-monitor-grace-period", "300s") - _ = flags.Set("min-resync-period", "12h") + // sync period + _ = flags.Set("horizontal-pod-autoscaler-sync-period", "60s") + _ = flags.Set("node-monitor-period", "60s") + _ = flags.Set("pvclaimbinder-sync-period", "120s") + _ = flags.Set("resource-quota-sync-period", "15m") + _ = flags.Set("namespace-sync-period", "15m") + _ = flags.Set("route-reconciliation-period", "60s") + _ = flags.Set("attach-detach-reconcile-sync-period", "10m") + _ = flags.Set("node-monitor-grace-period", "300s") - // api server interactions - _ = flags.Set("kube-api-qps", "50") - _ = flags.Set("kube-api-burst", "100") + // api server interactions + _ = flags.Set("kube-api-qps", 
"50") + _ = flags.Set("kube-api-burst", "100") + } } diff --git a/pkg/kubernetes/controller/service.go b/pkg/kubernetes/controller/service.go index b46a9803..96705a5c 100644 --- a/pkg/kubernetes/controller/service.go +++ b/pkg/kubernetes/controller/service.go @@ -19,6 +19,7 @@ type service struct { caFile string adminKubeconfigFile string serviceAccountKeyFile string + fullMode bool } // NewService creates a new controller service @@ -33,5 +34,6 @@ func NewService(ctx context.Context, cancel context.CancelFunc, controllerReady caFile: embedded.CACerts.Cert, adminKubeconfigFile: embedded.AdminKubeconfigFile, serviceAccountKeyFile: embedded.ServiceAccountKeyFile, + fullMode: embedded.FullMode, } } diff --git a/pkg/kubernetes/kubelet/args.go b/pkg/kubernetes/kubelet/args.go index 2ca21012..fb2a8421 100644 --- a/pkg/kubernetes/kubelet/args.go +++ b/pkg/kubernetes/kubelet/args.go @@ -8,6 +8,7 @@ func (s *service) configureKubeletArgs(command *cobra.Command) { command.SetArgs([]string{ "--config", s.kubeletConfigFile, "--hostname-override", s.nodeName, + "--node-ip", s.nodeIP, "--root-dir", s.kubeletDir, "--kubeconfig", s.kubeletKubeConfigFile, }) diff --git a/pkg/kubernetes/kubelet/config.go b/pkg/kubernetes/kubelet/config.go index 24703371..60fc3f50 100644 --- a/pkg/kubernetes/kubelet/config.go +++ b/pkg/kubernetes/kubelet/config.go @@ -51,13 +51,11 @@ func (s *service) writeKubeletConfigFile() error { } func (s *service) generateKubeletConfig() map[string]any { - return map[string]any{ - "kind": "KubeletConfiguration", - "apiVersion": "kubelet.config.k8s.io/v1beta1", - "enableServer": true, + config := map[string]any{ + "kind": "KubeletConfiguration", + "apiVersion": "kubelet.config.k8s.io/v1beta1", "containerRuntimeEndpoint": "unix://" + s.containerdSockFile, - "imageServiceEndpoint": "unix://" + s.containerdSockFile, "authentication": map[string]any{ "anonymous": map[string]any{ @@ -88,52 +86,42 @@ func (s *service) generateKubeletConfig() map[string]any { 
"cgroupDriver": cgroupDriver(), - "registerNode": true, - "readOnlyPort": 0, - "port": 10250, - "syncFrequency": "5m0s", - "fileCheckFrequency": "2m0s", - "httpCheckFrequency": "2m0s", - "nodeStatusUpdateFrequency": "60s", - "nodeStatusReportFrequency": "15m0s", - "volumeStatsAggPeriod": "5m0s", - "imageMinimumGCAge": "10m0s", - "imageMaximumGCAge": "0s", - "imageGCHighThresholdPercent": 95, - "imageGCLowThresholdPercent": 80, - "runtimeRequestTimeout": "60s", - "cpuManagerReconcilePeriod": "60s", - "streamingConnectionIdleTimeout": "1h0m0s", - "rotateCertificates": true, - - "registerWithTaints": []map[string]any{}, - - "evictionHard": map[string]string{ - "memory.available": "75Mi", - "nodefs.available": "50Mi", - }, - "systemReserved": map[string]string{"memory": "25Mi"}, - "kubeReserved": map[string]string{"memory": "25Mi"}, - "failSwapOn": false, - - "kubeAPIQPS": 10, - "kubeAPIBurst": 20, - "serializeImagePulls": true, - "imagePullProgressDeadline": "1m", - - "registryPullQPS": 5, - "registryBurst": 10, - - "eventRecordQPS": 5, - "eventBurst": 10, + "readOnlyPort": 0, + "rotateCertificates": true, - "containerLogMaxSize": "512Ki", - "enableProfilingHandler": false, - "enableDebugFlagsHandler": false, - "maxPods": 20, + "failSwapOn": false, + } - "featureGates": map[string]bool{ - "RotateKubeletServerCertificate": true, - }, + // Edge-optimised overrides — only applied when not in full mode. + // When full mode is enabled, upstream Kubernetes defaults are used instead. 
+ if !s.fullMode { + config["enableProfilingHandler"] = false + config["enableDebugFlagsHandler"] = false + config["streamingConnectionIdleTimeout"] = "1h0m0s" + config["syncFrequency"] = "5m0s" + config["fileCheckFrequency"] = "2m0s" + config["httpCheckFrequency"] = "2m0s" + config["nodeStatusUpdateFrequency"] = "60s" + config["nodeStatusReportFrequency"] = "15m0s" + config["volumeStatsAggPeriod"] = "5m0s" + config["imageMinimumGCAge"] = "10m0s" + config["imageMaximumGCAge"] = "0s" + config["imageGCHighThresholdPercent"] = 95 + config["runtimeRequestTimeout"] = "60s" + config["cpuManagerReconcilePeriod"] = "60s" + config["kubeAPIQPS"] = 10 + config["kubeAPIBurst"] = 20 + config["eventRecordQPS"] = 5 + config["eventBurst"] = 10 + config["containerLogMaxSize"] = "512Ki" + config["maxPods"] = 20 + config["evictionHard"] = map[string]string{ + "memory.available": "75Mi", + "nodefs.available": "50Mi", + } + config["systemReserved"] = map[string]string{"memory": "25Mi"} + config["kubeReserved"] = map[string]string{"memory": "25Mi"} } + + return config } diff --git a/pkg/kubernetes/kubelet/service.go b/pkg/kubernetes/kubelet/service.go index 609ebeb0..bb6e3125 100644 --- a/pkg/kubernetes/kubelet/service.go +++ b/pkg/kubernetes/kubelet/service.go @@ -28,6 +28,7 @@ type service struct { nodeIP string kubeletCertPath string adminKubeconfig string + fullMode bool } // NewService creates a new kubelet service @@ -49,5 +50,6 @@ func NewService(ctx context.Context, cancel context.CancelFunc, kubeletReady cha keyFile: embedded.KubeletCerts.Key, nodeName: system.GetHostname(), adminKubeconfig: embedded.AdminKubeconfigFile, + fullMode: embedded.FullMode, } } diff --git a/pkg/kubernetes/kubeproxy/executor.go b/pkg/kubernetes/kubeproxy/executor.go index b2d3256f..7e0b34df 100644 --- a/pkg/kubernetes/kubeproxy/executor.go +++ b/pkg/kubernetes/kubeproxy/executor.go @@ -7,6 +7,7 @@ import ( "syscall" "time" + "github.com/portainer/kubesolo/internal/runtime/network" kubesoloservice 
"github.com/portainer/kubesolo/internal/runtime/service" "github.com/portainer/kubesolo/types" "github.com/rs/zerolog/log" @@ -25,6 +26,12 @@ func flushNftablesNat() { return } log.Info().Str("component", "kubeproxy").Msg("flushed nftables ip nat table to avoid iptables-nft conflicts") + + // The flush above wipes CNI masquerade rules for already-running pods. + // Re-add immediately so the gap where pods have no SNAT is negligible. + if err := network.EnsurePodMasquerade(types.DefaultPodCIDR); err != nil { + log.Warn().Str("component", "kubeproxy").Msgf("failed to restore pod masquerade after nat flush: %v", err) + } } // Run starts the kube proxy in the following order: @@ -83,6 +90,12 @@ func (s *service) postSetup() error { s.cancelShutdown() return err } + // Re-verify masquerade is in place. flushNftablesNat may have run before + // kube-proxy's own chains were programmed; this ensures kubeproxyReady only + // fires once SNAT for pod egress is confirmed. + if err := network.EnsurePodMasquerade(types.DefaultPodCIDR); err != nil { + log.Error().Str("component", "kubeproxy").Msgf("failed to ensure pod masquerade: %v", err) + } return nil } diff --git a/pkg/kubernetes/kubeproxy/flags.go b/pkg/kubernetes/kubeproxy/flags.go index 50bb12d3..9fa4f2fd 100644 --- a/pkg/kubernetes/kubeproxy/flags.go +++ b/pkg/kubernetes/kubeproxy/flags.go @@ -31,16 +31,17 @@ func (s *service) configureKubeProxyFlags(command *cobra.Command) { // performance settings _ = flags.Set("oom-score-adj", "-998") - _ = flags.Set("profiling", "false") // proxy mode and conntrack settings _ = flags.Set("proxy-mode", proxyMode) - _ = flags.Set("conntrack-max-per-core", "1024") - _ = flags.Set("conntrack-min", "1024") - _ = flags.Set("min-sync-period", "10s") + if !s.fullMode { + _ = flags.Set("profiling", "false") + _ = flags.Set("conntrack-max-per-core", "1024") + _ = flags.Set("conntrack-min", "1024") + _ = flags.Set("min-sync-period", "10s") + } if proxyMode == "iptables" { - _ = 
flags.Set("iptables-masquerade-bit", "14") _ = flags.Set("masquerade-all", "true") } } diff --git a/pkg/kubernetes/kubeproxy/service.go b/pkg/kubernetes/kubeproxy/service.go index 4409bcfc..ded3fd4c 100644 --- a/pkg/kubernetes/kubeproxy/service.go +++ b/pkg/kubernetes/kubeproxy/service.go @@ -12,14 +12,16 @@ type service struct { cancel context.CancelFunc kubeproxyReady chan<- struct{} adminKubeconfigFile string + fullMode bool } // NewService creates a new kube proxy service -func NewService(ctx context.Context, cancel context.CancelFunc, kubeproxyReady chan<- struct{}, adminKubeconfigFile string) *service { +func NewService(ctx context.Context, cancel context.CancelFunc, kubeproxyReady chan<- struct{}, adminKubeconfigFile string, fullMode bool) *service { return &service{ ctx: ctx, cancel: cancel, kubeproxyReady: kubeproxyReady, adminKubeconfigFile: adminKubeconfigFile, + fullMode: fullMode, } } diff --git a/pkg/runtime/containerd/config.go b/pkg/runtime/containerd/config.go index d785d0a8..99fc040c 100644 --- a/pkg/runtime/containerd/config.go +++ b/pkg/runtime/containerd/config.go @@ -57,77 +57,24 @@ func (s *service) writeContainerdConfigFile() error { // generateConfig generates the containerd config func (s *service) generateContainerdConfig() map[string]any { return map[string]any{ - "version": 3, - "root": s.containerdRootDir, - "state": s.containerdStateDir, - "temp": "", - "plugin_dir": "", - "disabled_plugins": []string{}, - "required_plugins": []string{}, - "oom_score": 0, - "imports": []string{types.DefaultContainerdConfigDir + "/*.toml"}, + "version": 3, + "root": s.containerdRootDir, + "state": s.containerdStateDir, + "imports": []string{types.DefaultContainerdConfigDir + "/*.toml"}, "grpc": map[string]any{ "address": s.containerdSocketFile, - "uid": 0, - "gid": 0, }, "plugins": map[string]any{ - "io.containerd.cri.v1.images": map[string]any{ - "snapshotter": "overlayfs", - "disable_snapshot_annotations": true, - "discard_unpacked_layers": false, - 
"max_concurrent_downloads": 1, - "image_pull_progress_timeout": "2m0s", - "image_pull_with_sync_fs": false, - "stats_collect_period": 120, - "pinned_images": map[string]any{ - "sandbox": types.DefaultSandboxImage, - }, - "registry": map[string]any{ - "config_path": s.containerdRegistryConfigDir, - }, - "image_decryption": map[string]any{ - "key_model": "node", - }, - }, + "io.containerd.cri.v1.images": s.generateCRIImagesConfig(), "io.containerd.cri.v1.runtime": map[string]any{ - "enable_selinux": false, - "selinux_category_range": 1024, - "max_container_log_line_size": 16384, - "disable_apparmor": false, - "restrict_oom_score_adj": false, - "disable_proc_mount": false, - "unset_seccomp_profile": "", - "tolerate_missing_hugetlb_controller": true, - "disable_hugetlb_controller": true, - "device_ownership_from_security_context": false, - "ignore_image_defined_volumes": false, - "netns_mounts_under_state_dir": false, - "enable_unprivileged_ports": true, - "enable_unprivileged_icmp": true, - "enable_cdi": true, - "drain_exec_sync_io_timeout": "0s", - "ignore_deprecation_warnings": []string{}, "containerd": map[string]any{ - "default_runtime_name": "crun", - "ignore_blockio_not_enabled_errors": false, - "ignore_rdt_not_enabled_errors": false, + "default_runtime_name": "crun", "runtimes": map[string]any{ "crun": map[string]any{ - "runtime_type": "io.containerd.runc.v2", - "runtime_path": s.containerdShimBinaryFile, - "pod_annotations": []string{}, - "container_annotations": []string{}, - "privileged_without_host_devices": false, - "privileged_without_host_devices_all_devices_allowed": false, - "base_runtime_spec": "", - "cni_conf_dir": "", - "cni_max_conf_num": 0, - "snapshotter": "", - "sandboxer": "podsandbox", - "io_type": "", + "runtime_type": "io.containerd.runc.v2", + "runtime_path": s.containerdShimBinaryFile, "options": map[string]any{ "BinaryName": s.crunBinaryFile, "SystemdCgroup": useSystemdCgroup(), @@ -136,58 +83,56 @@ func (s *service) 
generateContainerdConfig() map[string]any { }, }, "cni": map[string]any{ - "bin_dir": s.containerdCNIPluginsDir, - "conf_dir": types.DefaultStandardCNIConfDir, - "max_conf_num": 1, - "setup_serially": false, - "conf_template": "", - "ip_pref": "", - "use_internal_loopback": false, + "bin_dir": s.containerdCNIPluginsDir, + "conf_dir": types.DefaultStandardCNIConfDir, }, }, - "io.containerd.gc.v1.scheduler": map[string]any{ - "pause_threshold": 0.01, - "deletion_threshold": 0, - "mutation_threshold": 50, - "schedule_delay": "5s", - "startup_delay": "200ms", - }, - - "io.containerd.grpc.v1.cri": map[string]any{ - "disable_tcp_service": true, - "stream_server_address": "127.0.0.1", - "stream_server_port": "0", - "stream_idle_timeout": "4h0m0s", - "enable_tls_streaming": false, - }, - - "io.containerd.snapshotter.v1.overlayfs": map[string]any{ - "root_path": "", - "upperdir_label": false, - "sync_remove": false, - "slow_chown": false, - "mount_options": []string{}, - }, + "io.containerd.gc.v1.scheduler": s.generateGCSchedulerConfig(), "io.containerd.runtime.v2.task": map[string]any{ "platforms": []string{"linux/amd64", "linux/arm64", "linux/arm"}, }, }, + } +} - "cgroup": map[string]any{ - "path": "", +// generateCRIImagesConfig returns the CRI images plugin configuration. +// Edge-specific overrides (max_concurrent_downloads, stats_collect_period) are +// only applied when not in full mode. 
+func (s *service) generateCRIImagesConfig() map[string]any { + cfg := map[string]any{ + "image_pull_progress_timeout": "2m0s", + "pinned_images": map[string]any{ + "sandbox": types.DefaultSandboxImage, }, - - "timeouts": map[string]any{ - "io.containerd.timeout.bolt.open": "0s", - "io.containerd.timeout.metrics.shimstats": "2s", - "io.containerd.timeout.shim.cleanup": "5s", - "io.containerd.timeout.shim.load": "5s", - "io.containerd.timeout.shim.shutdown": "3s", - "io.containerd.timeout.task.state": "2s", + "registry": map[string]any{ + "config_path": s.containerdRegistryConfigDir, }, } + + if !s.fullMode { + cfg["max_concurrent_downloads"] = 1 + cfg["stats_collect_period"] = 120 + } + + return cfg +} + +// generateGCSchedulerConfig returns the GC scheduler plugin configuration. +// Edge-specific overrides are only applied when not in full mode. +func (s *service) generateGCSchedulerConfig() map[string]any { + if s.fullMode { + return map[string]any{} + } + + return map[string]any{ + "pause_threshold": 0.01, + "deletion_threshold": 0, + "mutation_threshold": 50, + "schedule_delay": "5s", + "startup_delay": "200ms", + } } func (s *service) generateCustomFlags() []cli.Flag { diff --git a/pkg/runtime/containerd/service.go b/pkg/runtime/containerd/service.go index 5c5d4a52..21414f1f 100644 --- a/pkg/runtime/containerd/service.go +++ b/pkg/runtime/containerd/service.go @@ -28,6 +28,7 @@ type service struct { sandboxImageFile string localPathProvisionerImageFile string isPortainerEdge bool + fullMode bool } // NewService creates a new containerd service @@ -51,5 +52,6 @@ func NewService(ctx context.Context, cancel context.CancelFunc, containerdReady sandboxImageFile: embedded.SandboxImageFile, localPathProvisionerImageFile: embedded.LocalPathProvisionerImageFile, isPortainerEdge: embedded.IsPortainerEdge, + fullMode: embedded.FullMode, } } diff --git a/types/const.go b/types/const.go index 6d4cd8b7..f9c7f027 100644 --- a/types/const.go +++ b/types/const.go @@ -36,4 
+36,5 @@ const ( DefaultContextTimeout = 15 * time.Second DefaultComponentSleep = 5 * time.Second DefaultRetryCount = 5 + DefaultNftMasqTable = "kubesolo-masq" ) diff --git a/types/types.go b/types/types.go index 48446a39..eb0c327c 100644 --- a/types/types.go +++ b/types/types.go @@ -129,6 +129,12 @@ type Embedded struct { // Portainer Edge IsPortainerEdge bool + + // Full mode — disables memory-saving overrides, uses upstream Kubernetes defaults + FullMode bool + + // IPv6 + DisableIPv6 bool } // EdgeAgentConfig contains configuration for Portainer Edge Agent