Skip to content

Commit 637eac6

Browse files
authored
Merge pull request #148 from craddm/configure-containerd
Configure containerd on isolated cluster and create Harbor service to provide internal endpoint
2 parents d6e766a + a1e4519 commit 637eac6

9 files changed

Lines changed: 273 additions & 7 deletions

File tree

infra/aks/__main__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,17 @@ def get_kubeconfig(
127127
),
128128
)
129129

130+
# Grant the managed identity Contributor role on the access vnet so it can manage network interfaces
131+
# This allows the creation of internal load balancers to make it easier to direct traffic between the subnets
132+
authorization.RoleAssignment(
133+
"cluster_role_assignment_access_vnet",
134+
principal_id=identity.principal_id,
135+
principal_type=authorization.PrincipalType.SERVICE_PRINCIPAL,
136+
# Contributor: https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles
137+
role_definition_id=f"/subscriptions/{azure_config.require('subscriptionId')}/providers/Microsoft.Authorization/roleDefinitions/b24988ac-6180-42a0-ab88-20f7382dd24c",
138+
scope=networking.access_nodes.id,
139+
)
140+
130141
# Grant the managed identity Contributor role on the isolated vnet so it can manage network interfaces
131142
# This allows the creation of internal load balancers to make it easier to direct traffic to the right place on the isolated network
132143
authorization.RoleAssignment(

infra/fridge/access-cluster/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,6 @@ def patch_namespace(name: str, pss: PodSecurityStandard) -> NamespacePatch:
128128
# Pulumi exports
129129
pulumi.export("fridge_api_ip_address", config.require("fridge_api_ip_address"))
130130
pulumi.export("harbor_fqdn", harbor.harbor_fqdn)
131-
pulumi.export("harbor_ip_address", config.require("harbor_ip"))
131+
pulumi.export("harbor_ip_address", harbor.harbor_lb_ip)
132132
pulumi.export("ingress_ip", ingress_nginx.ingress_ip)
133133
pulumi.export("ingress_ports", ingress_nginx.ingress_ports)

infra/fridge/access-cluster/components/container_registry.py

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import pulumi
44
from pulumi import ComponentResource, ResourceOptions
5+
56
from pulumi_kubernetes.batch.v1 import (
67
Job,
78
JobSpecArgs,
@@ -15,6 +16,9 @@
1516
PodTemplateSpecArgs,
1617
Secret,
1718
SecurityContextArgs,
19+
Service,
20+
ServicePortArgs,
21+
ServiceSpecArgs,
1822
VolumeArgs,
1923
VolumeMountArgs,
2024
)
@@ -25,8 +29,7 @@
2529

2630
from .storage_classes import StorageClasses
2731

28-
29-
from enums import PodSecurityStandard, TlsEnvironment, tls_issuer_names
32+
from enums import K8sEnvironment, PodSecurityStandard, TlsEnvironment, tls_issuer_names
3033

3134

3235
class ContainerRegistryArgs:
@@ -75,6 +78,15 @@ def __init__(
7578
else "ReadWriteOnce",
7679
}
7780

81+
api_service_annotations = (
82+
{
83+
"service.beta.kubernetes.io/azure-load-balancer-internal": "true",
84+
"service.beta.kubernetes.io/azure-load-balancer-internal-subnet": "networking-access-nodes",
85+
}
86+
if K8sEnvironment(args.config.get("k8s_env")) == K8sEnvironment.AKS
87+
else {}
88+
)
89+
7890
self.harbor = Release(
7991
"harbor",
8092
ReleaseArgs(
@@ -86,10 +98,8 @@ def __init__(
8698
),
8799
values={
88100
"expose": {
89-
"clusterIP": {
90-
"staticClusterIP": args.config.require("harbor_ip"),
91-
},
92-
"type": "clusterIP",
101+
"type": "loadBalancer",
102+
"loadBalancer": {"annotations": api_service_annotations},
93103
"tls": {
94104
"enabled": False,
95105
"certSource": "none",
@@ -170,6 +180,26 @@ def __init__(
170180
),
171181
)
172182

183+
self.harbor_internal_loadbalancer = Service.get(
184+
"harbor-internal-lb",
185+
id=pulumi.Output.concat(self.harbor_ns.metadata.name, "/harbor"),
186+
opts=ResourceOptions.merge(
187+
child_opts,
188+
ResourceOptions(
189+
depends_on=[
190+
self.harbor,
191+
]
192+
),
193+
),
194+
)
195+
196+
# Extract the dynamically assigned IP address
197+
self.harbor_lb_ip = self.harbor_internal_loadbalancer.status.apply(
198+
lambda status: status.load_balancer.ingress[0].ip
199+
if status and status.load_balancer and status.load_balancer.ingress
200+
else None
201+
)
202+
173203
# Create a daemonset to skip TLS verification for the harbor registry
174204
# This is needed while using staging/self-signed certificates for Harbor
175205
# A daemonset is used to run the configuration on all nodes in the cluster

infra/fridge/isolated-cluster/Pulumi.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ config:
1212
access_cluster_node_cidr:
1313
type: string
1414
description: The node CIDR of the access cluster to allow access from
15+
access_cluster_stack:
16+
type: string
17+
description: The Pulumi stack name of the access cluster deployment
1518
azure_subscription_id:
1619
type: string
1720
description: Azure Subscription ID for the access cluster - use a dummy value when not deploying to AKS
@@ -43,6 +46,9 @@ config:
4346
minio_pool_size:
4447
type: string
4548
description: Size of the MinIO storage pool (e.g., 20Gi)
49+
organization_name:
50+
type: string
51+
description: The Pulumi organization name
4652
kubernetes:context:
4753
value: ""
4854
description: Kubernetes context for the FRIDGE deployment

infra/fridge/isolated-cluster/__main__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ def patch_namespace(name: str, pss: PodSecurityStandard) -> NamespacePatch:
2323
config = pulumi.Config()
2424
tls_environment = TlsEnvironment(config.require("tls_environment"))
2525
stack_name = pulumi.get_stack()
26+
organization = config.require("organization_name")
27+
project_name = config.require("project_name")
28+
access_stack_name = config.require("access_cluster_stack")
29+
30+
access_stack = pulumi.StackReference(
31+
f"{organization}/{project_name}/{access_stack_name}"
32+
)
2633

2734
try:
2835
k8s_environment = K8sEnvironment(config.get("k8s_env"))
@@ -199,5 +206,24 @@ def patch_namespace(name: str, pss: PodSecurityStandard) -> NamespacePatch:
199206
),
200207
)
201208

209+
# Container runtime configuration (containerd)
210+
if k8s_environment == K8sEnvironment.AKS:
211+
container_runtime_config = components.ContainerRuntimeConfig(
212+
"container-runtime-config",
213+
args=components.ContainerRuntimeConfigArgs(
214+
config=config,
215+
harbor_fqdn=access_stack.get_output("harbor_fqdn"),
216+
),
217+
opts=ResourceOptions(
218+
depends_on=resources,
219+
),
220+
)
221+
else:
222+
pulumi.log.warn(
223+
"Container runtime configuration is only applied on AKS. "
224+
"For Dawn AI and local K3s deployments, please ensure containerd is configured manually. "
225+
"If you deployed K3s using the scripts in infra/k3s, containerd should already be configured correctly."
226+
)
227+
202228
# Pulumi stack outputs
203229
pulumi.export("fridge_api_ip", config.require("fridge_api_ip"))

infra/fridge/isolated-cluster/components/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .api_server import ApiServer, ApiServerArgs
22
from .block_storage import BlockStorage, BlockStorageArgs
33
from .cert_manager import CertManager, CertManagerArgs
4+
from .container_runtime import ContainerRuntimeConfig, ContainerRuntimeConfigArgs
45
from .minio_config import MinioConfigJob, MinioConfigArgs
56
from .network_policies import NetworkPolicies
67
from .object_storage import ObjectStorage, ObjectStorageArgs
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import pulumi
2+
from pulumi import ComponentResource, Output, ResourceOptions
3+
from string import Template
4+
from enums import PodSecurityStandard
5+
from pulumi_kubernetes.core.v1 import Namespace
6+
from pulumi_kubernetes.meta.v1 import ObjectMetaArgs
7+
from pulumi_kubernetes.yaml import ConfigGroup
8+
9+
10+
class ContainerRuntimeConfigArgs:
    """Inputs for the ContainerRuntimeConfig component.

    :param config: Pulumi stack configuration for this deployment.
    :param harbor_fqdn: FQDN of the Harbor registry, typically taken from the
        access cluster's ``harbor_fqdn`` stack output.
    """

    def __init__(
        self,
        config: pulumi.config.Config,
        harbor_fqdn: Output[str],
    ) -> None:
        # Plain value-object: hold the inputs for ContainerRuntimeConfig.
        self.harbor_fqdn = harbor_fqdn
        self.config = config
18+
19+
20+
class ContainerRuntimeConfig(ComponentResource):
    """Configure containerd registry mirrors on all cluster nodes.

    Creates a privileged namespace, then renders the manifest template at
    ``k8s/containerd/registry_mirrors.yaml`` (a ``string.Template`` with
    ``${namespace}`` and ``${harbor_fqdn}`` placeholders) and applies it via a
    ``ConfigGroup``. The manifest deploys the per-node configuration that
    points containerd at the Harbor pull-through proxy.
    """

    def __init__(
        self,
        name: str,
        args: ContainerRuntimeConfigArgs,
        opts: ResourceOptions | None = None,
    ):
        super().__init__("fridge:ContainerRuntimeConfig", name, {}, opts)
        child_opts = ResourceOptions.merge(opts, ResourceOptions(parent=self))

        # The namespace must allow privileged workloads: the deployed
        # DaemonSet writes to the node filesystem through a hostPath mount.
        self.config_ns = Namespace(
            "container-runtime-config-ns",
            metadata=ObjectMetaArgs(
                name="containerd-config",
                labels={} | PodSecurityStandard.PRIVILEGED.value,
            ),
            opts=child_opts,
        )

        # Read the template once at component construction time. Using a
        # context manager closes the file handle (the previous bare
        # open(...).read() leaked it and triggered a ResourceWarning).
        with open("k8s/containerd/registry_mirrors.yaml", "r") as template_file:
            yaml_template = template_file.read()

        # Both substituted values are Pulumi Outputs, so the template can only
        # be rendered inside an apply once they are resolved.
        registry_mirror_config = Output.all(
            namespace=self.config_ns.metadata.name,
            harbor_fqdn=args.harbor_fqdn,
        ).apply(
            lambda args: Template(yaml_template).substitute(
                namespace=args["namespace"],
                harbor_fqdn=args["harbor_fqdn"],
            )
        )

        # NOTE(review): creating a resource inside apply() means it is
        # invisible to `pulumi preview` until the Outputs resolve; acceptable
        # here because the YAML content itself depends on resolved Outputs.
        self.configure_runtime = registry_mirror_config.apply(
            lambda yaml_content: ConfigGroup(
                "configure-container-runtime",
                yaml=[yaml_content],
                opts=ResourceOptions.merge(
                    child_opts,
                    ResourceOptions(
                        depends_on=[self.config_ns],
                    ),
                ),
            )
        )
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Rendered by ContainerRuntimeConfig via string.Template:
# ${namespace} and ${harbor_fqdn} are substituted before this is applied.
apiVersion: v1
kind: ConfigMap
metadata:
  name: containerd-registry-mirrors
  namespace: ${namespace}
data:
  # Each key is a containerd hosts.toml file, copied onto every node under
  # /etc/containerd/certs.d/<registry>/hosts.toml by the DaemonSet below.
  docker-io-hosts.toml: |
    [host."http://${harbor_fqdn}/v2/proxy-docker.io"]
      capabilities = ["pull", "resolve"]
      skip_verify = true
      override_path = true

    # Fall back to Docker Hub if Harbor fails
    [host."https://registry-1.docker.io"]
      capabilities = ["pull", "resolve"]

  ghcr-io-hosts.toml: |
    [host."http://${harbor_fqdn}/v2/proxy-ghcr.io"]
      capabilities = ["pull", "resolve"]
      skip_verify = true
      override_path = true

    # Fall back to GHCR if Harbor fails
    [host."https://ghcr.io"]
      capabilities = ["pull", "resolve"]

  quay-io-hosts.toml: |
    [host."http://${harbor_fqdn}/v2/proxy-quay.io"]
      capabilities = ["pull", "resolve"]
      skip_verify = true
      override_path = true

    # Fall back to Quay if Harbor fails
    [host."https://quay.io"]
      capabilities = ["pull", "resolve"]
---
# Runs on every node: the init container copies the hosts.toml files from the
# ConfigMap onto the node's containerd configuration directory via hostPath.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: configure-containerd-mirrors
  namespace: ${namespace}
spec:
  selector:
    matchLabels:
      app: configure-containerd-mirrors
  template:
    metadata:
      labels:
        app: configure-containerd-mirrors
    spec:
      initContainers:
        # Writes the registry mirror configuration onto the host filesystem.
        # Runs as root with DAC_OVERRIDE so it can write under /etc/containerd.
        - name: configure-containerd
          image: busybox
          command:
            - /bin/sh
            - -c
            - |
              set -e
              echo "Configuring containerd registry mirrors..."
              mkdir -p /host/etc/containerd/certs.d/docker.io
              mkdir -p /host/etc/containerd/certs.d/ghcr.io
              mkdir -p /host/etc/containerd/certs.d/quay.io

              echo "Copying Docker, GHCR, and Quay registry mirror configurations..."
              cp /config/docker-io-hosts.toml /host/etc/containerd/certs.d/docker.io/hosts.toml
              cp /config/ghcr-io-hosts.toml /host/etc/containerd/certs.d/ghcr.io/hosts.toml
              cp /config/quay-io-hosts.toml /host/etc/containerd/certs.d/quay.io/hosts.toml
              echo "Containerd registry mirrors configured."
          securityContext:
            runAsUser: 0
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
              add:
                - DAC_OVERRIDE
          volumeMounts:
            - name: host-fs
              mountPath: /host/etc/containerd/certs.d
            - name: config
              mountPath: /config
              readOnly: true
      containers:
        # Minimal long-running container so the pod stays Running after the
        # init step completes (a DaemonSet pod must have a main container).
        - name: wait
          image: registry.k8s.io/pause:3.8
          resources:
            requests:
              cpu: 10m
              memory: 10Mi
            limits:
              cpu: 50m
              memory: 50Mi
          securityContext:
            privileged: false
            runAsNonRoot: true
            runAsUser: 65532
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
      volumes:
        # Node directory that containerd reads per-registry host config from.
        - name: host-fs
          hostPath:
            path: /etc/containerd/certs.d
            type: DirectoryOrCreate
        - name: config
          configMap:
            name: containerd-registry-mirrors

infra/k3s/install.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,28 @@
55
# to allow Cilium to handle all these
66
echo 'Installing K3s...'
77

8+
# Configure containerd for private registry before K3s installation
9+
echo 'Configuring containerd registry settings...'
10+
HARBOR_HOSTNAME="${1:-harbor.fridge.internal}" # Use argument or default
11+
12+
sudo mkdir -p /etc/rancher/k3s
13+
sudo tee /etc/rancher/k3s/registries.yaml > /dev/null <<EOF
14+
mirrors:
15+
docker.io:
16+
endpoint:
17+
- https://$HARBOR_HOSTNAME/v2/proxy-docker.io
18+
quay.io:
19+
endpoint:
20+
- https://$HARBOR_HOSTNAME/v2/proxy-quay.io
21+
ghcr.io:
22+
endpoint:
23+
- https://$HARBOR_HOSTNAME/v2/proxy-ghcr.io
24+
configs:
25+
$HARBOR_HOSTNAME:
26+
tls:
27+
insecure_skip_verify: true
28+
EOF
29+
830
curl -sfL https://get.k3s.io | sh -s - \
931
--flannel-backend none \
1032
--disable-network-policy \

0 commit comments

Comments
 (0)