|
| 1 | +// Copyright (c) 2025 Gitpod GmbH. All rights reserved. |
| 2 | +// Licensed under the GNU Affero General Public License (AGPL). |
| 3 | +// See License.AGPL.txt in the project root for license information. |
| 4 | + |
| 5 | +// Copied from https://github.com/opencontainers/runc/blob/e0406b4ba62071d40f1eaa443945764e0ef56c41/libcontainer/cgroups/devices/devicefilter.go |
| 6 | +// |
| 7 | +// Implements creation of eBPF device filter program. |
| 8 | +// |
| 9 | +// Based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c |
| 10 | +// |
| 11 | +// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) |
| 12 | +// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 |
| 13 | +package devicefilter |
| 14 | + |
| 15 | +import ( |
| 16 | + "errors" |
| 17 | + "fmt" |
| 18 | + "math" |
| 19 | + "strconv" |
| 20 | + |
| 21 | + "github.com/cilium/ebpf/asm" |
| 22 | + "golang.org/x/sys/unix" |
| 23 | + |
| 24 | + devices "github.com/gitpod-io/gitpod/ws-daemon/pkg/libcontainer/devices" |
| 25 | +) |
| 26 | + |
// license is the license string returned together with the eBPF program.
// Its format is the same as the kernel MODULE_LICENSE macro.
const license = "Apache"
| 31 | + |
| 32 | +// DeviceFilter returns eBPF device filter program and its license string. |
| 33 | +func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) { |
| 34 | + // Generate the minimum ruleset for the device rules we are given. While we |
| 35 | + // don't care about minimum transitions in cgroupv2, using the emulator |
| 36 | + // gives us a guarantee that the behaviour of devices filtering is the same |
| 37 | + // as cgroupv1, including security hardenings to avoid misconfiguration |
| 38 | + // (such as punching holes in wildcard rules). |
| 39 | + emu := new(emulator) |
| 40 | + for _, rule := range rules { |
| 41 | + if err := emu.Apply(*rule); err != nil { |
| 42 | + return nil, "", err |
| 43 | + } |
| 44 | + } |
| 45 | + cleanRules, err := emu.Rules() |
| 46 | + if err != nil { |
| 47 | + return nil, "", err |
| 48 | + } |
| 49 | + |
| 50 | + p := &program{ |
| 51 | + defaultAllow: emu.IsBlacklist(), |
| 52 | + } |
| 53 | + p.init() |
| 54 | + |
| 55 | + for idx, rule := range cleanRules { |
| 56 | + if rule.Type == devices.WildcardDevice { |
| 57 | + // We can safely skip over wildcard entries because there should |
| 58 | + // only be one (at most) at the very start to instruct cgroupv1 to |
| 59 | + // go into allow-list mode. However we do double-check this here. |
| 60 | + if idx != 0 || rule.Allow != emu.IsBlacklist() { |
| 61 | + return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString()) |
| 62 | + } |
| 63 | + continue |
| 64 | + } |
| 65 | + if rule.Allow == p.defaultAllow { |
| 66 | + // There should be no rules which have an action equal to the |
| 67 | + // default action, the emulator removes those. |
| 68 | + return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString()) |
| 69 | + } |
| 70 | + if err := p.appendRule(rule); err != nil { |
| 71 | + return nil, "", err |
| 72 | + } |
| 73 | + } |
| 74 | + return p.finalize(), license, nil |
| 75 | +} |
| 76 | + |
// program is the eBPF device filter program while it is being assembled.
type program struct {
	// insts is the list of instructions emitted so far.
	insts asm.Instructions

	// defaultAllow selects the verdict emitted by finalize for accesses that
	// no rule block matched: 1 when true, 0 when false.
	defaultAllow bool

	// blockID is the number used in the "block-N" symbol of the next block.
	// finalize sets it to -1, after which appendRule refuses to add rules.
	blockID int
}
| 82 | + |
| 83 | +func (p *program) init() { |
| 84 | + // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 |
| 85 | + /* |
| 86 | + u32 access_type |
| 87 | + u32 major |
| 88 | + u32 minor |
| 89 | + */ |
| 90 | + // R2 <- type (lower 16 bit of u32 access_type at R1[0]) |
| 91 | + p.insts = append(p.insts, |
| 92 | + asm.LoadMem(asm.R2, asm.R1, 0, asm.Word), |
| 93 | + asm.And.Imm32(asm.R2, 0xFFFF)) |
| 94 | + |
| 95 | + // R3 <- access (upper 16 bit of u32 access_type at R1[0]) |
| 96 | + p.insts = append(p.insts, |
| 97 | + asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), |
| 98 | + // RSh: bitwise shift right |
| 99 | + asm.RSh.Imm32(asm.R3, 16)) |
| 100 | + |
| 101 | + // R4 <- major (u32 major at R1[4]) |
| 102 | + p.insts = append(p.insts, |
| 103 | + asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) |
| 104 | + |
| 105 | + // R5 <- minor (u32 minor at R1[8]) |
| 106 | + p.insts = append(p.insts, |
| 107 | + asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) |
| 108 | +} |
| 109 | + |
| 110 | +// appendRule rule converts an OCI rule to the relevant eBPF block and adds it |
| 111 | +// to the in-progress filter program. In order to operate properly, it must be |
| 112 | +// called with a "clean" rule list (generated by devices.Emulator.Rules() -- |
| 113 | +// with any "a" rules removed). |
| 114 | +func (p *program) appendRule(rule *devices.Rule) error { |
| 115 | + if p.blockID < 0 { |
| 116 | + return errors.New("the program is finalized") |
| 117 | + } |
| 118 | + |
| 119 | + var bpfType int32 |
| 120 | + switch rule.Type { |
| 121 | + case devices.CharDevice: |
| 122 | + bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) |
| 123 | + case devices.BlockDevice: |
| 124 | + bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) |
| 125 | + default: |
| 126 | + // We do not permit 'a', nor any other types we don't know about. |
| 127 | + return fmt.Errorf("invalid type %q", string(rule.Type)) |
| 128 | + } |
| 129 | + if rule.Major > math.MaxUint32 { |
| 130 | + return fmt.Errorf("invalid major %d", rule.Major) |
| 131 | + } |
| 132 | + if rule.Minor > math.MaxUint32 { |
| 133 | + return fmt.Errorf("invalid minor %d", rule.Major) |
| 134 | + } |
| 135 | + hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1 |
| 136 | + hasMinor := rule.Minor >= 0 |
| 137 | + bpfAccess := int32(0) |
| 138 | + for _, r := range rule.Permissions { |
| 139 | + switch r { |
| 140 | + case 'r': |
| 141 | + bpfAccess |= unix.BPF_DEVCG_ACC_READ |
| 142 | + case 'w': |
| 143 | + bpfAccess |= unix.BPF_DEVCG_ACC_WRITE |
| 144 | + case 'm': |
| 145 | + bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD |
| 146 | + default: |
| 147 | + return fmt.Errorf("unknown device access %v", r) |
| 148 | + } |
| 149 | + } |
| 150 | + // If the access is rwm, skip the check. |
| 151 | + hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) |
| 152 | + |
| 153 | + var ( |
| 154 | + blockSym = "block-" + strconv.Itoa(p.blockID) |
| 155 | + nextBlockSym = "block-" + strconv.Itoa(p.blockID+1) |
| 156 | + prevBlockLastIdx = len(p.insts) - 1 |
| 157 | + ) |
| 158 | + p.insts = append(p.insts, |
| 159 | + // if (R2 != bpfType) goto next |
| 160 | + asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), |
| 161 | + ) |
| 162 | + if hasAccess { |
| 163 | + p.insts = append(p.insts, |
| 164 | + // if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next |
| 165 | + asm.Mov.Reg32(asm.R1, asm.R3), |
| 166 | + asm.And.Imm32(asm.R1, bpfAccess), |
| 167 | + asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym), |
| 168 | + ) |
| 169 | + } |
| 170 | + if hasMajor { |
| 171 | + p.insts = append(p.insts, |
| 172 | + // if (R4 != major) goto next |
| 173 | + asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym), |
| 174 | + ) |
| 175 | + } |
| 176 | + if hasMinor { |
| 177 | + p.insts = append(p.insts, |
| 178 | + // if (R5 != minor) goto next |
| 179 | + asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym), |
| 180 | + ) |
| 181 | + } |
| 182 | + p.insts = append(p.insts, acceptBlock(rule.Allow)...) |
| 183 | + // set blockSym to the first instruction we added in this iteration |
| 184 | + p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].WithSymbol(blockSym) |
| 185 | + p.blockID++ |
| 186 | + return nil |
| 187 | +} |
| 188 | + |
| 189 | +func (p *program) finalize() asm.Instructions { |
| 190 | + var v int32 |
| 191 | + if p.defaultAllow { |
| 192 | + v = 1 |
| 193 | + } |
| 194 | + blockSym := "block-" + strconv.Itoa(p.blockID) |
| 195 | + p.insts = append(p.insts, |
| 196 | + // R0 <- v |
| 197 | + asm.Mov.Imm32(asm.R0, v).WithSymbol(blockSym), |
| 198 | + asm.Return(), |
| 199 | + ) |
| 200 | + p.blockID = -1 |
| 201 | + return p.insts |
| 202 | +} |
| 203 | + |
| 204 | +func acceptBlock(accept bool) asm.Instructions { |
| 205 | + var v int32 |
| 206 | + if accept { |
| 207 | + v = 1 |
| 208 | + } |
| 209 | + return []asm.Instruction{ |
| 210 | + // R0 <- v |
| 211 | + asm.Mov.Imm32(asm.R0, v), |
| 212 | + asm.Return(), |
| 213 | + } |
| 214 | +} |
0 commit comments