diff --git a/Cargo.lock b/Cargo.lock index ff0aeb8..6c308dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,19 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if 1.0.4", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -11,6 +24,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "anstream" version = "0.6.21" @@ -67,6 +86,40 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "async-broadcast" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435a87a52755b8f27fcf321ac4f04b2802e337c8c4872923137471ec39c37532" +dependencies = [ + "event-listener", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "async-trait" version = "0.1.89" @@ -78,12 +131,87 @@ dependencies = [ "syn 2.0.117", ] 
+[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "axum" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "backoff" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" +dependencies = [ + "getrandom 0.2.17", + "instant", + "rand", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "1.3.2" @@ -117,6 +245,16 @@ version = "1.11.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "0.1.10" @@ -154,6 +292,16 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "chrono" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +dependencies = [ + "num-traits", + "serde", +] + [[package]] name = "clap" version = "4.5.60" @@ -200,6 +348,15 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "containerd-shim" version = "0.10.0" @@ -244,6 +401,32 @@ dependencies = [ "ttrpc-codegen", ] +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "cpufeatures" version = "0.2.17" @@ -416,12 +599,56 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "dtoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" + +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + +[[package]] +name = "educe" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "enum-ordinalize" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "env_home" version = "0.1.0" @@ -444,12 +671,39 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = 
"event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener", + "pin-project-lite", +] + [[package]] name = "fastrand" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "fixedbitset" version = "0.2.0" @@ -468,6 +722,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + [[package]] name = "futures" version = "0.3.32" @@ -566,6 +829,29 @@ dependencies = [ "version_check", ] +[[package]] +name = "getrandom" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" +dependencies = [ + "cfg-if 1.0.4", + "libc", + "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if 1.0.4", + "libc", + "r-efi", + "wasip2", +] + [[package]] name = "getrandom" version = "0.4.1" @@ -612,6 +898,8 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" 
dependencies = [ + "allocator-api2", + "equivalent", "foldhash", ] @@ -621,6 +909,30 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "headers" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3314d5adb5d94bcdf56771f2e50dbbc80bb4bdf88967526706205ac9eff24eb" +dependencies = [ + "base64", + "bytes", + "headers-core", + "http", + "httpdate", + "mime", + "sha1", +] + +[[package]] +name = "headers-core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" +dependencies = [ + "http", +] + [[package]] name = "heck" version = "0.3.3" @@ -645,6 +957,155 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "hostname" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "617aaa3557aef3810a6369d0a99fac8a080891b68bd9f9812a1eeda0c0730cbd" +dependencies = [ + "cfg-if 1.0.4", + "libc", + "windows-link", +] + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http", +] + +[[package]] +name = "http-body-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" +dependencies = [ + "atomic-waker", + "bytes", + "futures-channel", + "futures-core", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "pin-utils", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-http-proxy" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ad4b0a1e37510028bc4ba81d0e38d239c39671b0f0ce9e02dfa93a8133f7c08" +dependencies = [ + "bytes", + "futures-util", + "headers", + "http", + "hyper", + "hyper-rustls", + "hyper-util", + "pin-project-lite", + "rustls-native-certs 0.7.3", + "tokio", + "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http", + "hyper", + "hyper-util", + "log", + "rustls", + "rustls-native-certs 0.8.3", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", +] + +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + "hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http", + "http-body", + "hyper", + "libc", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + [[package]] name = "id-arena" version = "2.3.0" @@ -679,6 +1140,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "instant" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" +dependencies = [ + "cfg-if 1.0.4", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.2" @@ -700,6 +1170,165 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "json-patch" +version = "3.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "863726d7afb6bc2590eeff7135d923545e5e964f004c2ccf8716c25e70a86f08" +dependencies = [ + "jsonptr", + "serde", + "serde_json", + "thiserror 1.0.69", +] + +[[package]] +name = "jsonpath-rust" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c00ae348f9f8fd2d09f82a98ca381c60df9e0820d8d79fce43e649b4dc3128b" +dependencies = [ + "pest", + "pest_derive", + "regex", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "jsonptr" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dea2b27dd239b2556ed7a25ba842fe47fd602e7fc7433c2a8d6106d4d9edd70" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "k8s-openapi" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c75b990324f09bef15e791606b7b7a296d02fc88a344f6eba9390970a870ad5" +dependencies = [ + "base64", + "chrono", + "serde", + "serde-value", + "serde_json", +] + +[[package]] +name = "kube" 
+version = "0.98.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32053dc495efad4d188c7b33cc7c02ef4a6e43038115348348876efd39a53cba" +dependencies = [ + "k8s-openapi", + "kube-client", + "kube-core", + "kube-derive", + "kube-runtime", +] + +[[package]] +name = "kube-client" +version = "0.98.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d34ad38cdfbd1fa87195d42569f57bb1dda6ba5f260ee32fef9570b7937a0c9" +dependencies = [ + "base64", + "bytes", + "chrono", + "either", + "futures", + "home", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-http-proxy", + "hyper-rustls", + "hyper-timeout", + "hyper-util", + "jsonpath-rust", + "k8s-openapi", + "kube-core", + "pem", + "rustls", + "rustls-pemfile", + "secrecy", + "serde", + "serde_json", + "serde_yaml", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tower", + "tower-http", + "tracing", +] + +[[package]] +name = "kube-core" +version = "0.98.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97aa830b288a178a90e784d1b0f1539f2d200d2188c7b4a3146d9dc983d596f3" +dependencies = [ + "chrono", + "form_urlencoded", + "http", + "json-patch", + "k8s-openapi", + "schemars", + "serde", + "serde-value", + "serde_json", + "thiserror 2.0.18", +] + +[[package]] +name = "kube-derive" +version = "0.98.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37745d8a4076b77e0b1952e94e358726866c8e14ec94baaca677d47dcdb98658" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "serde_json", + "syn 2.0.117", +] + +[[package]] +name = "kube-runtime" +version = "0.98.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a41af186a0fe80c71a13a13994abdc3ebff80859ca6a4b8a6079948328c135b" +dependencies = [ + "ahash", + "async-broadcast", + "async-stream", + "async-trait", + "backoff", + "educe", + "futures", + "hashbrown 0.15.5", + "hostname", + "json-patch", + "jsonptr", + 
"k8s-openapi", + "kube-client", + "parking_lot", + "pin-project", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -757,6 +1386,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "memchr" version = "2.8.0" @@ -790,6 +1425,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + [[package]] name = "mio" version = "1.1.1" @@ -897,6 +1538,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "oci-spec" version = "0.7.1" @@ -925,6 +1575,27 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "openssl-probe" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "page_size" version = "0.6.0" @@ -935,6 +1606,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + [[package]] name = "parking_lot" version = "0.12.5" @@ -958,6 +1635,65 @@ dependencies = [ "windows-link", ] +[[package]] +name = "pem" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" +dependencies = [ + "base64", + "serde_core", +] + +[[package]] +name = "percent-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" + +[[package]] +name = "pest" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "pest_meta" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" +dependencies = [ + "pest", + "sha2", +] + [[package]] 
name = "petgraph" version = "0.5.1" @@ -968,6 +1704,26 @@ dependencies = [ "indexmap 1.9.3", ] +[[package]] +name = "pin-project" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -986,6 +1742,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prctl" version = "1.0.0" @@ -1017,26 +1782,49 @@ dependencies = [ ] [[package]] -name = "proc-macro-error2" -version = "2.0.1" +name = "proc-macro-error2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" +dependencies = [ + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "prometheus-client" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf41c1a7c32ed72abe5082fb19505b969095c12da9f5732a4bc9878757fd087c" +dependencies = [ 
+ "dtoa", + "itoa", + "parking_lot", + "prometheus-client-derive-encode", +] + +[[package]] +name = "prometheus-client-derive-encode" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" +checksum = "440f724eba9f6996b75d63681b0a92b06947f1457076d503a4d2e2c8f56442b8" dependencies = [ - "proc-macro-error-attr2", "proc-macro2", "quote", "syn 2.0.117", ] -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] - [[package]] name = "prost" version = "0.8.0" @@ -1154,16 +1942,51 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.17", +] + [[package]] name = "reaper" version = "0.2.6" dependencies = [ "anyhow", "async-trait", + "axum", "clap", "containerd-shim", "containerd-shim-protos", + "futures", + "k8s-openapi", + "kube", "nix 0.28.0", + "prometheus-client", "protobuf", "serde", "serde_json", @@ -1214,6 +2037,20 @@ version = "0.8.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if 1.0.4", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rustix" version = "0.38.44" @@ -1240,12 +2077,87 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.23.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" +dependencies = [ + "openssl-probe 0.1.6", + "rustls-pemfile", + "rustls-pki-types", + "schannel", + "security-framework 2.11.1", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe 0.2.1", + "rustls-pki-types", + "schannel", + "security-framework 3.7.0", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" 
+dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "scc" version = "2.4.0" @@ -1255,6 +2167,39 @@ dependencies = [ "sdd", ] +[[package]] +name = "schannel" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.117", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -1267,6 +2212,51 @@ version = "3.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca" +[[package]] +name = "secrecy" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e891af845473308773346dc847b2c23ee78fe442e0472ac50e22a18a93d3ae5a" +dependencies = [ + "zeroize", +] + +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.11.0", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags 2.11.0", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" version = "1.0.27" @@ -1283,6 +2273,16 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float", + "serde", +] + [[package]] name = "serde_core" version = "1.0.228" @@ -1303,6 +2303,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "serde_json" version = "1.0.149" @@ -1316,6 +2327,42 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 2.13.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "serial_test" version = "3.3.1" @@ -1342,6 +2389,17 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if 1.0.4", + "cpufeatures", + "digest", +] + [[package]] name = "sha2" version = "0.10.9" @@ -1362,6 +2420,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook" version = "0.3.18" @@ -1429,6 +2493,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "1.0.109" @@ -1451,6 +2521,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + [[package]] name = "tempfile" version = "3.25.0" @@ -1458,7 +2534,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1" dependencies = [ "fastrand", - "getrandom", + "getrandom 0.4.1", "once_cell", "rustix 1.1.3", "windows-sys 0.61.2", @@ -1572,6 +2648,30 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "slab", + "tokio", +] + [[package]] name = "tokio-vsock" version = "0.4.0" @@ -1585,12 +2685,60 @@ dependencies = [ "vsock", ] +[[package]] +name = "tower" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-http" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +dependencies = [ + "base64", + "bitflags 2.11.0", + "bytes", + "http", + "http-body", + "mime", + "pin-project-lite", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + [[package]] name = "tracing" version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -1646,6 +2794,12 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + [[package]] name = "ttrpc" version = "0.8.6" @@ -1701,6 +2855,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -1719,6 +2879,18 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "utf8parse" version = "0.2.2" @@ -1753,6 +2925,15 @@ dependencies = [ "nix 0.24.3", ] +[[package]] +name = 
"want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -2188,6 +3369,32 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "zerocopy" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a789c6e490b576db9f7e6b6d661bcc9799f7c0ac8352f56ea20193b2681532e5" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f65c489a7071a749c849713807783f70672b28094011623e200cb86dcb835953" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + [[package]] name = "zmij" version = "1.0.21" diff --git a/Cargo.toml b/Cargo.toml index 160a490..58dd42a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ license = "MIT" [dependencies] anyhow = "1" thiserror = "1" -clap = { version = "4", features = ["derive"] } +clap = { version = "4", features = ["derive", "env"] } serde = { version = "1", features = ["derive"] } serde_json = "1" nix = { version = "0.28", features = ["signal", "process", "user", "sched", "mount", "fs", "term"] } @@ -23,6 +23,16 @@ protobuf = "3.3" containerd-shim = { version = "0.10", features = ["async", "tracing"] } containerd-shim-protos = { version = "0.10", features = ["async"] } +# reaper-agent dependencies +kube = { version = "0.98", features = ["runtime", "client", "derive"], optional = true } +k8s-openapi = { version = "0.24", features = ["v1_31"], optional = true } +prometheus-client = { version = "0.23", optional = true } +axum = { version = "0.8", 
optional = true } +futures = { version = "0.3", optional = true } + +[features] +agent = ["kube", "k8s-openapi", "prometheus-client", "axum", "futures"] + [dev-dependencies] tempfile = "3" serial_test = "3" @@ -35,6 +45,11 @@ path = "src/bin/reaper-runtime/main.rs" name = "containerd-shim-reaper-v2" path = "src/bin/containerd-shim-reaper-v2/main.rs" +[[bin]] +name = "reaper-agent" +path = "src/bin/reaper-agent/main.rs" +required-features = ["agent"] + [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tarpaulin_include)'] } diff --git a/Dockerfile.agent b/Dockerfile.agent new file mode 100644 index 0000000..32f28a0 --- /dev/null +++ b/Dockerfile.agent @@ -0,0 +1,27 @@ +# Multi-stage build for reaper-agent +# Produces a minimal static binary for Kubernetes DaemonSet deployment. + +# --- Builder stage --- +FROM messense/rust-musl-cross:x86_64-musl AS builder-amd64 +WORKDIR /work +COPY . . +RUN cargo build --release --features agent --bin reaper-agent --target x86_64-unknown-linux-musl + +FROM messense/rust-musl-cross:aarch64-musl AS builder-arm64 +WORKDIR /work +COPY . . 
+RUN cargo build --release --features agent --bin reaper-agent --target aarch64-unknown-linux-musl + +# Alias the builder stage that matches the target architecture (amd64 or arm64). +# TARGETARCH is set by BuildKit; FROM expands it, whereas COPY --from may not. +FROM builder-${TARGETARCH} AS builder + +# --- Runtime stage --- +# Use distroless for a minimal image with ca-certificates (needed for K8s API TLS) +FROM gcr.io/distroless/static-debian12 + +# Copy only the binary built for the target architecture +# Note: For single-arch builds, use docker buildx with --platform +COPY --from=builder /work/target/*/release/reaper-agent /reaper-agent + +ENTRYPOINT ["/reaper-agent"] diff --git a/deploy/ansible/install-reaper.yml b/deploy/ansible/install-reaper.yml index 7ade390..94aead3 100644 --- a/deploy/ansible/install-reaper.yml +++ b/deploy/ansible/install-reaper.yml @@ -237,5 +237,6 @@ - "1. Create RuntimeClass: kubectl apply -f deploy/kubernetes/runtimeclass.yaml" - "2. Deploy test pod: kubectl apply -f deploy/kubernetes/runtimeclass.yaml" - "3. Verify: kubectl logs reaper-example" + - "4. Optional: kubectl apply -f deploy/kubernetes/reaper-agent.yaml (config sync, GC, metrics)" - "" - "To rollback: ansible-playbook -i inventory.ini deploy/ansible/rollback-reaper.yml" diff --git a/deploy/kubernetes/reaper-agent.yaml b/deploy/kubernetes/reaper-agent.yaml new file mode 100644 index 0000000..d8269e7 --- /dev/null +++ b/deploy/kubernetes/reaper-agent.yaml @@ -0,0 +1,152 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: reaper-system + labels: + app.kubernetes.io/part-of: reaper +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: reaper-config + namespace: reaper-system + labels: + app.kubernetes.io/part-of: reaper + app.kubernetes.io/component: config +data: + reaper.conf: | + # Reaper runtime configuration + # Managed by reaper-agent ConfigMap sync. + # Edit this ConfigMap to change Reaper settings on all nodes. 
+ REAPER_DNS_MODE=host + REAPER_OVERLAY_ISOLATION=namespace + REAPER_ANNOTATIONS_ENABLED=true +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: reaper-agent + namespace: reaper-system + labels: + app.kubernetes.io/part-of: reaper + app.kubernetes.io/component: agent +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: reaper-agent + labels: + app.kubernetes.io/part-of: reaper + app.kubernetes.io/component: agent +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: reaper-agent + labels: + app.kubernetes.io/part-of: reaper + app.kubernetes.io/component: agent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: reaper-agent +subjects: + - kind: ServiceAccount + name: reaper-agent + namespace: reaper-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: reaper-agent + namespace: reaper-system + labels: + app.kubernetes.io/name: reaper-agent + app.kubernetes.io/part-of: reaper + app.kubernetes.io/component: agent +spec: + selector: + matchLabels: + app.kubernetes.io/name: reaper-agent + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + template: + metadata: + labels: + app.kubernetes.io/name: reaper-agent + app.kubernetes.io/part-of: reaper + app.kubernetes.io/component: agent + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9100" + prometheus.io/path: "/metrics" + spec: + serviceAccountName: reaper-agent + hostPID: true + tolerations: + - operator: Exists + effect: NoSchedule + containers: + - name: agent + image: ghcr.io/miguelgila/reaper-agent:latest + imagePullPolicy: IfNotPresent + securityContext: + runAsUser: 0 + args: + - --config-namespace=reaper-system + - --config-name=reaper-config + - --config-path=/host/etc/reaper/reaper.conf + - --state-dir=/host/run/reaper + - 
--shim-path=/host/usr/local/bin/containerd-shim-reaper-v2 + - --runtime-path=/host/usr/local/bin/reaper-runtime + ports: + - containerPort: 9100 + name: metrics + protocol: TCP + livenessProbe: + httpGet: + path: /healthz + port: metrics + initialDelaySeconds: 10 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /readyz + port: metrics + initialDelaySeconds: 5 + periodSeconds: 10 + resources: + requests: + cpu: 10m + memory: 32Mi + limits: + cpu: 100m + memory: 64Mi + volumeMounts: + - name: etc-reaper + mountPath: /host/etc/reaper + - name: run-reaper + mountPath: /host/run/reaper + - name: usr-local-bin + mountPath: /host/usr/local/bin + readOnly: true + volumes: + - name: etc-reaper + hostPath: + path: /etc/reaper + type: DirectoryOrCreate + - name: run-reaper + hostPath: + path: /run/reaper + type: DirectoryOrCreate + - name: usr-local-bin + hostPath: + path: /usr/local/bin + type: Directory diff --git a/docs/BUGS.md b/docs/BUGS.md new file mode 100644 index 0000000..cf124e8 --- /dev/null +++ b/docs/BUGS.md @@ -0,0 +1,43 @@ +# Known Bugs and Flaky Tests + +## DNS Mode Annotation Override Test Flake + +**Test:** `DNS mode annotation override (host vs kubernetes)` +**Severity:** Low (intermittent, CI-only) +**Status:** Open + +### Symptoms + +The test times out (64s) waiting for the `reaper-dns-annot-default` or +`reaper-dns-annot-host` pod to reach `Succeeded` phase. The pod gets stuck +and containerd reports: + +``` +failed to stop sandbox: task must be stopped before deletion: running: failed precondition +``` + +### Root Cause + +A timing race in containerd's sandbox lifecycle. When the shim reports the +container has exited, containerd sometimes tries to delete the task before +it has fully transitioned out of the `running` state. This causes a +`failed precondition` error that prevents sandbox teardown, leaving the pod +stuck. + +This is a containerd-level issue, not a Reaper bug. 
It tends to surface +under load (e.g., when many pods are created/deleted in quick succession +during the integration test suite). + +### Workarounds + +- Re-running the test suite usually passes on retry. +- The `--agent-only` flag skips this test entirely for fast agent iteration. +- Running with `--no-cleanup` and re-running `--skip-cargo --no-cleanup` + often avoids the race since the cluster is warmer. + +### Related + +- Observed in Kind clusters with containerd v1.7+. +- The `Combined annotations` test exercises similar annotation logic and + passes reliably, suggesting the issue is timing-related rather than + functional. diff --git a/docs/TODO.md b/docs/TODO.md index 2675928..a1e245d 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -19,4 +19,9 @@ List of tasks to do, not ordered in any specific way. - [x] Add certain configuration parameters as annotations, so users can influence how Reaper works (DNS, overlay name and mount point, etc.). But ensuring adminsistrator parameters cannot be overriden. - [ ] Introduce more complex examples, answer this question: can we have a sssd containerd pod expose its socks file so a sample reaper pod can utilize it? - [ ] Produce RPM an DEB packages compatible with major distributions (SUSE, RHEL, Debian, Ubuntu). This will help with installation and deployment. -- [ ] Evaluate if Reaper can be configured using a Kubernetes ConfigMap instead of relying on a node-level config file. \ No newline at end of file +- [x] Evaluate if Reaper can be configured using a Kubernetes ConfigMap instead of relying on a node-level config file. 
(Implemented via `reaper-agent` DaemonSet — PR #27) +- [ ] reaper-agent Phase 2: Overlay GC — reconcile overlay namespaces against Kubernetes API, delete overlays for namespaces that no longer exist +- [ ] reaper-agent Phase 2: Binary self-update — watch ConfigMap version field, download and replace shim/runtime binaries +- [ ] reaper-agent Phase 2: Node condition reporting — patch Node object with `ReaperReady` condition +- [ ] reaper-agent Phase 2: Mount namespace cleanup — detect and unmount stale `/run/reaper/ns/*` bind-mounts +- [ ] Fix known bugs documented in [docs/BUGS.md](BUGS.md) \ No newline at end of file diff --git a/scripts/build-agent-image.sh b/scripts/build-agent-image.sh new file mode 100755 index 0000000..b4efd22 --- /dev/null +++ b/scripts/build-agent-image.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash +# build-agent-image.sh — Build reaper-agent container image and load into Kind +# +# This script builds the reaper-agent binary using musl cross-compilation +# (same as shim/runtime), packages it into a minimal container image, +# and loads it into a Kind cluster. +# +# Usage: +# ./scripts/build-agent-image.sh --cluster-name +# ./scripts/build-agent-image.sh --cluster-name --quiet +# ./scripts/build-agent-image.sh --cluster-name --skip-build +# +# Prerequisites: +# - Docker running +# - kind cluster already created +# - Run from the repository root + +set -euo pipefail + +CLUSTER_NAME="" +SKIP_BUILD=false +QUIET=false +IMAGE_NAME="ghcr.io/miguelgila/reaper-agent:latest" +LOG_FILE="/tmp/reaper-agent-build.log" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +# --------------------------------------------------------------------------- +# Colors +# --------------------------------------------------------------------------- +if [[ -n "${NO_COLOR:-}" ]]; then + B="" G="" Y="" C="" R="" +elif [[ -t 1 ]] || [[ -n "${CI:-}" ]]; then + B=$'\033[1m' G=$'\033[1;32m' Y=$'\033[1;33m' C=$'\033[1;36m' R=$'\033[0m' +else + B="" G="" Y="" C="" R="" +fi + +info() { echo "${C}==> ${R}${B}$*${R}"; } +ok() { echo " ${G}OK${R} $*"; } +fail() { echo " ${Y}ERR${R} $*" >&2; exit 1; } + +if_log() { + if $QUIET; then cat >> "$LOG_FILE"; else cat; fi +} + +# --------------------------------------------------------------------------- +# Argument parsing +# --------------------------------------------------------------------------- +while [[ $# -gt 0 ]]; do + case $1 in + --cluster-name) + CLUSTER_NAME="${2:-}" + [[ -z "$CLUSTER_NAME" ]] && fail "--cluster-name requires a value" + shift 2 + ;; + --skip-build) + SKIP_BUILD=true + shift + ;; + --quiet) + QUIET=true + shift + ;; + --image) + IMAGE_NAME="${2:-}" + [[ -z "$IMAGE_NAME" ]] && fail "--image requires a value" + shift 2 + ;; + -h|--help) + echo "Usage: $0 --cluster-name [OPTIONS]" + echo "" + echo "Build reaper-agent image and load into Kind cluster." 
+ echo "" + echo "Options:" + echo " --cluster-name Kind cluster name (required)" + echo " --skip-build Skip binary compilation (use existing)" + echo " --image Image name (default: $IMAGE_NAME)" + echo " --quiet Suppress output" + exit 0 + ;; + *) + fail "Unknown option: $1" + ;; + esac +done + +[[ -z "$CLUSTER_NAME" ]] && fail "--cluster-name is required" + +# --------------------------------------------------------------------------- +# Detect architecture +# --------------------------------------------------------------------------- +NODE_ID=$(docker ps --filter "name=${CLUSTER_NAME}-control-plane" --format '{{.ID}}') +[[ -z "$NODE_ID" ]] && fail "Cannot find control-plane container for cluster '$CLUSTER_NAME'" + +NODE_ARCH=$(docker exec "$NODE_ID" uname -m 2>&1) || fail "Cannot detect node architecture" + +case "$NODE_ARCH" in + aarch64) + TARGET_TRIPLE="aarch64-unknown-linux-musl" + MUSL_IMAGE="messense/rust-musl-cross:aarch64-musl" + ;; + x86_64) + TARGET_TRIPLE="x86_64-unknown-linux-musl" + MUSL_IMAGE="messense/rust-musl-cross:x86_64-musl" + ;; + *) + fail "Unsupported architecture: $NODE_ARCH" + ;; +esac + +# --------------------------------------------------------------------------- +# Build agent binary +# --------------------------------------------------------------------------- +cd "$REPO_ROOT" + +if ! $SKIP_BUILD; then + info "Building reaper-agent for $TARGET_TRIPLE" | if_log + + if $QUIET; then + docker run --rm \ + -v "$(pwd)":/work \ + -w /work \ + "$MUSL_IMAGE" \ + cargo build --release --features agent \ + --bin reaper-agent \ + --target "$TARGET_TRIPLE" \ + >> "$LOG_FILE" 2>&1 || fail "Agent build failed. See $LOG_FILE" + else + docker run --rm \ + -v "$(pwd)":/work \ + -w /work \ + "$MUSL_IMAGE" \ + cargo build --release --features agent \ + --bin reaper-agent \ + --target "$TARGET_TRIPLE" \ + 2>&1 | tee -a "$LOG_FILE" || fail "Agent build failed. See $LOG_FILE" + fi + + ok "Agent binary built." 
| if_log +fi + +AGENT_BINARY="target/$TARGET_TRIPLE/release/reaper-agent" +[[ -f "$AGENT_BINARY" ]] || fail "Agent binary not found at $AGENT_BINARY" + +# --------------------------------------------------------------------------- +# Build minimal container image using inline Dockerfile +# --------------------------------------------------------------------------- +info "Building container image $IMAGE_NAME" | if_log + +# Use a temporary Dockerfile that just copies the pre-built binary +TEMP_DOCKERFILE=$(mktemp /tmp/Dockerfile.agent-XXXXXX) +trap "rm -f '$TEMP_DOCKERFILE'" EXIT + +cat > "$TEMP_DOCKERFILE" <<'DOCKERFILE' +FROM gcr.io/distroless/static-debian12 +COPY reaper-agent /reaper-agent +USER 0 +ENTRYPOINT ["/reaper-agent"] +DOCKERFILE + +# Copy binary to a temp context dir (Docker build context) +TEMP_CONTEXT=$(mktemp -d /tmp/reaper-agent-context-XXXXXX) +trap "rm -rf '$TEMP_CONTEXT' '$TEMP_DOCKERFILE'" EXIT + +cp "$AGENT_BINARY" "$TEMP_CONTEXT/reaper-agent" + +if $QUIET; then + docker build -f "$TEMP_DOCKERFILE" -t "$IMAGE_NAME" "$TEMP_CONTEXT" >> "$LOG_FILE" 2>&1 || fail "Image build failed. See $LOG_FILE" +else + docker build -f "$TEMP_DOCKERFILE" -t "$IMAGE_NAME" "$TEMP_CONTEXT" 2>&1 | tee -a "$LOG_FILE" || fail "Image build failed. See $LOG_FILE" +fi + +ok "Image built: $IMAGE_NAME" | if_log + +# --------------------------------------------------------------------------- +# Load image into Kind +# --------------------------------------------------------------------------- +info "Loading image into Kind cluster '$CLUSTER_NAME'" | if_log + +if $QUIET; then + kind load docker-image "$IMAGE_NAME" --name "$CLUSTER_NAME" >> "$LOG_FILE" 2>&1 || fail "Loading image into Kind failed. See $LOG_FILE" +else + kind load docker-image "$IMAGE_NAME" --name "$CLUSTER_NAME" 2>&1 | tee -a "$LOG_FILE" || fail "Loading image into Kind failed. See $LOG_FILE" +fi + +ok "Image loaded into Kind." 
| if_log diff --git a/scripts/install-reaper.sh b/scripts/install-reaper.sh index c3ecbd4..107eb45 100755 --- a/scripts/install-reaper.sh +++ b/scripts/install-reaper.sh @@ -41,6 +41,7 @@ RELEASE_VERSION="" VERBOSE=false DRY_RUN=false SKIP_RUNTIMECLASS=false +WITH_AGENT=false # --------------------------------------------------------------------------- # Color setup @@ -81,6 +82,7 @@ Modes: Options: --release Download pre-built binaries from GitHub Releases (e.g., v0.2.0) + --with-agent Deploy reaper-agent DaemonSet (config sync, GC, metrics) --verbose Enable verbose Ansible output (-vv) --dry-run Ansible check mode (no changes) --skip-runtimeclass Skip RuntimeClass creation @@ -150,6 +152,10 @@ parse_args() { SKIP_RUNTIMECLASS=true shift ;; + --with-agent) + WITH_AGENT=true + shift + ;; -h|--help) usage exit 0 @@ -412,6 +418,11 @@ run_ansible_playbook() { if ! $SKIP_RUNTIMECLASS && ! $DRY_RUN; then create_runtimeclass fi + + # Deploy reaper-agent DaemonSet + if $WITH_AGENT && ! $DRY_RUN; then + deploy_agent + fi } # --------------------------------------------------------------------------- @@ -431,6 +442,31 @@ create_runtimeclass() { fi } +# --------------------------------------------------------------------------- +# Deploy reaper-agent DaemonSet +# --------------------------------------------------------------------------- +deploy_agent() { + if ! command -v kubectl &>/dev/null; then + log_warn "kubectl not found, skipping reaper-agent deployment" + return 0 + fi + + log_info "Deploying reaper-agent DaemonSet..." + if kubectl apply -f "$PROJECT_ROOT/deploy/kubernetes/reaper-agent.yaml"; then + log_success "reaper-agent DaemonSet deployed" + + # Wait for agent pods to be ready + log_info "Waiting for reaper-agent pods to be ready..." 
+ if kubectl rollout status daemonset/reaper-agent -n reaper-system --timeout=120s; then + log_success "reaper-agent pods ready" + else + log_warn "reaper-agent rollout timed out (pods may still be starting)" + fi + else + log_warn "reaper-agent deployment failed" + fi +} + # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- @@ -465,6 +501,9 @@ main() { log_info " 1. Verify: kubectl get runtimeclass reaper-v2" log_info " 2. Test: kubectl apply -f deploy/kubernetes/runtimeclass.yaml" log_info " 3. Check logs: kubectl logs reaper-example" + if ! $WITH_AGENT; then + log_info " 4. Optional: re-run with --with-agent to deploy reaper-agent DaemonSet" + fi } main "$@" diff --git a/scripts/lib/test-integration-suite.sh b/scripts/lib/test-integration-suite.sh index aca698f..a1efcc9 100644 --- a/scripts/lib/test-integration-suite.sh +++ b/scripts/lib/test-integration-suite.sh @@ -1985,6 +1985,217 @@ YAML log_verbose "Unknown annotations silently ignored, known annotations applied correctly" } +# --------------------------------------------------------------------------- +# reaper-agent integration tests +# These require the reaper-agent image to be loaded into the Kind cluster. +# Skipped if image is not available. +# --------------------------------------------------------------------------- + +test_agent_deployment() { + # Deploy agent manifests + kubectl apply -f deploy/kubernetes/reaper-agent.yaml >> "$LOG_FILE" 2>&1 + + # Wait for agent DaemonSet rollout + if ! 
kubectl rollout status daemonset/reaper-agent -n reaper-system --timeout=120s >> "$LOG_FILE" 2>&1; then + log_error "reaper-agent DaemonSet rollout failed" + kubectl describe daemonset reaper-agent -n reaper-system >> "$LOG_FILE" 2>&1 || true + kubectl get pods -n reaper-system >> "$LOG_FILE" 2>&1 || true + return 1 + fi + + # Verify at least one agent pod is running + local running_pods + running_pods=$(kubectl get pods -n reaper-system -l app.kubernetes.io/name=reaper-agent \ + --field-selector=status.phase=Running --no-headers 2>/dev/null | wc -l | tr -d ' ') + if [[ "$running_pods" -lt 1 ]]; then + log_error "Expected at least 1 running reaper-agent pod, got $running_pods" + return 1 + fi + + log_verbose "reaper-agent DaemonSet deployed: $running_pods pod(s) running" +} + +test_agent_config_sync() { + # Update the ConfigMap with a test value + kubectl apply -f - >> "$LOG_FILE" 2>&1 <<'YAML' +apiVersion: v1 +kind: ConfigMap +metadata: + name: reaper-config + namespace: reaper-system +data: + reaper.conf: | + # Integration test config + REAPER_DNS_MODE=kubernetes + REAPER_OVERLAY_ISOLATION=namespace + REAPER_TEST_MARKER=agent-sync-test +YAML + + # Give the agent time to detect and sync the change + sleep 10 + + # Verify the config file was written to the node + local config_content + config_content=$(docker exec "$NODE_ID" cat /etc/reaper/reaper.conf 2>/dev/null || echo "") + + if [[ -z "$config_content" ]]; then + log_error "Config file /etc/reaper/reaper.conf not found on node" + return 1 + fi + + if ! 
echo "$config_content" | grep -q "REAPER_TEST_MARKER=agent-sync-test"; then + log_error "Config file does not contain expected test marker" + log_error "Actual content: $config_content" + return 1 + fi + + log_verbose "Config sync verified: test marker found in /etc/reaper/reaper.conf" +} + +test_agent_healthz() { + # Get the agent pod name + local agent_pod + agent_pod=$(kubectl get pods -n reaper-system -l app.kubernetes.io/name=reaper-agent \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) + + if [[ -z "$agent_pod" ]]; then + log_error "No reaper-agent pod found" + return 1 + fi + + # Use port-forward to reach the endpoint (distroless container has no shell/wget) + local local_port=19100 + kubectl port-forward -n reaper-system "$agent_pod" ${local_port}:9100 >> "$LOG_FILE" 2>&1 & + local pf_pid=$! + sleep 2 + + local health_response + health_response=$(curl -sf http://localhost:${local_port}/healthz 2>/dev/null || echo "FAILED") + + kill "$pf_pid" 2>/dev/null || true + wait "$pf_pid" 2>/dev/null || true + + if [[ "$health_response" != "ok" ]]; then + log_error "healthz endpoint returned unexpected response: $health_response" + return 1 + fi + + log_verbose "healthz endpoint returned 'ok'" +} + +test_agent_metrics() { + local agent_pod + agent_pod=$(kubectl get pods -n reaper-system -l app.kubernetes.io/name=reaper-agent \ + -o jsonpath='{.items[0].metadata.name}' 2>/dev/null) + + if [[ -z "$agent_pod" ]]; then + log_error "No reaper-agent pod found" + return 1 + fi + + # Use port-forward to reach the endpoint (distroless container has no shell/wget) + local local_port=19101 + kubectl port-forward -n reaper-system "$agent_pod" ${local_port}:9100 >> "$LOG_FILE" 2>&1 & + local pf_pid=$! 
+ sleep 2 + + local metrics_response + metrics_response=$(curl -sf http://localhost:${local_port}/metrics 2>/dev/null || echo "FAILED") + + kill "$pf_pid" 2>/dev/null || true + wait "$pf_pid" 2>/dev/null || true + + if [[ "$metrics_response" == "FAILED" ]]; then + log_error "metrics endpoint not reachable" + return 1 + fi + + # Verify key metrics are present + local missing=() + for metric in reaper_containers_running reaper_agent_gc_runs_total reaper_agent_healthy reaper_agent_config_syncs_total; do + if ! echo "$metrics_response" | grep -q "$metric"; then + missing+=("$metric") + fi + done + + if [[ ${#missing[@]} -gt 0 ]]; then + log_error "Missing expected metrics: ${missing[*]}" + log_error "Metrics output: $metrics_response" + return 1 + fi + + log_verbose "metrics endpoint verified: all expected metrics present" +} + +test_agent_stale_gc() { + # Create a fake stale state directory on the node + docker exec "$NODE_ID" mkdir -p /run/reaper/stale-gc-test >> "$LOG_FILE" 2>&1 + docker exec "$NODE_ID" bash -c 'cat > /run/reaper/stale-gc-test/state.json << EOF +{ + "id": "stale-gc-test", + "bundle": "/tmp/fake", + "status": "running", + "pid": 999999 +} +EOF' >> "$LOG_FILE" 2>&1 + + # Wait for the next GC cycle (default 60s, but initial GC runs on startup too) + # The agent should detect pid 999999 as dead and mark it stopped + log_verbose "Waiting for GC cycle to detect stale PID..." 
+ local max_wait=90 + local elapsed=0 + while [[ $elapsed -lt $max_wait ]]; do + local state_status + state_status=$(docker exec "$NODE_ID" cat /run/reaper/stale-gc-test/state.json 2>/dev/null \ + | grep -o '"status"[[:space:]]*:[[:space:]]*"[^"]*"' | head -1 || echo "") + if echo "$state_status" | grep -q '"stopped"'; then + log_verbose "GC correctly marked stale container as stopped" + return 0 + fi + sleep 5 + elapsed=$((elapsed + 5)) + done + + log_error "GC did not mark stale container as stopped within ${max_wait}s" + docker exec "$NODE_ID" cat /run/reaper/stale-gc-test/state.json >> "$LOG_FILE" 2>&1 || true + return 1 +} + +cleanup_agent() { + kubectl delete -f deploy/kubernetes/reaper-agent.yaml --ignore-not-found >> "$LOG_FILE" 2>&1 || true + docker exec "$NODE_ID" rm -rf /run/reaper/stale-gc-test >> "$LOG_FILE" 2>&1 || true +} + +# --------------------------------------------------------------------------- +# Phase 4a: reaper-agent tests (optional, requires agent image in cluster) +# --------------------------------------------------------------------------- +phase_agent_tests() { + log_status "" + log_status "${CLR_PHASE}Phase 4a: reaper-agent tests${CLR_RESET}" + log_status "========================================" + + # Verify agent image is available in the cluster (loaded during Phase 2 setup) + local image_loaded + image_loaded=$(docker exec "$NODE_ID" crictl images 2>/dev/null \ + | grep -c "reaper-agent" || true) + + if [[ "$image_loaded" -lt 1 ]]; then + log_error "reaper-agent image not found in Kind cluster" + log_error "This should have been built and loaded during Phase 2 setup." + log_error "Check build-agent-image.sh output in the log file." 
+ return 1 + fi + + run_test test_agent_deployment "Agent DaemonSet deployment" --hard-fail + run_test test_agent_config_sync "Agent ConfigMap sync to host" --hard-fail + run_test test_agent_healthz "Agent /healthz endpoint" --hard-fail + run_test test_agent_metrics "Agent /metrics endpoint" --hard-fail + run_test test_agent_stale_gc "Agent stale state GC" --hard-fail + + # Cleanup agent resources + cleanup_agent +} + # --------------------------------------------------------------------------- # Phase 4: Integration test orchestrator # --------------------------------------------------------------------------- diff --git a/scripts/lib/test-phases.sh b/scripts/lib/test-phases.sh index 45a88fe..0ee09ff 100644 --- a/scripts/lib/test-phases.sh +++ b/scripts/lib/test-phases.sh @@ -49,9 +49,27 @@ phase_setup() { exit 1 } + # Set dedicated KUBECONFIG so all kubectl commands target the right cluster, + # even when the user has other Kind clusters or contexts active. + KUBECONFIG_FILE="/tmp/reaper-${CLUSTER_NAME}-kubeconfig" + kind get kubeconfig --name "$CLUSTER_NAME" > "$KUBECONFIG_FILE" + export KUBECONFIG="$KUBECONFIG_FILE" + log_status "Using KUBECONFIG=$KUBECONFIG_FILE" + # Capture NODE_ID for diagnostics (used by cleanup trap and test functions) NODE_ID=$(docker ps --filter "name=${CLUSTER_NAME}-control-plane" --format '{{.ID}}') + # Build and load reaper-agent image (required for Phase 4a tests) + log_status "Building reaper-agent image for Kind..." + "$SCRIPT_DIR/build-agent-image.sh" \ + --cluster-name "$CLUSTER_NAME" \ + --quiet 2>&1 | tee -a "$LOG_FILE" || { + log_error "reaper-agent image build failed" + tail -50 "$LOG_FILE" >&2 + exit 1 + } + log_status "reaper-agent image loaded into Kind." + log_status "Infrastructure setup complete." 
ci_group_end } diff --git a/scripts/run-integration-tests.sh b/scripts/run-integration-tests.sh index ab0bb12..8f30a8e 100755 --- a/scripts/run-integration-tests.sh +++ b/scripts/run-integration-tests.sh @@ -7,6 +7,7 @@ # ./scripts/run-integration-tests.sh --skip-cargo # Skip Rust cargo tests # ./scripts/run-integration-tests.sh --no-cleanup # Keep kind cluster after run # ./scripts/run-integration-tests.sh --verbose # Print verbose output to stdout too +# ./scripts/run-integration-tests.sh --agent-only # Only run agent tests (fast iteration) set -euo pipefail @@ -32,6 +33,7 @@ TESTS_WARNED=0 SKIP_CARGO=false NO_CLEANUP=false VERBOSE=false +AGENT_ONLY=false # --------------------------------------------------------------------------- # Argument parsing @@ -41,16 +43,18 @@ while [[ $# -gt 0 ]]; do --skip-cargo) SKIP_CARGO=true; shift ;; --no-cleanup) NO_CLEANUP=true; shift ;; --verbose) VERBOSE=true; shift ;; + --agent-only) AGENT_ONLY=true; SKIP_CARGO=true; shift ;; -h|--help) - echo "Usage: $0 [--skip-cargo] [--no-cleanup] [--verbose]" + echo "Usage: $0 [--skip-cargo] [--no-cleanup] [--verbose] [--agent-only]" echo " --skip-cargo Skip Rust cargo tests (for quick K8s-only reruns)" echo " --no-cleanup Keep kind cluster after run" echo " --verbose Also print verbose output to stdout" + echo " --agent-only Only run agent tests (skip cargo + integration tests)" exit 0 ;; *) echo "Unknown option: $1" >&2 - echo "Usage: $0 [--skip-cargo] [--no-cleanup] [--verbose]" >&2 + echo "Usage: $0 [--skip-cargo] [--no-cleanup] [--verbose] [--agent-only]" >&2 exit 1 ;; esac @@ -89,6 +93,8 @@ cleanup() { log_status "Deleting kind cluster $CLUSTER_NAME..." kind delete cluster --name "$CLUSTER_NAME" >> "$LOG_FILE" 2>&1 || true fi + # Clean up dedicated kubeconfig + rm -f "/tmp/reaper-${CLUSTER_NAME}-kubeconfig" exit "$exit_code" } trap cleanup EXIT @@ -111,7 +117,14 @@ main() { phase_setup phase_readiness - phase_integration_tests + + if ! 
$AGENT_ONLY; then + phase_integration_tests + else + log_status "Skipping integration tests (--agent-only)." + fi + + phase_agent_tests phase_summary } diff --git a/scripts/setup-playground.sh b/scripts/setup-playground.sh index df1099c..87cc4d2 100755 --- a/scripts/setup-playground.sh +++ b/scripts/setup-playground.sh @@ -213,6 +213,14 @@ else ok "Cluster created." | if_log fi +# Export a dedicated KUBECONFIG so all kubectl commands in this session +# (and child processes) target the right cluster, even if the user has +# other Kind clusters or contexts active. +KUBECONFIG_FILE="/tmp/reaper-${CLUSTER_NAME}-kubeconfig" +kind get kubeconfig --name "$CLUSTER_NAME" > "$KUBECONFIG_FILE" +export KUBECONFIG="$KUBECONFIG_FILE" +info "Using KUBECONFIG=$KUBECONFIG_FILE" | if_log + # --------------------------------------------------------------------------- # Resolve "latest" release version # --------------------------------------------------------------------------- diff --git a/src/bin/reaper-agent/config_sync.rs b/src/bin/reaper-agent/config_sync.rs new file mode 100644 index 0000000..9811087 --- /dev/null +++ b/src/bin/reaper-agent/config_sync.rs @@ -0,0 +1,126 @@ +use anyhow::{Context, Result}; +use futures::TryStreamExt; +use k8s_openapi::api::core::v1::ConfigMap; +use kube::{ + api::Api, + runtime::{ + watcher::{self}, + WatchStreamExt, + }, + Client, +}; +use std::fs; +use std::path::Path; +use std::pin::pin; +use tracing::{error, info, warn}; + +use crate::metrics::MetricsState; + +/// Key within the ConfigMap that holds the config file contents. +const CONFIG_KEY: &str = "reaper.conf"; + +/// Write config content to disk atomically (write tmp + rename). 
+fn atomic_write(path: &str, content: &str) -> Result<()> { + let target = Path::new(path); + + // Ensure parent directory exists + if let Some(parent) = target.parent() { + fs::create_dir_all(parent).with_context(|| format!("creating config dir {:?}", parent))?; + } + + let tmp_path = format!("{}.tmp", path); + fs::write(&tmp_path, content) + .with_context(|| format!("writing temp config to {}", tmp_path))?; + fs::rename(&tmp_path, path).with_context(|| format!("renaming {} to {}", tmp_path, path))?; + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + fs::set_permissions(path, fs::Permissions::from_mode(0o644)) + .with_context(|| format!("setting permissions on {}", path))?; + } + + Ok(()) +} + +/// Extract config content from a ConfigMap and write it to disk. +fn sync_configmap(cm: &ConfigMap, config_path: &str) -> Result { + let data = match &cm.data { + Some(d) => d, + None => { + warn!("ConfigMap has no data section, skipping sync"); + return Ok(false); + } + }; + + let content = match data.get(CONFIG_KEY) { + Some(c) => c, + None => { + warn!( + key = CONFIG_KEY, + "ConfigMap missing expected key, skipping sync" + ); + return Ok(false); + } + }; + + atomic_write(config_path, content)?; + info!(path = config_path, "config file updated from ConfigMap"); + Ok(true) +} + +/// Run the config sync loop: watch a ConfigMap and write changes to host. +/// +/// Falls back gracefully on API errors — never deletes an existing config file. +pub async fn config_sync_loop( + namespace: &str, + name: &str, + config_path: &str, + metrics: &MetricsState, +) -> Result<()> { + let client = match Client::try_default().await { + Ok(c) => c, + Err(e) => { + error!(error = %e, "failed to create Kubernetes client, config sync disabled"); + return Err(e.into()); + } + }; + + let api: Api = Api::namespaced(client, namespace); + + // Initial sync: try to read the ConfigMap once + match api.get(name).await { + Ok(cm) => { + if sync_configmap(&cm, config_path)? 
{ + metrics.inc_config_syncs(); + } + } + Err(e) => { + warn!(error = %e, "initial ConfigMap read failed, keeping existing config"); + } + } + + // Watch for changes + let watcher_config = watcher::Config::default().fields(&format!("metadata.name={}", name)); + let stream = watcher::watcher(api, watcher_config).applied_objects(); + let mut stream = pin!(stream); + + info!( + namespace = namespace, + name = name, + "watching ConfigMap for changes" + ); + + while let Some(cm) = stream.try_next().await? { + match sync_configmap(&cm, config_path) { + Ok(true) => metrics.inc_config_syncs(), + Ok(false) => {} + Err(e) => { + error!(error = %e, "failed to sync config from ConfigMap"); + } + } + } + + warn!("ConfigMap watch stream ended unexpectedly"); + Ok(()) +} diff --git a/src/bin/reaper-agent/gc.rs b/src/bin/reaper-agent/gc.rs new file mode 100644 index 0000000..4e5b3f6 --- /dev/null +++ b/src/bin/reaper-agent/gc.rs @@ -0,0 +1,164 @@ +use serde::Deserialize; +use std::fs; +use std::path::Path; +use tracing::{debug, info, warn}; + +use crate::metrics::MetricsState; + +/// Minimal deserialization of state.json — only the fields GC needs. +#[derive(Debug, Deserialize)] +pub struct ContainerStateMinimal { + pub id: String, + pub status: String, + pub pid: Option, + #[serde(default)] + #[allow(dead_code)] + pub exit_code: Option, +} + +/// Check whether a PID is still alive using kill(pid, 0). +fn is_pid_alive(pid: i32) -> bool { + use nix::sys::signal; + use nix::unistd::Pid; + // Signal 0 doesn't send a signal, just checks if process exists + signal::kill(Pid::from_raw(pid), None).is_ok() +} + +/// Run a single GC pass: scan state dirs, detect dead PIDs, update state files. +/// +/// Returns (running, stopped, cleaned) counts for metrics. 
+pub async fn run_gc(state_dir: &str, metrics: &MetricsState) { + let base = Path::new(state_dir); + if !base.exists() { + debug!( + path = state_dir, + "state directory does not exist, skipping GC" + ); + return; + } + + let entries = match fs::read_dir(base) { + Ok(e) => e, + Err(e) => { + warn!(error = %e, path = state_dir, "failed to read state directory"); + return; + } + }; + + let mut running = 0u64; + let mut stopped = 0u64; + let mut created = 0u64; + let mut cleaned = 0u64; + + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_dir() { + continue; + } + + let state_file = path.join("state.json"); + if !state_file.exists() { + // Directory with no state.json — orphaned, clean up + debug!(dir = ?path, "removing orphaned state directory (no state.json)"); + if let Err(e) = fs::remove_dir_all(&path) { + warn!(error = %e, dir = ?path, "failed to remove orphaned directory"); + } else { + cleaned += 1; + } + continue; + } + + let data = match fs::read(&state_file) { + Ok(d) => d, + Err(e) => { + warn!(error = %e, file = ?state_file, "failed to read state file"); + continue; + } + }; + + let state: ContainerStateMinimal = match serde_json::from_slice(&data) { + Ok(s) => s, + Err(e) => { + warn!(error = %e, file = ?state_file, "failed to parse state file, removing"); + if let Err(e) = fs::remove_dir_all(&path) { + warn!(error = %e, dir = ?path, "failed to remove corrupted state dir"); + } else { + cleaned += 1; + } + continue; + } + }; + + match state.status.as_str() { + "running" => { + if let Some(pid) = state.pid { + if is_pid_alive(pid) { + running += 1; + } else { + // Process is gone — mark as stopped + info!( + id = state.id, + pid = pid, + "detected dead process, marking as stopped" + ); + if let Err(e) = mark_stopped(&state_file, &data) { + warn!(error = %e, id = state.id, "failed to update state to stopped"); + } + stopped += 1; + } + } else { + // Running with no PID — shouldn't happen, count as stopped + warn!( + id = state.id, + 
"running container with no PID, marking as stopped" + ); + if let Err(e) = mark_stopped(&state_file, &data) { + warn!(error = %e, id = state.id, "failed to update state to stopped"); + } + stopped += 1; + } + } + "stopped" => stopped += 1, + "created" => created += 1, + other => { + debug!(id = state.id, status = other, "unknown container status"); + } + } + } + + metrics.set_containers(created, running, stopped); + metrics.inc_gc_runs(); + + info!( + running = running, + stopped = stopped, + created = created, + cleaned = cleaned, + "GC scan complete" + ); +} + +/// Update a state file to mark the container as stopped with exit_code -1. +/// Uses serde_json::Value to preserve all existing fields. +fn mark_stopped(state_file: &Path, data: &[u8]) -> anyhow::Result<()> { + let mut value: serde_json::Value = serde_json::from_slice(data)?; + if let Some(obj) = value.as_object_mut() { + obj.insert( + "status".to_string(), + serde_json::Value::String("stopped".to_string()), + ); + obj.insert("exit_code".to_string(), serde_json::json!(-1)); + } + let updated = serde_json::to_vec_pretty(&value)?; + fs::write(state_file, updated)?; + Ok(()) +} + +/// Run GC in a loop at the configured interval. +pub async fn gc_loop(state_dir: &str, interval_secs: u64, metrics: &MetricsState) { + let interval = tokio::time::Duration::from_secs(interval_secs); + loop { + tokio::time::sleep(interval).await; + run_gc(state_dir, metrics).await; + } +} diff --git a/src/bin/reaper-agent/health.rs b/src/bin/reaper-agent/health.rs new file mode 100644 index 0000000..9ebf191 --- /dev/null +++ b/src/bin/reaper-agent/health.rs @@ -0,0 +1,95 @@ +use std::path::Path; +use tracing::{debug, warn}; + +use crate::metrics::MetricsState; + +/// Result of a health check cycle. 
+#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct HealthResult { + pub healthy: bool, + pub shim_present: bool, + pub runtime_present: bool, + pub state_dir_accessible: bool, + pub details: Vec, +} + +/// Run health checks against the host filesystem (via hostPath mounts). +pub fn check_health(shim_path: &str, runtime_path: &str, state_dir: &str) -> HealthResult { + let mut details = Vec::new(); + + let shim_present = check_binary(shim_path, "containerd-shim-reaper-v2", &mut details); + let runtime_present = check_binary(runtime_path, "reaper-runtime", &mut details); + let state_dir_accessible = check_directory(state_dir, "state directory", &mut details); + + let healthy = shim_present && runtime_present && state_dir_accessible; + + HealthResult { + healthy, + shim_present, + runtime_present, + state_dir_accessible, + details, + } +} + +fn check_binary(path: &str, name: &str, details: &mut Vec) -> bool { + let p = Path::new(path); + if !p.exists() { + let msg = format!("{} not found at {}", name, path); + warn!(msg); + details.push(msg); + return false; + } + + // Check executable permission on Unix + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Ok(meta) = p.metadata() { + let mode = meta.permissions().mode(); + if mode & 0o111 == 0 { + let msg = format!("{} at {} is not executable", name, path); + warn!(msg); + details.push(msg); + return false; + } + } + } + + debug!(path = path, name = name, "binary check passed"); + true +} + +fn check_directory(path: &str, name: &str, details: &mut Vec) -> bool { + let p = Path::new(path); + if !p.exists() { + let msg = format!("{} does not exist at {}", name, path); + warn!(msg); + details.push(msg); + return false; + } + if !p.is_dir() { + let msg = format!("{} at {} is not a directory", name, path); + warn!(msg); + details.push(msg); + return false; + } + debug!(path = path, name = name, "directory check passed"); + true +} + +/// Periodic health check loop (runs every 30s). 
+pub async fn health_loop( + shim_path: &str, + runtime_path: &str, + state_dir: &str, + metrics: &MetricsState, +) { + let interval = tokio::time::Duration::from_secs(30); + loop { + tokio::time::sleep(interval).await; + let result = check_health(shim_path, runtime_path, state_dir); + metrics.set_healthy(result.healthy); + } +} diff --git a/src/bin/reaper-agent/main.rs b/src/bin/reaper-agent/main.rs new file mode 100644 index 0000000..7cfcef1 --- /dev/null +++ b/src/bin/reaper-agent/main.rs @@ -0,0 +1,180 @@ +use clap::Parser; +use std::net::SocketAddr; +use tokio::signal; +use tracing::{error, info}; +use tracing_subscriber::EnvFilter; + +mod config_sync; +mod gc; +mod health; +mod metrics; + +// config.rs is available as shared module but not needed by the agent +// (the agent writes config files, it doesn't read them) + +fn version_string() -> &'static str { + const VERSION: &str = concat!( + env!("CARGO_PKG_VERSION"), + " (", + env!("GIT_HASH"), + " ", + env!("BUILD_DATE"), + ")" + ); + VERSION +} + +#[derive(Parser, Debug)] +#[command( + name = "reaper-agent", + version = version_string(), + about = "Per-node Reaper agent: config sync, GC, health, and metrics" +)] +struct Cli { + /// HTTP listen address for health and metrics endpoints + #[arg(long, default_value = "0.0.0.0:9100", env = "REAPER_AGENT_LISTEN")] + listen: SocketAddr, + + /// Kubernetes namespace containing the reaper-config ConfigMap + #[arg( + long, + default_value = "reaper-system", + env = "REAPER_AGENT_CONFIG_NAMESPACE" + )] + config_namespace: String, + + /// Name of the ConfigMap to watch + #[arg( + long, + default_value = "reaper-config", + env = "REAPER_AGENT_CONFIG_NAME" + )] + config_name: String, + + /// Path to write the config file on the host (via hostPath mount) + #[arg( + long, + default_value = "/host/etc/reaper/reaper.conf", + env = "REAPER_AGENT_CONFIG_PATH" + )] + config_path: String, + + /// GC scan interval in seconds + #[arg(long, default_value = "60", env = 
"REAPER_AGENT_GC_INTERVAL")] + gc_interval: u64, + + /// Base state directory (via hostPath mount) + #[arg( + long, + default_value = "/host/run/reaper", + env = "REAPER_AGENT_STATE_DIR" + )] + state_dir: String, + + /// Path to check for shim binary (via hostPath mount) + #[arg( + long, + default_value = "/host/usr/local/bin/containerd-shim-reaper-v2", + env = "REAPER_AGENT_SHIM_PATH" + )] + shim_path: String, + + /// Path to check for runtime binary (via hostPath mount) + #[arg( + long, + default_value = "/host/usr/local/bin/reaper-runtime", + env = "REAPER_AGENT_RUNTIME_PATH" + )] + runtime_path: String, +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + tracing_subscriber::fmt() + .with_env_filter( + EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")), + ) + .init(); + + let cli = Cli::parse(); + info!(version = version_string(), "reaper-agent starting"); + + // Initialize shared metrics registry + let metrics_state = metrics::MetricsState::new(); + + // Run initial GC before starting loops + info!("running initial GC scan"); + gc::run_gc(&cli.state_dir, &metrics_state).await; + + // Run initial health check + let health_result = health::check_health(&cli.shim_path, &cli.runtime_path, &cli.state_dir); + metrics_state.set_healthy(health_result.healthy); + info!(healthy = health_result.healthy, "initial health check"); + + // Spawn concurrent tasks + let gc_state_dir = cli.state_dir.clone(); + let gc_metrics = metrics_state.clone(); + let gc_interval = cli.gc_interval; + let gc_handle = tokio::spawn(async move { + gc::gc_loop(&gc_state_dir, gc_interval, &gc_metrics).await; + }); + + let health_shim = cli.shim_path.clone(); + let health_runtime = cli.runtime_path.clone(); + let health_state_dir = cli.state_dir.clone(); + let health_metrics = metrics_state.clone(); + let health_handle = tokio::spawn(async move { + health::health_loop( + &health_shim, + &health_runtime, + &health_state_dir, + &health_metrics, + ) + .await; + }); 
+ + let sync_namespace = cli.config_namespace.clone(); + let sync_name = cli.config_name.clone(); + let sync_path = cli.config_path.clone(); + let sync_metrics = metrics_state.clone(); + let sync_handle = tokio::spawn(async move { + if let Err(e) = + config_sync::config_sync_loop(&sync_namespace, &sync_name, &sync_path, &sync_metrics) + .await + { + error!(error = %e, "config sync loop exited with error"); + } + }); + + let server_metrics = metrics_state.clone(); + let server_shim = cli.shim_path.clone(); + let server_runtime = cli.runtime_path.clone(); + let server_state_dir = cli.state_dir.clone(); + let server_handle = tokio::spawn(async move { + if let Err(e) = metrics::serve( + cli.listen, + server_metrics, + &server_shim, + &server_runtime, + &server_state_dir, + ) + .await + { + error!(error = %e, "HTTP server exited with error"); + } + }); + + info!(listen = %cli.listen, "reaper-agent running"); + + // Wait for shutdown signal + signal::ctrl_c().await?; + info!("shutdown signal received, exiting"); + + // Abort all tasks + gc_handle.abort(); + health_handle.abort(); + sync_handle.abort(); + server_handle.abort(); + + Ok(()) +} diff --git a/src/bin/reaper-agent/metrics.rs b/src/bin/reaper-agent/metrics.rs new file mode 100644 index 0000000..c5dd521 --- /dev/null +++ b/src/bin/reaper-agent/metrics.rs @@ -0,0 +1,183 @@ +use axum::{extract::State, http::StatusCode, response::IntoResponse, routing::get, Router}; +use prometheus_client::{ + encoding::text::encode, + metrics::{counter::Counter, gauge::Gauge}, + registry::Registry, +}; +use std::net::SocketAddr; +use std::sync::{Arc, Mutex}; +use tracing::info; + +use crate::health; + +/// Shared metrics state used across all agent tasks. 
+#[derive(Clone)] +pub struct MetricsState { + inner: Arc, +} + +struct MetricsInner { + registry: Mutex, + + // Container counts by status + containers_created: Gauge, + containers_running: Gauge, + containers_stopped: Gauge, + + // Operational counters + config_syncs_total: Counter, + gc_runs_total: Counter, + + // Health gauge + healthy: Gauge, +} + +impl MetricsState { + pub fn new() -> Self { + let mut registry = Registry::default(); + + let containers_created = Gauge::default(); + let containers_running = Gauge::default(); + let containers_stopped = Gauge::default(); + let config_syncs_total = Counter::default(); + let gc_runs_total = Counter::default(); + let healthy = Gauge::default(); + + registry.register( + "reaper_containers_created", + "Number of containers in created state", + containers_created.clone(), + ); + registry.register( + "reaper_containers_running", + "Number of containers in running state", + containers_running.clone(), + ); + registry.register( + "reaper_containers_stopped", + "Number of containers in stopped state", + containers_stopped.clone(), + ); + registry.register( + "reaper_agent_config_syncs_total", + "Total number of config file syncs from ConfigMap", + config_syncs_total.clone(), + ); + registry.register( + "reaper_agent_gc_runs_total", + "Total number of GC scan cycles", + gc_runs_total.clone(), + ); + registry.register( + "reaper_agent_healthy", + "Whether the agent considers the node healthy (1=healthy, 0=unhealthy)", + healthy.clone(), + ); + + Self { + inner: Arc::new(MetricsInner { + registry: Mutex::new(registry), + containers_created, + containers_running, + containers_stopped, + config_syncs_total, + gc_runs_total, + healthy, + }), + } + } + + pub fn set_containers(&self, created: u64, running: u64, stopped: u64) { + self.inner.containers_created.set(created as i64); + self.inner.containers_running.set(running as i64); + self.inner.containers_stopped.set(stopped as i64); + } + + pub fn inc_config_syncs(&self) { + 
self.inner.config_syncs_total.inc(); + } + + pub fn inc_gc_runs(&self) { + self.inner.gc_runs_total.inc(); + } + + pub fn set_healthy(&self, healthy: bool) { + self.inner.healthy.set(if healthy { 1 } else { 0 }); + } + + #[allow(dead_code)] + pub fn is_healthy(&self) -> bool { + self.inner.healthy.get() == 1 + } + + pub fn encode(&self) -> String { + let mut buf = String::new(); + let registry = self.inner.registry.lock().unwrap(); + encode(&mut buf, ®istry).unwrap(); + buf + } +} + +/// App state shared with axum handlers. +#[derive(Clone)] +struct AppState { + metrics: MetricsState, + shim_path: String, + runtime_path: String, + state_dir: String, +} + +async fn healthz_handler(State(state): State) -> impl IntoResponse { + let result = health::check_health(&state.shim_path, &state.runtime_path, &state.state_dir); + state.metrics.set_healthy(result.healthy); + + if result.healthy { + (StatusCode::OK, "ok\n") + } else { + (StatusCode::SERVICE_UNAVAILABLE, "unhealthy\n") + } +} + +async fn metrics_handler(State(state): State) -> impl IntoResponse { + let body = state.metrics.encode(); + ( + StatusCode::OK, + [( + "content-type", + "application/openmetrics-text; version=1.0.0; charset=utf-8", + )], + body, + ) +} + +async fn readyz_handler() -> impl IntoResponse { + (StatusCode::OK, "ok\n") +} + +/// Start the HTTP server for health and metrics endpoints. 
+pub async fn serve( + addr: SocketAddr, + metrics: MetricsState, + shim_path: &str, + runtime_path: &str, + state_dir: &str, +) -> anyhow::Result<()> { + let state = AppState { + metrics, + shim_path: shim_path.to_string(), + runtime_path: runtime_path.to_string(), + state_dir: state_dir.to_string(), + }; + + let app = Router::new() + .route("/healthz", get(healthz_handler)) + .route("/readyz", get(readyz_handler)) + .route("/metrics", get(metrics_handler)) + .with_state(state); + + info!(addr = %addr, "starting HTTP server"); + let listener = tokio::net::TcpListener::bind(addr).await?; + axum::serve(listener, app).await?; + + Ok(()) +}