Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ members = [
"packages/amazon-ecs-cni-plugins",
"packages/amazon-ssm-agent",
"packages/amazon-vpc-cni-plugins",
"packages/amd-k8s-device-plugin",
"packages/aws-iam-authenticator",
"packages/aws-otel-collector",
"packages/aws-signer-notation-plugin",
Expand Down Expand Up @@ -66,6 +67,7 @@ members = [
"packages/libcap",
"packages/libcrypto",
"packages/libdevmapper",
"packages/libdrm",
"packages/libelf",
"packages/libexpat",
"packages/libffi",
Expand Down
2 changes: 2 additions & 0 deletions kits/bottlerocket-core-kit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ path = "../kit.rs"
amazon-ecs-cni-plugins = { path = "../../packages/amazon-ecs-cni-plugins" }
amazon-ssm-agent = { path = "../../packages/amazon-ssm-agent" }
amazon-vpc-cni-plugins = { path = "../../packages/amazon-vpc-cni-plugins" }
amd-k8s-device-plugin = { path = "../../packages/amd-k8s-device-plugin" }
aws-iam-authenticator = { path = "../../packages/aws-iam-authenticator" }
aws-otel-collector = { path = "../../packages/aws-otel-collector" }
aws-signer-notation-plugin = { path = "../../packages/aws-signer-notation-plugin" }
Expand Down Expand Up @@ -76,6 +77,7 @@ libcap = { path = "../../packages/libcap" }
libcrypto = { path = "../../packages/libcrypto" }
libcryptsetup = { path = "../../packages/libcryptsetup" }
libdevmapper = { path = "../../packages/libdevmapper" }
libdrm = { path = "../../packages/libdrm" }
libelf = { path = "../../packages/libelf" }
libexpat = { path = "../../packages/libexpat" }
libffi = { path = "../../packages/libffi" }
Expand Down
22 changes: 22 additions & 0 deletions packages/amd-k8s-device-plugin/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[package]
name = "amd-k8s-device-plugin"
version = "0.1.0"
edition = "2021"
publish = false
build = "../build.rs"

[lib]
path = "../packages.rs"

[package.metadata.build-package]
releases-url = "https://github.com/ROCm/k8s-device-plugin/releases"

[[package.metadata.build-package.external-files]]
url = "https://github.com/ROCm/k8s-device-plugin/archive/v1.31.0.8.tar.gz"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is marked pre-release. Is there a "latest" release, or are all of them just marked pre-release?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All releases of the ROCm k8s-device-plugin (https://github.com/ROCm/k8s-device-plugin/releases?page=1) are marked as pre-release on GitHub. v1.31.0.8 is the latest available release.

sha512 = "23a127b46ad15cabbdd9abe18a8b75140340dcb10f41c9efdcfd30b38db7142edabaf6a97ed2c621f0414c78c0cb87fdc81a30b5e3fb016d16cbd11210143326"
force-upstream = true

[build-dependencies]
glibc = { path = "../glibc" }
libdrm = { path = "../libdrm" }
hwloc = { path = "../hwloc" }
19 changes: 19 additions & 0 deletions packages/amd-k8s-device-plugin/amd-k8s-device-plugin.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[Unit]
Description=Start AMD kubernetes device plugin
After=kubelet.service
Wants=kubelet.service

[Service]
# Ensure that the kubelet device plugin socket exists before we start.
# A brief sleep is needed to avoid the `test` failing its first check.
ExecStartPre=/usr/bin/sleep 0.1
ExecStartPre=/usr/bin/test -S /var/lib/kubelet/device-plugins/kubelet.sock
ExecStart=/usr/bin/amd-device-plugin -logtostderr=true -stderrthreshold=INFO -v=5
Type=simple
TimeoutSec=0
RestartSec=2
Restart=always
StandardError=journal+console

[Install]
WantedBy=multi-user.target
75 changes: 75 additions & 0 deletions packages/amd-k8s-device-plugin/amd-k8s-device-plugin.spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
%global goproject github.com/ROCm
%global gorepo k8s-device-plugin
%global goimport %{goproject}/%{gorepo}

%global gover 1.31.0.8
%global rpmver %{gover}

Name: %{_cross_os}amd-k8s-device-plugin
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's keep this consistent with the upstream naming

Suggested change
Name: %{_cross_os}amd-k8s-device-plugin
Name: %{_cross_os}rocm-k8s-device-plugin

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Will update.

Version: %{rpmver}
Release: 1%{?dist}
Summary: Kubernetes device plugin for AMD GPUs
License: Apache-2.0
URL: https://github.com/ROCm/k8s-device-plugin
Source0: https://github.com/ROCm/k8s-device-plugin/archive/v%{gover}.tar.gz
Source1: amd-k8s-device-plugin.service

BuildRequires: %{_cross_os}glibc-devel
BuildRequires: %{_cross_os}libdrm-devel
BuildRequires: %{_cross_os}hwloc-devel
Requires: %{name}(binaries)
Requires: %{_cross_os}libdrm
Requires: %{_cross_os}hwloc

%description
%{summary}.

%package bin
Summary: Kubernetes device plugin for AMD GPUs binaries
Provides: %{name}(binaries)
Requires: (%{_cross_os}image-feature(no-fips) and %{name})
Conflicts: (%{_cross_os}image-feature(fips) or %{name}-fips-bin)

%description bin
%{summary}.

%package fips-bin
Summary: Kubernetes device plugin for AMD GPUs binaries, FIPS edition
Provides: %{name}(binaries)
Requires: (%{_cross_os}image-feature(fips) and %{name})
Conflicts: (%{_cross_os}image-feature(no-fips) or %{name}-bin)

%description fips-bin
%{summary}.

%prep
%autosetup -n %{gorepo}-%{gover} -p1
%cross_go_setup %{gorepo}-%{gover} %{goproject} %{goimport}

%build
%cross_go_configure %{goimport}
%set_cross_go_flags

go build -ldflags="${GOLDFLAGS}" -o amd-device-plugin ./cmd/k8s-device-plugin/
gofips build -ldflags="${GOLDFLAGS}" -o fips/amd-device-plugin ./cmd/k8s-device-plugin/

%install
install -d %{buildroot}%{_cross_bindir}
install -p -m 0755 amd-device-plugin %{buildroot}%{_cross_bindir}

install -d %{buildroot}%{_cross_fips_bindir}
install -p -m 0755 fips/amd-device-plugin %{buildroot}%{_cross_fips_bindir}

install -d %{buildroot}%{_cross_unitdir}
install -p -m 0644 %{S:1} %{buildroot}%{_cross_unitdir}

%files
%license LICENSE
%{_cross_attribution_file}
%{_cross_unitdir}/amd-k8s-device-plugin.service

%files bin
%{_cross_bindir}/amd-device-plugin

%files fips-bin
%{_cross_fips_bindir}/amd-device-plugin
25 changes: 25 additions & 0 deletions packages/libdrm/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[package]
name = "libdrm"
version = "0.1.0"
edition = "2021"
publish = false
build = "../build.rs"

[lib]
path = "../packages.rs"

[package.metadata.build-package]
releases-url = "https://dri.freedesktop.org/libdrm/"

[[package.metadata.build-package.external-files]]
url = "https://dri.freedesktop.org/libdrm/libdrm-2.4.123.tar.xz"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like latest is 2.4.128

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will update.

sha512 = "d3e6ba26bbd609fd87ca131690547eeb6a903c0a8c28b7f5cd5d0947619da09f31daf7bf4b6c38bf5e5dc173e2ccba476338ef682d8cf06d6b71ba73fc9b948d"
force-upstream = true

[[package.metadata.build-package.external-files]]
url = "https://dri.freedesktop.org/libdrm/libdrm-2.4.123.tar.xz.sig"
sha512 = "8eb04246661102ed93d80c1793882423d7798c5f46dfe6bc1c710de0afc5016cc99ccd1b02442587c8af5439eee3cfd008570ed671ffa28c7f02a3d9b78d4e95"
force-upstream = true

[build-dependencies]
glibc = { path = "../glibc" }
61 changes: 61 additions & 0 deletions packages/libdrm/libdrm.spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
Name: %{_cross_os}libdrm
Version: 2.4.123
Release: 1%{?dist}
Summary: Direct Rendering Manager runtime library
License: MIT
URL: https://dri.freedesktop.org
Source0: https://dri.freedesktop.org/libdrm/libdrm-%{version}.tar.xz

BuildRequires: %{_cross_os}glibc-devel
Requires: %{_cross_os}glibc

%description
%{summary}.

%package devel
Summary: Files for development using the direct rendering manager library
Requires: %{name}

%description devel
%{summary}.

%prep
%autosetup -n libdrm-%{version} -p1

%build
CONFIGURE_OPTS=(
--auto-features=disabled
-Dcairo-tests=disabled
-Dman-pages=disabled
-Dvalgrind=disabled
-Dfreedreno=disabled
-Dvc4=disabled
-Detnaviv=disabled
-Dexynos=disabled
-Dtegra=disabled
-Domap=disabled
-Dintel=disabled
-Dradeon=enabled
-Damdgpu=enabled
-Dnouveau=disabled
-Dtests=false
)

%cross_meson "${CONFIGURE_OPTS[@]}"
%cross_meson_build

%install
%cross_meson_install

%files
%{_cross_attribution_file}
%{_cross_libdir}/*.so.*
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we list these out?

%{_cross_includedir}/libsync.h
%{_cross_datadir}/libdrm/amdgpu.ids

%files devel
%{_cross_libdir}/*.so
%{_cross_includedir}/libdrm/
%{_cross_includedir}/xf86drm.h
%{_cross_includedir}/xf86drmMode.h
%{_cross_pkgconfigdir}/*.pc
4 changes: 4 additions & 0 deletions sources/ghostdog/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,14 @@ pub(super) enum Error {
CheckEfaFailure { source: pciclient::PciClientError },
#[snafu(display("Failed to check if Neuron device is attached: {}", source))]
CheckNeuronFailure { source: pciclient::PciClientError },
#[snafu(display("Failed to check if AMD GPU device is attached: {}", source))]
CheckAmdGpuFailure { source: pciclient::PciClientError },
#[snafu(display("Did not detect EFA"))]
NoEfaPresent,
#[snafu(display("Did not detect Neuron"))]
NoNeuronPresent,
#[snafu(display("Did not detect AMD GPU"))]
NoAmdGpuPresent,
#[snafu(display("'{}' has no parent directory", path.display()))]
NoParentDirectory { path: std::path::PathBuf },
#[snafu(display("Failed to open '{}': {}", path.display(), source))]
Expand Down
17 changes: 17 additions & 0 deletions sources/ghostdog/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ enum SubCommand {
EbsDeviceName(EbsDeviceNameArgs),
EfaPresent(EfaPresentArgs),
NeuronPresent(NeuronPresentArgs),
AmdGpuPresent(AmdGpuPresentArgs),
MatchDriver(MatchDriverArgs),
MatchNvidiaDriver(MatchNvidiaDriverArgs),
WriteInfinibandGuid(WriteInfinibandGuidArgs),
Expand All @@ -73,6 +74,11 @@ struct EfaPresentArgs {}
/// Detect if Neuron devices are attached.
struct NeuronPresentArgs {}

// Arguments for the `amd-gpu-present` subcommand; the struct is empty, so the
// subcommand takes no options.
// NOTE(review): the `///` line below is presumably what argh prints as the
// subcommand's help text — keep its wording user-facing.
#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand, name = "amd-gpu-present")]
/// Detect if AMD GPU devices are attached.
struct AmdGpuPresentArgs {}

#[derive(FromArgs, PartialEq, Debug)]
#[argh(subcommand, name = "scan")]
/// Scan a device to see if it is an ephemeral disk.
Expand Down Expand Up @@ -163,6 +169,9 @@ fn main() -> Result<()> {
SubCommand::NeuronPresent(_) => {
is_neuron_attached()?;
}
SubCommand::AmdGpuPresent(_) => {
is_amd_gpu_attached()?;
}
SubCommand::MatchNvidiaDriver(driver) => {
let driver_name = driver.driver_name;
nvidia_driver_supported(&driver_name)?;
Expand Down Expand Up @@ -203,6 +212,14 @@ fn is_neuron_attached() -> Result<()> {
}
}

fn is_amd_gpu_attached() -> Result<()> {
if pciclient::is_amd_gpu_attached().context(error::CheckAmdGpuFailureSnafu)? {
Ok(())
} else {
Err(error::Error::NoAmdGpuPresent)
}
}

// Returns true if this is an inf1 instance
fn is_inf1_instance() -> Result<()> {
if pciclient::is_inf1_instance().context(error::CheckInf1FailureSnafu)? {
Expand Down
7 changes: 6 additions & 1 deletion sources/pciclient/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ mod private;

use private::{
call_list_devices, check_efa_attachment, check_inf1_attachment, check_inf2_attachment,
check_neuron_attachment, PciClient,
check_neuron_attachment, check_amd_gpu_attachment, PciClient,
};

use bon::Builder;
Expand Down Expand Up @@ -123,6 +123,11 @@ pub fn is_neuron_attached() -> Result<bool> {
check_neuron_attachment(PciClient {})
}

/// Call `lspci` and report whether at least one AMD GPU device is attached.
pub fn is_amd_gpu_attached() -> Result<bool> {
    let client = PciClient {};
    check_amd_gpu_attachment(client)
}

/// Call `lspci` and check if there are inf1 devices attached
pub fn is_inf1_instance() -> Result<bool> {
check_inf1_attachment(PciClient {})
Expand Down
Loading
Loading