Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 148 additions & 0 deletions nvml-wrapper/src/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@
nvml: &'nvml Nvml,
}

// Marks `Device` as safe to move to another thread; the lifetime is elided because
// the impl never refers to it by name (clippy `needless_lifetimes`).
// NOTE(review): the soundness argument for this `unsafe impl` is not visible in this
// excerpt -- confirm it is documented where `Device` is defined.
unsafe impl Send for Device<'_> {}

Check warning on line 76 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

the following explicit lifetimes could be elided: 'nvml
// Marks `Device` as safe to share by reference between threads; the lifetime is
// elided because the impl never refers to it by name (clippy `needless_lifetimes`).
// NOTE(review): the soundness argument for this `unsafe impl` is not visible in this
// excerpt -- confirm it is documented where `Device` is defined.
unsafe impl Sync for Device<'_> {}

Check warning on line 77 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

the following explicit lifetimes could be elided: 'nvml

assert_impl_all!(Device: Send, Sync);

Expand Down Expand Up @@ -155,10 +155,10 @@

* `Uninitialized`, if the library has not been successfully initialized
* `InvalidArg`, if this `Device` is invalid or the apiType is invalid (may occur if
the C lib changes dramatically?)

Check warning on line 158 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

doc list item without indentation
* `NotSupported`, if this query is not supported by this `Device` or this `Device`
does not support the feature that is being queried (e.g. enabling/disabling auto

Check warning on line 160 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

doc list item without indentation
boosted clocks is not supported by this `Device`).

Check warning on line 161 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

doc list item without indentation
* `GpuLost`, if this `Device` has fallen off the bus or is otherwise inaccessible
* `UnexpectedVariant`, for which you can read the docs for
* `Unknown`, on any unexpected error
Expand Down Expand Up @@ -192,7 +192,7 @@

* `Uninitialized`, if the library has not been successfully initialized
* `InvalidArg`, if this `Device` is invalid or the clockType is invalid (may occur
if the C lib changes dramatically?)

Check warning on line 195 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

doc list item without indentation
* `NotSupported`, if this `Device` does not support this feature
* `GpuLost`, if this `Device` has fallen off the bus or is otherwise inaccessible
* `Unknown`, on any unexpected error
Expand Down Expand Up @@ -432,7 +432,7 @@
* `Uninitialized`, if the library has not been successfully initialized
* `InvalidArg`, if this `Device` is invalid or `clock_type` is invalid (shouldn't occur?)
* `NotSupported`, if this `Device` or the `clock_type` on this `Device`
does not support this feature

Check warning on line 435 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

doc list item without indentation
* `GpuLost`, if this `Device` has fallen off the bus or is otherwise inaccessible
* `Unknown`, on any unexpected error

Expand Down Expand Up @@ -802,6 +802,154 @@
}
}

/**
Checks whether the system is fully set up for confidential compute.

Returns `true` only if all three conditions hold at the same time:

* the confidential compute feature is enabled,
* the reported environment is the production environment, and
* the GPUs report that they are accepting client requests.

Both underlying NVML queries are called without this `Device`'s handle.

# Errors

* `Uninitialized`, if the library has not been successfully initialized
* `NotSupported`, if this query is not supported by the device
* `InvalidArg`, if confidential compute state is invalid
*/
#[doc(alias = "nvmlSystemGetConfComputeState")]
#[doc(alias = "nvmlSystemGetConfComputeGpusReadyState")]
pub fn check_confidential_compute_status(&self) -> Result<bool, NvmlError> {
    // Look up both symbols before calling either, so a failed lookup is
    // returned before any NVML call is made.
    let cc_state_sym = nvml_sym(self.nvml.lib.nvmlSystemGetConfComputeState.as_ref())?;
    let cc_gpus_ready_sym = nvml_sym(
        self.nvml
            .lib
            .nvmlSystemGetConfComputeGpusReadyState
            .as_ref(),
    )?;

    // NOTE(review): assumes an all-zero `nvmlConfComputeSystemState_t` is a valid
    // value to hand to NVML as an out-parameter -- confirm against the bindings.
    unsafe {
        let mut state: nvmlConfComputeSystemState_t = mem::zeroed();
        nvml_try(cc_state_sym(&mut state))?;

        let is_cc_enabled = state.ccFeature == NVML_CC_SYSTEM_FEATURE_ENABLED;
        let is_prod_environment = state.environment == NVML_CC_SYSTEM_ENVIRONMENT_PROD;

        // The readiness flag comes from a second, separate NVML call.
        let mut cc_gpus_ready: std::os::raw::c_uint = 0;
        nvml_try(cc_gpus_ready_sym(&mut cc_gpus_ready))?;
        let is_accepting_client_requests =
            cc_gpus_ready == NVML_CC_ACCEPTING_CLIENT_REQUESTS_TRUE;

        Ok(is_cc_enabled && is_prod_environment && is_accepting_client_requests)
    }
}

/**
Gets the confidential compute capabilities reported by NVML.

The result pairs the CPU-side capability (none, AMD SEV or Intel TDX) with
whether the GPUs are confidential-compute capable. The underlying query is
called without this `Device`'s handle.

# Errors

* `Uninitialized`, if the library has not been successfully initialized
* `InvalidArg`, if device is invalid or memory is NULL
* `NotSupported`, if this query is not supported by the device
* `Unknown`, on any unexpected error, or if NVML reports a CPU or GPU capability
  value that this wrapper does not recognise
*/
#[doc(alias = "nvmlSystemGetConfComputeCapabilities")]
pub fn get_confidential_compute_capabilities(
    &self,
) -> Result<ConfidentialComputeCapabilities, NvmlError> {
    let sym = nvml_sym(self.nvml.lib.nvmlSystemGetConfComputeCapabilities.as_ref())?;

    // NOTE(review): assumes an all-zero `nvmlConfComputeSystemCaps_t` is a valid
    // value to hand to NVML as an out-parameter -- confirm against the bindings.
    unsafe {
        let mut capabilities: nvmlConfComputeSystemCaps_t = mem::zeroed();
        nvml_try(sym(&mut capabilities))?;

        // Translate the raw CPU capability constant into the wrapper enum;
        // anything unrecognised is reported as `Unknown`.
        let cpu_caps = match capabilities.cpuCaps {
            NVML_CC_SYSTEM_CPU_CAPS_NONE => ConfidentialComputeCpuCapabilities::None,
            NVML_CC_SYSTEM_CPU_CAPS_AMD_SEV => ConfidentialComputeCpuCapabilities::AmdSev,
            NVML_CC_SYSTEM_CPU_CAPS_INTEL_TDX => ConfidentialComputeCpuCapabilities::IntelTdx,
            _ => return Err(NvmlError::Unknown),
        };

        // Same translation for the GPU capability constant.
        let gpus_caps = match capabilities.gpusCaps {
            NVML_CC_SYSTEM_GPUS_CC_CAPABLE => ConfidentialComputeGpuCapabilities::Capable,
            NVML_CC_SYSTEM_GPUS_CC_NOT_CAPABLE => {
                ConfidentialComputeGpuCapabilities::NotCapable
            }
            _ => return Err(NvmlError::Unknown),
        };

        Ok(ConfidentialComputeCapabilities {
            cpu_caps,
            gpus_caps,
        })
    }
}

/**
Gets the confidential compute GPU certificate for this `Device`.

Both chains in the returned struct are copies of the complete fixed-size buffers
filled in by NVML; the accompanying `*_size` fields are passed through unchanged.

# Errors

* `Uninitialized`, if the library has not been successfully initialized
* `InvalidArg`, if device is invalid or memory is NULL
* `NotSupported`, if this query is not supported by the device
* `Unknown`, on any unexpected error
*/
#[doc(alias = "nvmlDeviceGetConfComputeGpuCertificate")]
pub fn confidential_compute_gpu_certificate(
    &self,
) -> Result<ConfidentialComputeGpuCertificate, NvmlError> {
    let sym = nvml_sym(
        self.nvml
            .lib
            .nvmlDeviceGetConfComputeGpuCertificate
            .as_ref(),
    )?;

    // NOTE(review): assumes an all-zero `nvmlConfComputeGpuCertificate_t` is a valid
    // value to hand to NVML as an out-parameter -- confirm against the bindings.
    unsafe {
        let mut certificate_chain: nvmlConfComputeGpuCertificate_t = mem::zeroed();
        nvml_try(sym(self.device, &mut certificate_chain))?;

        Ok(ConfidentialComputeGpuCertificate {
            cert_chain_size: certificate_chain.certChainSize,
            attestation_cert_chain_size: certificate_chain.attestationCertChainSize,
            // The whole buffers are copied, not just the first `*_size` bytes.
            cert_chain: certificate_chain.certChain.to_vec(),
            attestation_cert_chain: certificate_chain.attestationCertChain.to_vec(),
        })
    }
}

/**
Fetches the confidential compute attestation report for this [`Device`].

The supplied 32-byte `nonce` is written into the request before NVML is called; it is
not echoed back in the result. The returned [`ConfidentialComputeGpuAttestationReport`]
exposes each component as its own field:

- The attestation report size
- The attestation report data (up to 8192 bytes)
- A flag indicating if a CEC attestation report is present
- The CEC attestation report size
- The CEC attestation report data (up to 4096 bytes)

Check warning on line 917 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

doc list item without indentation
# Errors
* `Uninitialized`, if the library has not been successfully initialized
* `InvalidArg`, if device is invalid or memory is NULL
* `NotSupported`, if this query is not supported by the device
* `Unknown`, on any unexpected error
*/
#[doc(alias = "nvmlDeviceGetConfComputeGpuAttestationReport")]
pub fn confidential_compute_gpu_attestation_report(
    &self,
    nonce: [u8; NVML_CC_GPU_CEC_NONCE_SIZE as usize],
) -> Result<ConfidentialComputeGpuAttestationReport, NvmlError> {
    let sym = nvml_sym(
        self.nvml
            .lib
            .nvmlDeviceGetConfComputeGpuAttestationReport
            .as_ref(),
    )?;

    // NOTE(review): assumes an all-zero `nvmlConfComputeGpuAttestationReport_st` is a
    // valid starting value for this in/out struct -- confirm against the bindings.
    unsafe {
        let mut report: nvmlConfComputeGpuAttestationReport_st = mem::zeroed();
        // The nonce is an input: it must be in place before the call.
        report.nonce = nonce;

        nvml_try(sym(self.device, &mut report))?;

        // Only the exact value 1 is treated as "present".
        let is_cec_attestation_report_present = report.isCecAttestationReportPresent == 1;
        Ok(ConfidentialComputeGpuAttestationReport {
            attestation_report_size: report.attestationReportSize,
            // The whole buffers are copied, not just the first `*_size` bytes.
            attestation_report: report.attestationReport.to_vec(),
            is_cec_attestation_report_present,
            cec_attestation_report_size: report.cecAttestationReportSize,
            cec_attestation_report: report.cecAttestationReport.to_vec(),
        })
    }
}

/**
Gets the current utilization and sampling size (sampling size in μs) for the Decoder.

Expand Down Expand Up @@ -1236,7 +1384,7 @@
* `Uninitialized`, if the library has not been successfully initialized
* `GpuLost`, if this `Device` has fallen off the bus or is otherwise inaccessible
* `UnexpectedVariant`, if an enum variant not defined in this wrapper gets
returned in a field of an `EncoderSessionInfo` struct

Check warning on line 1387 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

doc list item without indentation
* `Unknown`, on any unexpected error

# Device Support
Expand Down Expand Up @@ -1899,7 +2047,7 @@
* `Uninitialized`, if the library has not been successfully initialized
* `InvalidArg`, if `error_type`, `counter_type`, or `location` is invalid (shouldn't occur?)
* `NotSupported`, if this `Device` does not support ECC error reporting for the specified
memory

Check warning on line 2050 in nvml-wrapper/src/device.rs

View workflow job for this annotation

GitHub Actions / Clippy

doc list item without indentation
* `GpuLost`, if this `Device` has fallen off the bus or is otherwise inaccessible
* `Unknown`, on any unexpected error

Expand Down
66 changes: 66 additions & 0 deletions nvml-wrapper/src/structs/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,69 @@ pub struct RetiredPage {
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct FieldId(pub u32);

/// Confidential compute capabilities, pairing the CPU-side and GPU-side values.
///
/// Returned from `Device.get_confidential_compute_capabilities()`.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ConfidentialComputeCapabilities {
/// The CPU capabilities.
pub cpu_caps: ConfidentialComputeCpuCapabilities,
/// The GPU capabilities.
pub gpus_caps: ConfidentialComputeGpuCapabilities,
}

/// The possible CPU capabilities for confidential compute (either none, AMD SEV or
/// Intel TDX).
///
/// Used as the `cpu_caps` field of `ConfidentialComputeCapabilities`.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ConfidentialComputeCpuCapabilities {
/// No CPU capabilities.
None,
/// AMD SEV confidential compute capabilities.
AmdSev,
/// Intel TDX confidential compute capabilities.
IntelTdx,
}

/// The possible GPU capabilities for confidential compute (either capable or not
/// capable).
///
/// Used as the `gpus_caps` field of `ConfidentialComputeCapabilities`.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ConfidentialComputeGpuCapabilities {
/// The GPUs are capable of confidential compute.
Capable,
/// The GPUs are not capable of confidential compute.
NotCapable,
}

/// The confidential compute certificate chains of a GPU.
///
/// Returned from `Device.confidential_compute_gpu_certificate()`.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ConfidentialComputeGpuCertificate {
/// The size of the certificate chain.
///
/// Presumably the number of meaningful bytes at the start of `cert_chain` --
/// TODO confirm against the NVML documentation.
pub cert_chain_size: u32,
/// The size of the attestation certificate chain.
///
/// Presumably the number of meaningful bytes at the start of
/// `attestation_cert_chain` -- TODO confirm against the NVML documentation.
pub attestation_cert_chain_size: u32,
/// The certificate chain, of size
/// `ffi::bindings::NVML_GPU_CERT_CHAIN_SIZE == 4096`.
pub cert_chain: Vec<u8>,
/// The attestation certificate chain, of size
/// `ffi::bindings::NVML_GPU_ATTESTATION_CERT_CHAIN_SIZE == 5120`.
pub attestation_cert_chain: Vec<u8>,
}

/// The confidential compute attestation report of a GPU, together with the optional
/// CEC attestation report.
///
/// Returned from `Device.confidential_compute_gpu_attestation_report()`.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ConfidentialComputeGpuAttestationReport {
/// The size of the attestation report.
///
/// Presumably the number of meaningful bytes at the start of `attestation_report`
/// -- TODO confirm against the NVML documentation.
pub attestation_report_size: u32,
/// The attestation report, of size
/// `ffi::bindings::NVML_CC_GPU_ATTESTATION_REPORT_SIZE == 8192`.
pub attestation_report: Vec<u8>,
/// Whether the CEC attestation report is present.
pub is_cec_attestation_report_present: bool,
/// The size of the CEC attestation report.
///
/// Presumably the number of meaningful bytes at the start of
/// `cec_attestation_report` -- TODO confirm against the NVML documentation.
pub cec_attestation_report_size: u32,
/// The CEC attestation report, of size
/// `ffi::bindings::NVML_CC_GPU_CEC_ATTESTATION_REPORT_SIZE == 4096`.
pub cec_attestation_report: Vec<u8>,
}
Loading