feat: decoded media via NIXL

milesial · milesial · commit ef274b4ccf1c · 2025-11-04T00:37:09.000-08:00
Signed-off-by: Alexandre Milesi <milesial@users.noreply.github.com>
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/lib/llm/Cargo.toml b/lib/llm/Cargo.toml
@@ -13,7 +13,7 @@ readme.workspace = true
 description = "Dynamo LLM Library"
 
 [features]
-default = []
+default = ["block-manager"]
 # todo(ops): get this working in CI as a default.
 # default = ["block-manager", "testing-full"]
 
@@ -142,7 +142,7 @@ json-five = { version = "0.3" }
 # media loading in the preprocessor
 reqwest = { workspace = true }
 base64 = { version = "0.22" }
-image = { version = "0.25" }
+image = { version = "0.25", features = ["default", "serde"] }
 tokio-rayon = {version = "2" }
 ndarray = { version = "0.16" }
 
diff --git a/lib/llm/src/block_manager/storage.rs b/lib/llm/src/block_manager/storage.rs
@@ -364,6 +364,30 @@ impl SystemStorage {
     }
 }
 
+impl TryFrom<Vec<u8>> for SystemStorage {
+    type Error = StorageError;
+
+    /// Create SystemStorage from a non-empty Vec<u8>, taking ownership of its buffer (zero-copy
+    /// when capacity == len). Errors on an empty Vec: a zero-sized buffer cannot be owned or freed.
+    fn try_from(vec: Vec<u8>) -> Result<Self, Self::Error> {
+        if vec.is_empty() {
+            return Err(StorageError::AllocationFailed("vec is empty".into()));
+        }
+        // Drop spare capacity so the allocation is exactly `layout`; freeing with another layout is UB
+        let boxed = vec.into_boxed_slice();
+        let layout = Layout::array::<u8>(boxed.len())
+            .map_err(|e| StorageError::AllocationFailed(e.to_string()))?;
+        let ptr = NonNull::new(Box::into_raw(boxed).cast::<u8>())
+            .ok_or_else(|| StorageError::AllocationFailed("vec pointer is null".into()))?;
+        Ok(Self {
+            ptr,
+            layout,
+            len: layout.size(),
+            handles: RegistrationHandles::new(),
+        })
+    }
+}
+
 impl Drop for SystemStorage {
     fn drop(&mut self) {
         self.handles.release();
diff --git a/lib/llm/src/preprocessor.rs b/lib/llm/src/preprocessor.rs
@@ -327,14 +327,21 @@ impl OpenAIPreprocessor {
         // Execute all fetch tasks
         if !fetch_tasks.is_empty() {
             let loader = self.media_loader.as_ref().unwrap();
-            let _results = futures::future::join_all(
+            let results = futures::future::join_all(
                 fetch_tasks
                     .iter()
                     .map(|(_, content_part)| loader.fetch_and_decode_media_part(content_part)),
             )
             .await;
 
-            // TODO: decode and pass NIXL descriptors to the media map
+            for ((type_str, _), result) in fetch_tasks.into_iter().zip(results.into_iter()) {
+                // if one item fails, errors the whole request, other items will be cleaned up by Drop
+                let rdma_descriptor = result?;
+                media_map
+                    .entry(type_str)
+                    .or_default()
+                    .push(MultimodalData::Decoded(rdma_descriptor));
+            }
         }
 
         if !media_map.is_empty() {
diff --git a/lib/llm/src/preprocessor/media.rs b/lib/llm/src/preprocessor/media.rs
@@ -4,7 +4,9 @@
 mod common;
 mod decoders;
 mod loader;
+mod rdma;
 
 pub use common::EncodedMediaData;
 pub use decoders::{Decoder, ImageDecoder, MediaDecoder};
 pub use loader::MediaLoader;
+pub use rdma::{DecodedMediaData, RdmaMediaDataDescriptor, get_nixl_agent, get_nixl_metadata};
diff --git a/lib/llm/src/preprocessor/media/decoders.rs b/lib/llm/src/preprocessor/media/decoders.rs
@@ -2,52 +2,14 @@
 // SPDX-License-Identifier: Apache-2.0
 
 use anyhow::Result;
+use serde::{Deserialize, Serialize};
 
 use super::common::EncodedMediaData;
-use ndarray::{ArrayBase, Dimension, OwnedRepr};
-mod image;
+use super::rdma::DecodedMediaData;
+pub mod image;
 
 pub use image::{ImageDecoder, ImageMetadata};
 
-#[derive(Debug)]
-pub enum DecodedMediaMetadata {
-    #[allow(dead_code)] // used in followup MR
-    Image(ImageMetadata),
-}
-
-#[derive(Debug, PartialEq, Eq)]
-pub enum DataType {
-    UINT8,
-}
-
-// Decoded media data (image RGB, video frames pixels, ...)
-#[derive(Debug)]
-pub struct DecodedMediaData {
-    #[allow(dead_code)] // used in followup MR
-    pub(crate) data: Vec<u8>,
-    #[allow(dead_code)] // used in followup MR
-    pub(crate) shape: Vec<usize>,
-    #[allow(dead_code)] // used in followup MR
-    pub(crate) dtype: DataType,
-    #[allow(dead_code)] // used in followup MR
-    pub(crate) metadata: Option<DecodedMediaMetadata>,
-}
-
-// convert Array{N}<u8> to DecodedMediaData
-// TODO: Array1<f32> for audio
-impl<D: Dimension> From<ArrayBase<OwnedRepr<u8>, D>> for DecodedMediaData {
-    fn from(array: ArrayBase<OwnedRepr<u8>, D>) -> Self {
-        let shape = array.shape().to_vec();
-        let (data, _) = array.into_raw_vec_and_offset();
-        Self {
-            data,
-            shape,
-            dtype: DataType::UINT8,
-            metadata: None,
-        }
-    }
-}
-
 #[async_trait::async_trait]
 pub trait Decoder: Clone + Send + 'static {
     fn decode(&self, data: EncodedMediaData) -> Result<DecodedMediaData>;
@@ -67,3 +29,9 @@ pub struct MediaDecoder {
     pub image_decoder: ImageDecoder,
     // TODO: video, audio decoders
 }
+
+#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
+pub enum DecodedMediaMetadata {
+    #[allow(dead_code)] // used in followup MR
+    Image(ImageMetadata),
+}
diff --git a/lib/llm/src/preprocessor/media/decoders/image.rs b/lib/llm/src/preprocessor/media/decoders/image.rs
@@ -6,14 +6,15 @@ use std::io::Cursor;
 use anyhow::Result;
 use image::{ColorType, GenericImageView, ImageFormat, ImageReader};
 use ndarray::Array3;
+use serde::{Deserialize, Serialize};
 
 use super::super::common::EncodedMediaData;
-use super::super::decoders::{DecodedMediaData, DecodedMediaMetadata};
-use super::Decoder;
+use super::super::rdma::DecodedMediaData;
+use super::{DecodedMediaMetadata, Decoder};
 
 const DEFAULT_MAX_ALLOC: u64 = 128 * 1024 * 1024; // 128 MB
 
-#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct ImageDecoder {
     #[serde(default)]
@@ -35,12 +36,12 @@ impl Default for ImageDecoder {
     }
 }
 
-#[derive(Debug)]
+#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
 pub enum ImageLayout {
     HWC,
 }
 
-#[derive(Debug)]
+#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
 pub struct ImageMetadata {
     #[allow(dead_code)] // used in followup MR
     pub(crate) format: Option<ImageFormat>,
@@ -77,8 +78,8 @@ impl Decoder for ImageDecoder {
         let (width, height) = img.dimensions();
         let shape = (height as usize, width as usize, n_channels as usize);
         let array = Array3::from_shape_vec(shape, data)?;
-        let mut decoded: DecodedMediaData = array.into();
-        decoded.metadata = Some(DecodedMediaMetadata::Image(ImageMetadata {
+        let mut decoded: DecodedMediaData = array.try_into()?;
+        decoded.tensor_info.metadata = Some(DecodedMediaMetadata::Image(ImageMetadata {
             format,
             color_type,
             layout: ImageLayout::HWC,
@@ -89,7 +90,7 @@ impl Decoder for ImageDecoder {
 
 #[cfg(test)]
 mod tests {
-    use super::super::super::decoders::DataType;
+    use super::super::super::rdma::DataType;
     use super::*;
     use image::{DynamicImage, ImageBuffer};
     use rstest::rstest;
@@ -155,10 +156,10 @@ mod tests {
 
         let decoded = result.unwrap();
         assert_eq!(
-            decoded.shape,
+            decoded.tensor_info.shape,
             vec![height as usize, width as usize, expected_channels as usize]
         );
-        assert_eq!(decoded.dtype, DataType::UINT8);
+        assert_eq!(decoded.tensor_info.dtype, DataType::UINT8);
     }
 
     #[rstest]
@@ -195,9 +196,12 @@ mod tests {
                 format
             );
             let decoded = result.unwrap();
-            assert_eq!(decoded.shape, vec![height as usize, width as usize, 3]);
             assert_eq!(
-                decoded.dtype,
+                decoded.tensor_info.shape,
+                vec![height as usize, width as usize, 3]
+            );
+            assert_eq!(
+                decoded.tensor_info.dtype,
                 DataType::UINT8,
                 "dtype should be uint8 for case: {}",
                 test_case
@@ -235,11 +239,15 @@ mod tests {
         );
 
         let decoded = result.unwrap();
-        assert_eq!(decoded.shape.len(), 3, "Should have 3 dimensions");
-        assert_eq!(decoded.shape[0], 1, "Height should be 1");
-        assert_eq!(decoded.shape[1], 1, "Width should be 1");
         assert_eq!(
-            decoded.dtype,
+            decoded.tensor_info.shape.len(),
+            3,
+            "Should have 3 dimensions"
+        );
+        assert_eq!(decoded.tensor_info.shape[0], 1, "Height should be 1");
+        assert_eq!(decoded.tensor_info.shape[1], 1, "Width should be 1");
+        assert_eq!(
+            decoded.tensor_info.dtype,
             DataType::UINT8,
             "dtype should be uint8 for {} channels {:?}",
             input_channels,
diff --git a/lib/llm/src/preprocessor/media/loader.rs b/lib/llm/src/preprocessor/media/loader.rs
@@ -9,7 +9,9 @@ use anyhow::Result;
 use dynamo_async_openai::types::ChatCompletionRequestUserMessageContentPart;
 
 use super::common::EncodedMediaData;
-use super::decoders::{DecodedMediaData, Decoder, MediaDecoder};
+use super::decoders::{Decoder, MediaDecoder};
+use super::rdma::{RdmaMediaDataDescriptor, get_nixl_agent};
+use nixl_sys::Agent as NixlAgent;
 
 const DEFAULT_HTTP_USER_AGENT: &str = "dynamo-ai/dynamo";
 const DEFAULT_HTTP_TIMEOUT: Duration = Duration::from_secs(30);
@@ -39,7 +41,7 @@ pub struct MediaLoader {
     media_decoder: MediaDecoder,
     http_client: reqwest::Client,
     media_fetcher: MediaFetcher,
-    // TODO: NIXL agent
+    nixl_agent: NixlAgent,
 }
 
 impl MediaLoader {
@@ -53,10 +55,13 @@ impl MediaLoader {
 
         let http_client = http_client_builder.build()?;
 
+        let nixl_agent = get_nixl_agent()?;
+
         Ok(Self {
             media_decoder,
             http_client,
             media_fetcher,
+            nixl_agent,
         })
     }
 
@@ -90,9 +95,8 @@ impl MediaLoader {
         &self,
         oai_content_part: &ChatCompletionRequestUserMessageContentPart,
         // TODO: request-level options
-    ) -> Result<DecodedMediaData> {
-        // fetch the media
-        // TODO: decode and NIXL-register
+    ) -> Result<RdmaMediaDataDescriptor> {
+        // fetch the media, decode and NIXL-register
         let decoded = match oai_content_part {
             ChatCompletionRequestUserMessageContentPart::ImageUrl(image_part) => {
                 let url = &image_part.image_url.url;
@@ -112,14 +116,16 @@ impl MediaLoader {
             _ => anyhow::bail!("Unsupported media type"),
         };
 
-        Ok(decoded)
+        let rdma_descriptor = decoded.into_rdma_descriptor(&self.nixl_agent)?;
+        Ok(rdma_descriptor)
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::super::decoders::DataType;
+    use super::super::rdma::DataType;
     use super::*;
+    use crate::block_manager::storage::nixl::NixlRegisterableStorage;
     use dynamo_async_openai::types::{ChatCompletionRequestMessageContentPartImage, ImageUrl};
 
     #[tokio::test]
@@ -157,17 +163,52 @@ mod tests {
             result.err()
         );
 
-        let data = result.unwrap();
-        assert_eq!(data.dtype, DataType::UINT8);
+        let descriptor = result.unwrap();
+        assert_eq!(descriptor.tensor_info.dtype, DataType::UINT8);
 
         // Verify image dimensions: 1,999px × 1,125px (width × height)
         // Shape format is [height, width, channels]
-        assert_eq!(data.shape.len(), 3);
-        assert_eq!(data.shape[0], 1125, "Height should be 1125");
-        assert_eq!(data.shape[1], 1999, "Width should be 1999");
-        assert_eq!(data.shape[2], 4, "RGBA channels should be 4");
+        assert_eq!(descriptor.tensor_info.shape.len(), 3);
+        assert_eq!(
+            descriptor.tensor_info.shape[0], 1125,
+            "Height should be 1125"
+        );
+        assert_eq!(
+            descriptor.tensor_info.shape[1], 1999,
+            "Width should be 1999"
+        );
+        assert_eq!(
+            descriptor.tensor_info.shape[2], 4,
+            "RGBA channels should be 4"
+        );
 
         mock.assert_async().await;
+
+        assert!(
+            !descriptor.tensor_info.shape.is_empty(),
+            "Shape should not be empty"
+        );
+        assert_eq!(
+            descriptor.tensor_info.shape[0], 1125,
+            "Height should be 1125"
+        );
+        assert_eq!(
+            descriptor.tensor_info.shape[1], 1999,
+            "Width should be 1999"
+        );
+        assert_eq!(
+            descriptor.tensor_info.shape[2], 4,
+            "RGBA channels should be 4"
+        );
+
+        assert!(
+            descriptor.source_storage.is_some(),
+            "Source storage should be present"
+        );
+        assert!(
+            descriptor.source_storage.unwrap().is_nixl_registered(),
+            "Source storage should be registered with NIXL"
+        );
     }
 
     #[test]
diff --git a/lib/llm/src/preprocessor/media/rdma.rs b/lib/llm/src/preprocessor/media/rdma.rs
diff --git a/lib/llm/src/protocols/common/preprocessor.rs b/lib/llm/src/protocols/common/preprocessor.rs