stamp: solving ort api breaking changes

kallebysantos · kallebysantos · commit e0c0ea645eb5 · 2025-09-11T18:05:09.000+01:00
diff --git a/ext/ai/lib.rs b/ext/ai/lib.rs
@@ -21,6 +21,7 @@ use ndarray::ArrayView3;
 use ndarray::Axis;
 use ndarray::Ix3;
 use ort::inputs;
+use ort::value::TensorRef;
 use reqwest::Url;
 use session::load_session_from_url;
 use std::cell::RefCell;
@@ -180,6 +181,7 @@ async fn init_gte(state: Rc<RefCell<OpState>>) -> Result<(), Error> {
           -> Result<Vec<f32>, Error> {
       let encoded_prompt =
         tokenizer.encode(prompt, true).map_err(anyhow::Error::msg)?;
+
       let input_ids = encoded_prompt
         .get_ids()
         .iter()
@@ -198,32 +200,38 @@ async fn init_gte(state: Rc<RefCell<OpState>>) -> Result<(), Error> {
         .map(|i| *i as i64)
         .collect::<Vec<_>>();
 
-      let input_ids_array = Array1::from_iter(input_ids.iter().cloned());
-      let input_ids_array = input_ids_array.view().insert_axis(Axis(0));
+      // Every input is a (1, len) tensor whose length comes from its own
+      // buffer, so the declared shape always matches the data.
+      let input_ids_array = TensorRef::from_array_view(([1, input_ids.len()], &*input_ids))?;
+      let attention_mask_array = TensorRef::from_array_view(([1, attention_mask.len()], &*attention_mask))?;
+      let token_type_ids_array = TensorRef::from_array_view(([1, token_type_ids.len()], &*token_type_ids))?;
 
-      let attention_mask_array =
-        Array1::from_iter(attention_mask.iter().cloned());
-      let attention_mask_array =
-        attention_mask_array.view().insert_axis(Axis(0));
-
-      let token_type_ids_array =
-        Array1::from_iter(token_type_ids.iter().cloned());
-      let token_type_ids_array =
-        token_type_ids_array.view().insert_axis(Axis(0));
+      let Ok(mut guard) = session.lock() else {
+        let err = anyhow!("failed to lock session");
+        error!(reason = ?err);
+        return Err(err);
+      };
 
       let outputs = trace_span!("infer_gte").in_scope(|| {
-        session.run(inputs! {
-            "input_ids" => input_ids_array,
-            "token_type_ids" => token_type_ids_array,
-            "attention_mask" => attention_mask_array,
-        }?)
+        guard.run(inputs! {
+          "input_ids" => input_ids_array,
+          "token_type_ids" => token_type_ids_array,
+          "attention_mask" => attention_mask_array,
+        })
       })?;
 
-      let embeddings = outputs["last_hidden_state"].try_extract_tensor()?;
+      let embeddings = outputs["last_hidden_state"].try_extract_array()?;
       let embeddings = embeddings.into_dimensionality::<Ix3>()?;
 
       let result = if do_mean_pooling {
-        mean_pool(embeddings, attention_mask_array.insert_axis(Axis(2)))
+        // Pooling takes the mask as a (1, len, 1) ndarray view, so it is
+        // rebuilt here from the raw mask values.
+        let pooling_mask = Array1::from_iter(attention_mask.iter().cloned());
+        let pooling_mask = pooling_mask
+          .view()
+          .insert_axis(Axis(0))
+          .insert_axis(Axis(2));
+        mean_pool(embeddings, pooling_mask)
       } else {
         embeddings.into_owned().remove_axis(Axis(0))
       };
diff --git a/ext/ai/onnxruntime/mod.rs b/ext/ai/onnxruntime/mod.rs
@@ -10,6 +10,7 @@ use std::cell::RefCell;
 use std::collections::HashMap;
 use std::rc::Rc;
 use std::sync::Arc;
+use std::sync::Mutex;
 
 use anyhow::anyhow;
 use anyhow::Context;
@@ -56,7 +57,7 @@ pub async fn op_ai_ort_init_session(
 
   let mut state = state.borrow_mut();
   let mut sessions =
-    { state.try_take::<Vec<Arc<Session>>>().unwrap_or_default() };
+    { state.try_take::<Vec<Arc<Mutex<Session>>>>().unwrap_or_default() };
 
   sessions.push(model.get_session());
   state.put(sessions);
@@ -103,7 +104,12 @@ pub async fn op_ai_ort_run_session(
     JsRuntime::op_state_from(state)
       .borrow_mut()
       .spawn_cpu_accumul_blocking_scope(move || {
-        let outputs = match model_session.run(input_values) {
+        let Ok(mut session_guard) = model_session.lock() else {
+          let _ = tx.send(Err(anyhow!("failed to lock model session")));
+          return;
+        };
+
+        let outputs = match session_guard.run(input_values) {
           Ok(v) => v,
           Err(err) => {
             let _ = tx.send(Err(anyhow::Error::from(err)));
diff --git a/ext/ai/onnxruntime/model.rs b/ext/ai/onnxruntime/model.rs
@@ -1,5 +1,7 @@
 use std::sync::Arc;
+use std::sync::Mutex;
 
+use anyhow::anyhow;
 use anyhow::Result;
 use deno_core::serde_v8::to_v8;
 use deno_core::ToV8;
@@ -27,56 +29,77 @@ impl std::fmt::Display for ModelInfo {
 #[derive(Debug)]
 pub struct Model {
   info: ModelInfo,
-  session: Arc<Session>,
+  session: Arc<Mutex<Session>>,
 }
 
 impl Model {
-  fn new(session_with_id: SessionWithId) -> Self {
-    let input_names = session_with_id
-      .session
-      .inputs
-      .iter()
-      .map(|input| input.name.clone())
-      .collect::<Vec<_>>();
-
-    let output_names = session_with_id
-      .session
-      .outputs
-      .iter()
-      .map(|output| output.name.clone())
-      .collect::<Vec<_>>();
+  /// Builds a `Model`, reading the input and output names from the session.
+  ///
+  /// Returns an error when the session mutex cannot be locked.
+  fn new(session_with_id: SessionWithId) -> Result<Self> {
+    // The guard is confined to this block so it is dropped before
+    // `session_with_id` is moved into the returned value.
+    let (input_names, output_names) = {
+      let Ok(session_guard) = session_with_id.session.lock() else {
+        return Err(anyhow!(
+          "Could not lock model session {}",
+          session_with_id.id
+        ));
+      };
+
+      let input_names = session_guard
+        .inputs
+        .iter()
+        .map(|input| input.name.clone())
+        .collect::<Vec<_>>();
+
+      let output_names = session_guard
+        .outputs
+        .iter()
+        .map(|output| output.name.clone())
+        .collect::<Vec<_>>();
 
-    Self {
+      (input_names, output_names)
+    };
+
+    Ok(Self {
       info: ModelInfo {
         id: session_with_id.id,
         input_names,
         output_names,
       },
       session: session_with_id.session,
-    }
+    })
   }
 
   pub fn get_info(&self) -> ModelInfo {
     self.info.clone()
   }
 
-  pub fn get_session(&self) -> Arc<Session> {
+  pub fn get_session(&self) -> Arc<Mutex<Session>> {
     self.session.clone()
   }
 
   pub async fn from_id(id: &str) -> Option<Self> {
-    get_session(id)
+    let session = get_session(id)
       .await
-      .map(|it| SessionWithId::from((id.to_string(), it)))
-      .map(Self::new)
+      .map(|it| SessionWithId::from((id.to_string(), it)))?;
+
+    // `new` fails only when the session lock cannot be taken; that case is
+    // reported as `None`, like an unknown id.
+    Self::new(session).ok()
   }
 
   pub async fn from_url(model_url: Url) -> Result<Self> {
-    load_session_from_url(model_url).await.map(Self::new)
+    let session = load_session_from_url(model_url).await?;
+
+    Self::new(session)
   }
 
   pub async fn from_bytes(model_bytes: &[u8]) -> Result<Self> {
-    load_session_from_bytes(model_bytes).await.map(Self::new)
+    let session = load_session_from_bytes(model_bytes).await?;
+
+    Self::new(session)
   }
 }
 
diff --git a/ext/ai/onnxruntime/session.rs b/ext/ai/onnxruntime/session.rs
@@ -5,7 +5,8 @@ use reqwest::Url;
 use std::collections::HashMap;
 use std::hash::Hasher;
 use std::sync::Arc;
-use tokio::sync::Mutex;
+use std::sync::Mutex;
+use tokio::sync::Mutex as AsyncMutex;
 use tokio_util::compat::FuturesAsyncWriteCompatExt;
 use tracing::debug;
 use tracing::instrument;
@@ -25,17 +26,17 @@ use ort::session::Session;
 
 use crate::onnx::ensure_onnx_env_init;
 
-static SESSIONS: Lazy<Mutex<HashMap<String, Arc<Session>>>> =
-  Lazy::new(|| Mutex::new(HashMap::new()));
+static SESSIONS: Lazy<AsyncMutex<HashMap<String, Arc<Mutex<Session>>>>> =
+  Lazy::new(|| AsyncMutex::new(HashMap::new()));
 
 #[derive(Debug)]
 pub struct SessionWithId {
   pub(crate) id: String,
-  pub(crate) session: Arc<Session>,
+  pub(crate) session: Arc<Mutex<Session>>,
 }
 
-impl From<(String, Arc<Session>)> for SessionWithId {
-  fn from(value: (String, Arc<Session>)) -> Self {
+impl From<(String, Arc<Mutex<Session>>)> for SessionWithId {
+  fn from(value: (String, Arc<Mutex<Session>>)) -> Self {
     Self {
       id: value.0,
       session: value.1,
@@ -50,7 +51,7 @@ impl std::fmt::Display for SessionWithId {
 }
 
 impl SessionWithId {
-  pub fn into_split(self) -> (String, Arc<Session>) {
+  pub fn into_split(self) -> (String, Arc<Mutex<Session>>) {
     (self.id, self.session)
   }
 }
@@ -74,53 +75,51 @@ pub(crate) fn get_session_builder() -> Result<SessionBuilder, AnyError> {
   Ok(builder)
 }
 
-fn cpu_execution_provider(
-) -> Box<dyn Iterator<Item = ExecutionProviderDispatch>> {
-  Box::new(
-    [
-      // NOTE(Nyannacha): See the comment above. This makes `enable_cpu_mem_arena` set to
-      // False.
-      //
-      // Backgrounds:
-      // [1]: https://docs.rs/ort/2.0.0-rc.4/src/ort/execution_providers/cpu.rs.html#9-18
-      // [2]: https://docs.rs/ort/2.0.0-rc.4/src/ort/execution_providers/cpu.rs.html#46-50
-      CPUExecutionProvider::default().build(),
-    ]
-    .into_iter(),
-  )
+/// Builds the CPU execution provider with its default options.
+fn cpu_execution_provider() -> ExecutionProviderDispatch {
+  // NOTE(Nyannacha): See the comment above. This makes `enable_cpu_mem_arena` set to
+  // False.
+  //
+  // Backgrounds:
+  // [1]: https://docs.rs/ort/2.0.0-rc.4/src/ort/execution_providers/cpu.rs.html#9-18
+  // [2]: https://docs.rs/ort/2.0.0-rc.4/src/ort/execution_providers/cpu.rs.html#46-50
+  CPUExecutionProvider::default().build()
 }
 
-fn cuda_execution_provider(
-) -> Box<dyn Iterator<Item = ExecutionProviderDispatch>> {
+/// Returns the CUDA provider when it is reported as available, else `None`.
+///
+/// A failed availability check is treated the same as "not available".
+fn cuda_execution_provider() -> Option<ExecutionProviderDispatch> {
   let cuda = CUDAExecutionProvider::default();
-  let providers = match cuda.is_available() {
-    Ok(is_cuda_available) => {
-      debug!(cuda_support = is_cuda_available);
-      if is_cuda_available {
-        vec![cuda.build()]
-      } else {
-        vec![]
-      }
-    }
+  let is_cuda_available = cuda.is_available().unwrap_or(false);
+  debug!(cuda_support = is_cuda_available);
+
+  is_cuda_available.then(|| cuda.build())
+}
 
-    _ => vec![],
-  };
+/// Lists the execution providers for a session: CUDA first when available,
+/// followed by CPU.
+fn get_execution_providers() -> Vec<ExecutionProviderDispatch> {
+  let cpu = cpu_execution_provider();
 
-  Box::new(providers.into_iter().chain(cpu_execution_provider()))
+  match cuda_execution_provider() {
+    Some(cuda) => vec![cuda, cpu],
+    None => vec![cpu],
+  }
 }
 
-fn create_session(model_bytes: &[u8]) -> Result<Arc<Session>, Error> {
+fn create_session(model_bytes: &[u8]) -> Result<Arc<Mutex<Session>>, Error> {
   let session = {
     if let Some(err) = ensure_onnx_env_init() {
       return Err(anyhow!("failed to create onnx environment: {err}"));
     }
 
     get_session_builder()?
-      .with_execution_providers(cuda_execution_provider())?
+      .with_execution_providers(get_execution_providers())?
       .commit_from_memory(model_bytes)?
   };
 
-  Ok(Arc::new(session))
+  Ok(Arc::new(Mutex::new(session)))
 }
 
 #[instrument(level = "debug", skip_all, fields(model_bytes = model_bytes.len()), err)]
@@ -181,7 +180,7 @@ pub(crate) async fn load_session_from_url(
   Ok((session_id, session).into())
 }
 
-pub(crate) async fn get_session(id: &str) -> Option<Arc<Session>> {
+pub(crate) async fn get_session(id: &str) -> Option<Arc<Mutex<Session>>> {
   SESSIONS.lock().await.get(id).cloned()
 }
 
diff --git a/ext/ai/onnxruntime/tensor.rs b/ext/ai/onnxruntime/tensor.rs