
Commit eb1bb49

feat: support for authorization header on model fetch
1 parent b17079b commit eb1bb49

7 files changed: +122 −89 lines
Lines changed: 64 additions & 61 deletions

@@ -1,90 +1,93 @@

This hunk adds the const core = globalThis.Deno.core; binding at the top of the module, renames the ops op_sb_ai_ort_run_session and op_sb_ai_ort_encode_tensor_audio to op_ai_ort_run_session and op_ai_ort_encode_tensor_audio, splits the invalid-URL throw across several lines, and re-indents the rest of the file. The module after the change:

const core = globalThis.Deno.core;
import { InferenceSession, Tensor } from 'ext:ai/onnxruntime/onnx.js';

const DEFAULT_HUGGING_FACE_OPTIONS = {
  hostname: 'https://huggingface.co',
  path: {
    template: '{REPO_ID}/resolve/{REVISION}/onnx/{MODEL_FILE}?donwload=true',
    revision: 'main',
    modelFile: 'model_quantized.onnx',
  },
};

/**
 * An user friendly API for onnx backend
 */
class UserInferenceSession {
  inner;

  id;
  inputs;
  outputs;

  constructor(session) {
    this.inner = session;

    this.id = session.sessionId;
    this.inputs = session.inputNames;
    this.outputs = session.outputNames;
  }

  static async fromUrl(modelUrl) {
    if (modelUrl instanceof URL) {
      modelUrl = modelUrl.toString();
    }

    const encoder = new TextEncoder();
    const modelUrlBuffer = encoder.encode(modelUrl);
    const session = await InferenceSession.fromBuffer(modelUrlBuffer);

    return new UserInferenceSession(session);
  }

  static async fromHuggingFace(repoId, opts = {}) {
    const hostname = opts?.hostname ?? DEFAULT_HUGGING_FACE_OPTIONS.hostname;
    const pathOpts = {
      ...DEFAULT_HUGGING_FACE_OPTIONS.path,
      ...opts?.path,
    };

    const modelPath = pathOpts.template
      .replaceAll('{REPO_ID}', repoId)
      .replaceAll('{REVISION}', pathOpts.revision)
      .replaceAll('{MODEL_FILE}', pathOpts.modelFile);

    if (!URL.canParse(modelPath, hostname)) {
      throw Error(
        `[Invalid URL] Couldn't parse the model path: "${modelPath}"`,
      );
    }

    return await UserInferenceSession.fromUrl(new URL(modelPath, hostname));
  }

  async run(inputs) {
    const outputs = await core.ops.op_ai_ort_run_session(this.id, inputs);

    // Parse to Tensor
    for (const key in outputs) {
      if (Object.hasOwn(outputs, key)) {
        const { type, data, dims } = outputs[key];

        outputs[key] = new UserTensor(type, data.buffer, dims);
      }
    }

    return outputs;
  }
}

class UserTensor extends Tensor {
  constructor(type, data, dim) {
    super(type, data, dim);
  }

  async tryEncodeAudio(sampleRate) {
    return await core.ops.op_ai_ort_encode_tensor_audio(this.data, sampleRate);
  }
}

export default {
  RawSession: UserInferenceSession,
  RawTensor: UserTensor,
};
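
For context, a minimal sketch of how the RawSession / RawTensor exports above are meant to be used. The binding name ai and the repo id are illustrative assumptions, not part of this commit:

// Sketch only: `ai` stands for this module's default export; the repo id is illustrative.
const session = await ai.RawSession.fromHuggingFace('org/some-onnx-model');

// The session exposes the id and the input/output names reported by the runtime.
console.log(session.id, session.inputs, session.outputs);

// RawTensor wraps (type, data, dims), matching the constructor shown above.
const tensor = new ai.RawTensor('float32', new Float32Array([0, 1, 2, 3]), [1, 4]);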

ext/ai/js/onnxruntime/onnx.js

Lines changed: 11 additions & 2 deletions

@@ -18,7 +18,7 @@ const DataTypeMap = Object.freeze({
 class TensorProxy {
   get(target, property) {
     switch (property) {
-      case "data":
+      case 'data':
         return target.data?.c ?? target.data;

       default:

@@ -86,6 +86,15 @@ export class InferenceSession {
     return new InferenceSession(id, inputs, outputs);
   }

+  static async fromRequest(modelUrl, authorization) {
+    const [id, inputs, outputs] = await core.ops.op_ai_ort_init_session(
+      modelUrl,
+      authorization,
+    );
+
+    return new InferenceSession(id, inputs, outputs);
+  }
+
   async run(inputs) {
     const sessionInputs = {};

@@ -125,4 +134,4 @@ const onnxruntime = {
   },
 };

-globalThis[Symbol.for("onnxruntime")] = onnxruntime;
+globalThis[Symbol.for('onnxruntime')] = onnxruntime;
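
A minimal sketch of how the new fromRequest entry point might be called. The model URL and token below are placeholders; the authorization string is forwarded to op_ai_ort_init_session and, per the Rust changes in this commit, ends up as the request's Authorization header:

// Sketch only: URL and token are placeholders, not values from this commit.
const session = await InferenceSession.fromRequest(
  'https://example.com/private/model_quantized.onnx',
  'Bearer <access-token>', // sent as the Authorization header when the model is fetched
);

console.log(session.inputNames, session.outputNames);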

ext/ai/lib.rs

Lines changed: 2 additions & 1 deletion

@@ -118,7 +118,7 @@ async fn init_gte(state: Rc<RefCell<OpState>>) -> Result<(), Error> {
     let handle = handle.clone();
     move || {
       handle.block_on(async move {
-        load_session_from_url(Url::parse(consts::GTE_SMALL_MODEL_URL).unwrap())
+        load_session_from_url(Url::parse(consts::GTE_SMALL_MODEL_URL).unwrap(), None)
           .await
       })
     }

@@ -143,6 +143,7 @@ async fn init_gte(state: Rc<RefCell<OpState>>) -> Result<(), Error> {
       "tokenizer",
       Url::parse(consts::GTE_SMALL_TOKENIZER_URL).unwrap(),
       None,
+      None
     )
     .map_err(AnyError::from)
     .and_then(|it| {

ext/ai/onnxruntime/mod.rs

Lines changed: 16 additions & 14 deletions
(the remaining additions and deletions in this hunk are indentation-only; those lines appear once, as context, below)

@@ -37,26 +37,28 @@ use tokio_util::bytes::BufMut;
 #[op2(async)]
 #[to_v8]
 pub async fn op_ai_ort_init_session(
   state: Rc<RefCell<OpState>>,
   #[buffer] model_bytes: JsBuffer,
+  // Maybe improve the code style to enum payload or something else
+  #[string] req_authorization: Option<String>,
 ) -> Result<ModelInfo> {
   let model_bytes = model_bytes.into_parts().to_boxed_slice();
   let model_bytes_or_url = str::from_utf8(&model_bytes)
     .map_err(AnyError::from)
     .and_then(|utf8_str| Url::parse(utf8_str).map_err(AnyError::from));

   let model = match model_bytes_or_url {
     Ok(model_url) => {
       trace!(kind = "url", url = %model_url);
-      Model::from_url(model_url).await?
+      Model::from_url(model_url, req_authorization).await?
     }
     Err(_) => {
       trace!(kind = "bytes", len = model_bytes.len());
       Model::from_bytes(&model_bytes).await?
     }
   };

   let mut state = state.borrow_mut();
   let mut sessions =
     { state.try_take::<Vec<Arc<Session>>>().unwrap_or_default() };

ext/ai/onnxruntime/model.rs

Lines changed: 5 additions & 3 deletions

@@ -71,9 +71,11 @@ impl Model {
     .map(Self::new)
   }

-  pub async fn from_url(model_url: Url) -> Result<Self> {
-    load_session_from_url(model_url).await.map(Self::new)
-  }
+  pub async fn from_url(model_url: Url, authorization: Option<String>) -> Result<Self> {
+    load_session_from_url(model_url, authorization)
+      .await
+      .map(Self::new)
+  }

   pub async fn from_bytes(model_bytes: &[u8]) -> Result<Self> {
     load_session_from_bytes(model_bytes).await.map(Self::new)

ext/ai/onnxruntime/session.rs

Lines changed: 10 additions & 8 deletions
(the remaining additions and deletions in these hunks are indentation-only; those lines appear once, as context, below)

@@ -154,9 +154,10 @@ pub(crate) async fn load_session_from_bytes(

 #[instrument(level = "debug", fields(%model_url), err)]
 pub(crate) async fn load_session_from_url(
   model_url: Url,
+  authorization: Option<String>,
 ) -> Result<SessionWithId, Error> {
   let session_id = fxhash::hash(model_url.as_str()).to_string();

   let mut sessions = SESSIONS.lock().await;

@@ -165,12 +166,13 @@ pub(crate) async fn load_session_from_url(
     return Ok((session_id, session.clone()).into());
   }

   let model_file_path = crate::utils::fetch_and_cache_from_url(
     "model",
     model_url,
     Some(session_id.to_string()),
+    authorization,
   )
   .await?;

   let model_bytes = tokio::fs::read(model_file_path).await?;
   let session = create_session(model_bytes.as_slice())?;

ext/ai/utils.rs

Lines changed: 14 additions & 0 deletions

@@ -20,6 +20,7 @@ pub async fn fetch_and_cache_from_url(
   kind: &'static str,
   url: Url,
   cache_id: Option<String>,
+  authorization: Option<String>,
 ) -> Result<PathBuf, AnyError> {
   let cache_id = cache_id.unwrap_or(fxhash::hash(url.as_str()).to_string());
   let download_dir = std::env::var("EXT_AI_CACHE_DIR")

@@ -91,13 +92,26 @@ pub async fn fetch_and_cache_from_url(

   use reqwest::*;

+  let mut headers = header::HeaderMap::new();
+
+  if let Some(authorization) = authorization {
+    let mut authorization =
+      header::HeaderValue::from_str(authorization.as_str())?;
+    authorization.set_sensitive(true);
+
+    headers.insert(header::AUTHORIZATION, authorization);
+  };
+
   let resp = Client::builder()
     .http1_only()
+    .default_headers(headers)
     .build()
     .context("failed to create http client")?
     .get(url.clone())
     .send()
     .await
+    .context("failed to download")?
+    .error_for_status()
     .context("failed to download")?;

   let file = tokio::fs::File::create(&filepath)
