
Commit 3d98ea4

feat: implementing 'OpenAI compatible' provider
- Applying LLM provider interfaces to implement the 'openaicompatible' mode
1 parent f90fdd6 commit 3d98ea4

5 files changed: 248 additions & 150 deletions

ext/ai/js/ai.js

Lines changed: 21 additions & 124 deletions
@@ -1,6 +1,5 @@
 import 'ext:ai/onnxruntime/onnx.js';
-import { parseJSON, parseJSONOverEventStream } from './llm/utils/json_parser.ts';
-import { LLMSession } from './llm/llm_session.ts';
+import { LLMSession, providers } from './llm/llm_session.ts';
 
 const core = globalThis.Deno.core;
 
@@ -9,11 +8,15 @@ class Session {
   init;
   is_ext_inference_api;
   inferenceAPIHost;
+  extraOpts;
 
-  constructor(model) {
+  // TODO:(kallebysantos) get 'provider' type here and use type checking to suggest Inputs when run
+  constructor(model, opts = {}) {
     this.model = model;
     this.is_ext_inference_api = false;
+    this.extraOpts = opts;
 
+    // TODO:(kallebysantos) do we still need gte-small?
     if (model === 'gte-small') {
       this.init = core.ops.op_ai_init_model(model);
     } else {
@@ -28,131 +31,25 @@ class Session {
       const stream = opts.stream ?? false;
 
       /** @type {'ollama' | 'openaicompatible'} */
+      // TODO:(kallebysantos) get mode from 'new' and apply type checking based on that
       const mode = opts.mode ?? 'ollama';
 
-      if (mode === 'ollama') {
-        // Using the new LLMSession API
-        const llmSession = LLMSession.fromProvider('ollama', {
-          inferenceAPIHost: this.inferenceAPIHost,
-          model: this.model,
-        });
-
-        return await llmSession.run({
-          prompt,
-          stream,
-          signal: opts.signal,
-          timeout: opts.timeout,
-        });
-      }
-
-      // default timeout 60s
-      const timeout = typeof opts.timeout === 'number' ? opts.timeout : 60;
-      const timeoutMs = timeout * 1000;
-
-      switch (mode) {
-        case 'openaicompatible':
-          break;
-
-        default:
-          throw new TypeError(`invalid mode: ${mode}`);
-      }
-
-      const timeoutSignal = AbortSignal.timeout(timeoutMs);
-      const signals = [opts.signal, timeoutSignal]
-        .filter((it) => it instanceof AbortSignal);
-
-      const signal = AbortSignal.any(signals);
-
-      const path = '/v1/chat/completions';
-      const body = prompt;
-
-      const res = await fetch(
-        new URL(path, this.inferenceAPIHost),
-        {
-          method: 'POST',
-          headers: {
-            'Content-Type': 'application/json',
-          },
-          body: JSON.stringify({
-            model: this.model,
-            stream,
-            ...body,
-          }),
-        },
-        { signal },
-      );
-
-      if (!res.ok) {
-        throw new Error(
-          `Failed to fetch inference API host. Status ${res.status}: ${res.statusText}`,
-        );
+      if (!Object.keys(providers).includes(mode)) {
+        throw new TypeError(`invalid mode: ${mode}`);
       }
 
-      if (!res.body) {
-        throw new Error('Missing body');
-      }
-
-      const parseGenFn = stream === true ? parseJSONOverEventStream : parseJSON;
-      const itr = parseGenFn(res.body, signal);
-
-      if (stream) {
-        return (async function* () {
-          for await (const message of itr) {
-            if ('error' in message) {
-              if (message.error instanceof Error) {
-                throw message.error;
-              } else {
-                throw new Error(message.error);
-              }
-            }
-
-            yield message;
-
-            switch (mode) {
-              case 'openaicompatible': {
-                const finishReason = message.choices[0].finish_reason;
-
-                if (finishReason) {
-                  if (finishReason !== 'stop') {
-                    throw new Error('Expected a completed response.');
-                  }
-
-                  return;
-                }
-
-                break;
-              }
-
-              default:
-                throw new Error('unreachable');
-            }
-          }
-
-          throw new Error(
-            'Did not receive done or success response in stream.',
-          );
-        })();
-      } else {
-        const message = await itr.next();
-
-        if (message.value && 'error' in message.value) {
-          const error = message.value.error;
-
-          if (error instanceof Error) {
-            throw error;
-          } else {
-            throw new Error(error);
-          }
-        }
-
-        const finish = message.value.choices[0].finish_reason === 'stop';
-
-        if (finish !== true) {
-          throw new Error('Expected a completed response.');
-        }
-
-        return message.value;
-      }
+      const llmSession = LLMSession.fromProvider(mode, {
+        inferenceAPIHost: this.inferenceAPIHost,
+        model: this.model,
+        ...this.extraOpts, // allows custom provider initialization like 'apiKey'
+      });
+
+      return await llmSession.run({
+        prompt,
+        stream,
+        signal: opts.signal,
+        timeout: opts.timeout,
+      });
     }
 
     if (this.init) {
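
For orientation, here is a sketch of how the reworked run() path could be exercised from user code after this change. It is an assumption-laden example, not code from this commit: the model name and timeout are made up, the OpenAI-style prompt body mirrors the request shape removed above, and 'apiKey' is taken from the new "custom provider initialization" comment.

// Hypothetical usage sketch; constructor options are forwarded to the provider via extraOpts.
const session = new Session('gpt-4o-mini', {
  apiKey: Deno.env.get('OPENAI_API_KEY'), // assumed provider-specific option
});

// 'openaicompatible' must be a key of the providers map exported by llm_session.ts.
const output = await session.run(
  { messages: [{ role: 'user', content: 'Hello!' }] }, // assumed OpenAI-style chat body
  { mode: 'openaicompatible', stream: false, timeout: 60 },
);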

ext/ai/js/llm/llm_session.ts

Lines changed: 14 additions & 5 deletions
@@ -1,4 +1,5 @@
 import { OllamaLLMSession } from './providers/ollama.ts';
+import { OpenAILLMSession } from './providers/openai.ts';
 
 // @ts-ignore deno_core environment
 const core = globalThis.Deno.core;
@@ -20,18 +21,25 @@ export type LLMRunInput = {
 };
 
 export interface ILLMProviderOptions {
-  inferenceAPIHost: string;
   model: string;
+  inferenceAPIHost: string;
+}
+
+export interface ILLMProviderInput {
+  prompt: string | object;
+  signal: AbortSignal;
 }
 
 export interface ILLMProvider {
   // TODO:(kallebysantos) remove 'any'
-  getStream(prompt: string, signal: AbortSignal): Promise<AsyncIterable<any>>;
-  getText(prompt: string, signal: AbortSignal): Promise<any>;
+  // TODO: (kallebysantos) standardised output format
+  getStream(input: ILLMProviderInput): Promise<AsyncIterable<any>>;
+  getText(input: ILLMProviderInput): Promise<any>;
 }
 
 export const providers = {
   'ollama': OllamaLLMSession,
+  'openaicompatible': OpenAILLMSession,
 } satisfies Record<string, new (opts: ILLMProviderOptions) => ILLMProvider>;
 
 export type LLMProviderName = keyof typeof providers;
@@ -65,10 +73,11 @@ export class LLMSession {
       .filter((it) => it instanceof AbortSignal);
     const signal = AbortSignal.any(abortSignals);
 
+    const llmInput: ILLMProviderInput = { prompt: opts.prompt, signal };
     if (isStream) {
-      return this.#inner.getStream(opts.prompt, signal);
+      return this.#inner.getStream(llmInput);
     }
 
-    return this.#inner.getText(opts.prompt, signal);
+    return this.#inner.getText(llmInput);
   }
 }
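
The new file ext/ai/js/llm/providers/openai.ts registered above belongs to this commit but is not shown in this excerpt. For orientation only, a minimal ILLMProvider for an OpenAI-compatible /v1/chat/completions endpoint could be sketched as below, reconstructed from the interface above and the request logic removed from ai.js; the OpenAIProviderOptions fields, the Authorization header, and the use of parseJSONOverEventStream for streaming are assumptions rather than the actual implementation.

import { ILLMProvider, ILLMProviderInput, ILLMProviderOptions } from '../llm_session.ts';
import { parseJSONOverEventStream } from '../utils/json_parser.ts';

// Sketch only: the real openai.ts added by this commit is not shown here.
export type OpenAIProviderOptions = ILLMProviderOptions & {
  apiKey?: string; // assumed, based on the "custom provider initialization like 'apiKey'" comment
};

export class OpenAILLMSession implements ILLMProvider {
  constructor(private opts: OpenAIProviderOptions) {}

  async getStream({ prompt, signal }: ILLMProviderInput): Promise<AsyncIterable<any>> {
    const res = await this.#chatCompletions(prompt, true, signal);
    // Chat-completion streams arrive as server-sent events, so parse them as an event stream.
    return parseJSONOverEventStream(res.body!, signal);
  }

  async getText({ prompt, signal }: ILLMProviderInput): Promise<any> {
    const res = await this.#chatCompletions(prompt, false, signal);
    return await res.json();
  }

  async #chatCompletions(prompt: string | object, stream: boolean, signal: AbortSignal) {
    const res = await fetch(new URL('/v1/chat/completions', this.opts.inferenceAPIHost), {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        ...(this.opts.apiKey ? { Authorization: `Bearer ${this.opts.apiKey}` } : {}),
      },
      body: JSON.stringify({ model: this.opts.model, stream, ...(prompt as object) }),
      signal,
    });

    if (!res.ok || !res.body) {
      throw new Error(`Failed to fetch inference API host. Status ${res.status}: ${res.statusText}`);
    }

    return res;
  }
}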

ext/ai/js/llm/providers/ollama.ts

Lines changed: 21 additions & 21 deletions
@@ -1,7 +1,10 @@
-import { ILLMProvider, ILLMProviderOptions } from '../llm_session.ts';
+import { ILLMProvider, ILLMProviderInput, ILLMProviderOptions } from '../llm_session.ts';
 import { parseJSON } from '../utils/json_parser.ts';
 
 export type OllamaProviderOptions = ILLMProviderOptions;
+export type OllamaProviderInput = ILLMProviderInput & {
+  prompt: string;
+};
 
 export type OllamaMessage = {
   model: string;
@@ -26,10 +29,13 @@ export class OllamaLLMSession implements ILLMProvider {
 
   // ref: https://github.com/ollama/ollama-js/blob/6a4bfe3ab033f611639dfe4249bdd6b9b19c7256/src/utils.ts#L26
   async getStream(
-    prompt: string,
-    signal: AbortSignal,
+    { prompt, signal }: OllamaProviderInput,
   ): Promise<AsyncIterable<OllamaMessage>> {
-    const generator = await this.generate(prompt, signal, true);
+    const generator = await this.generate(
+      prompt,
+      signal,
+      true,
+    ) as AsyncGenerator<OllamaMessage>;
 
     const stream = async function* () {
       for await (const message of generator) {
@@ -55,22 +61,10 @@ export class OllamaLLMSession implements ILLMProvider {
     return stream();
   }
 
-  async getText(prompt: string, signal: AbortSignal): Promise<OllamaMessage> {
-    const generator = await this.generate(prompt, signal);
-
-    const message = await generator.next();
-
-    if (message.value && 'error' in message.value) {
-      const error = message.value.error;
-
-      if (error instanceof Error) {
-        throw error;
-      } else {
-        throw new Error(error);
-      }
-    }
-
-    const response = message.value;
+  async getText(
+    { prompt, signal }: OllamaProviderInput,
+  ): Promise<OllamaMessage> {
+    const response = await this.generate(prompt, signal) as OllamaMessage;
 
     if (!response?.done) {
       throw new Error('Expected a completed response.');
@@ -110,6 +104,12 @@ export class OllamaLLMSession implements ILLMProvider {
       throw new Error('Missing body');
     }
 
-    return parseJSON<OllamaMessage>(res.body, signal);
+    if (stream) {
+      return parseJSON<OllamaMessage>(res.body, signal);
+    }
+
+    const result: OllamaMessage = await res.json();
+
+    return result;
   }
 }
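
To illustrate the new call shape, where positional prompt/signal arguments are replaced by a single OllamaProviderInput, a small usage sketch follows; the model name and host are made up, and in practice LLMSession.fromProvider('ollama', ...) constructs the provider rather than user code.

import { OllamaLLMSession } from './providers/ollama.ts';

// Hypothetical direct usage; values are illustrative only.
const ollama = new OllamaLLMSession({
  model: 'llama3', // assumed model name
  inferenceAPIHost: 'http://localhost:11434', // assumed Ollama host
});

const signal = AbortSignal.timeout(60_000);

// Non-streaming: generate() now resolves straight to a parsed OllamaMessage.
const message = await ollama.getText({ prompt: 'Why is the sky blue?', signal });
console.log(message);

// Streaming: getStream() yields OllamaMessage chunks until the model reports done.
for await (const chunk of await ollama.getStream({ prompt: 'Tell me a story', signal })) {
  console.log(chunk);
}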
