import 'ext:ai/onnxruntime/onnx.js';
-import { parseJSON, parseJSONOverEventStream } from './llm/utils/json_parser.ts';
-import { LLMSession } from './llm/llm_session.ts';
+import { LLMSession, providers } from './llm/llm_session.ts';

const core = globalThis.Deno.core;

@@ -9,11 +8,15 @@ class Session {
  init;
  is_ext_inference_api;
  inferenceAPIHost;
+  extraOpts;

-  constructor(model) {
+  // TODO:(kallebysantos) get 'provider' type here and use type checking to suggest Inputs when run
+  constructor(model, opts = {}) {
    this.model = model;
    this.is_ext_inference_api = false;
+    this.extraOpts = opts;

+    // TODO:(kallebysantos) do we still need gte-small?
    if (model === 'gte-small') {
      this.init = core.ops.op_ai_init_model(model);
    } else {
@@ -28,131 +31,25 @@ class Session {
      const stream = opts.stream ?? false;

      /** @type {'ollama' | 'openaicompatible'} */
+      // TODO:(kallebysantos) get mode from 'new' and apply type checking based on that
      const mode = opts.mode ?? 'ollama';

-      if (mode === 'ollama') {
-        // Using the new LLMSession API
-        const llmSession = LLMSession.fromProvider('ollama', {
-          inferenceAPIHost: this.inferenceAPIHost,
-          model: this.model,
-        });
-
-        return await llmSession.run({
-          prompt,
-          stream,
-          signal: opts.signal,
-          timeout: opts.timeout,
-        });
-      }
-
-      // default timeout 60s
-      const timeout = typeof opts.timeout === 'number' ? opts.timeout : 60;
-      const timeoutMs = timeout * 1000;
-
-      switch (mode) {
-        case 'openaicompatible':
-          break;
-
-        default:
-          throw new TypeError(`invalid mode: ${mode}`);
-      }
-
-      const timeoutSignal = AbortSignal.timeout(timeoutMs);
-      const signals = [opts.signal, timeoutSignal]
-        .filter((it) => it instanceof AbortSignal);
-
-      const signal = AbortSignal.any(signals);
-
-      const path = '/v1/chat/completions';
-      const body = prompt;
-
-      const res = await fetch(
-        new URL(path, this.inferenceAPIHost),
-        {
-          method: 'POST',
-          headers: {
-            'Content-Type': 'application/json',
-          },
-          body: JSON.stringify({
-            model: this.model,
-            stream,
-            ...body,
-          }),
-        },
-        { signal },
-      );
-
-      if (!res.ok) {
-        throw new Error(
-          `Failed to fetch inference API host. Status ${res.status}: ${res.statusText}`,
-        );
+      if (!Object.keys(providers).includes(mode)) {
+        throw new TypeError(`invalid mode: ${mode}`);
      }

-      if (!res.body) {
-        throw new Error('Missing body');
-      }
-
-      const parseGenFn = stream === true ? parseJSONOverEventStream : parseJSON;
-      const itr = parseGenFn(res.body, signal);
-
-      if (stream) {
-        return (async function* () {
-          for await (const message of itr) {
-            if ('error' in message) {
-              if (message.error instanceof Error) {
-                throw message.error;
-              } else {
-                throw new Error(message.error);
-              }
-            }
-
-            yield message;
-
-            switch (mode) {
-              case 'openaicompatible': {
-                const finishReason = message.choices[0].finish_reason;
-
-                if (finishReason) {
-                  if (finishReason !== 'stop') {
-                    throw new Error('Expected a completed response.');
-                  }
-
-                  return;
-                }
-
-                break;
-              }
-
-              default:
-                throw new Error('unreachable');
-            }
-          }
-
-          throw new Error(
-            'Did not receive done or success response in stream.',
-          );
-        })();
-      } else {
-        const message = await itr.next();
-
-        if (message.value && 'error' in message.value) {
-          const error = message.value.error;
-
-          if (error instanceof Error) {
-            throw error;
-          } else {
-            throw new Error(error);
-          }
-        }
-
-        const finish = message.value.choices[0].finish_reason === 'stop';
-
-        if (finish !== true) {
-          throw new Error('Expected a completed response.');
-        }
-
-        return message.value;
-      }
+      const llmSession = LLMSession.fromProvider(mode, {
+        inferenceAPIHost: this.inferenceAPIHost,
+        model: this.model,
+        ...this.extraOpts, // allows custom provider initialization like 'apiKey'
+      });
+
+      return await llmSession.run({
+        prompt,
+        stream,
+        signal: opts.signal,
+        timeout: opts.timeout,
+      });
    }

    if (this.init) {
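For context, a minimal usage sketch of the updated API. It assumes the `openaicompatible` provider is registered in `providers`, that it accepts an `apiKey` option through the new `extraOpts` passthrough (per the inline comment in the diff), and that the prompt keeps the chat-completions shape the removed code used to build; the model name and environment variable below are placeholders, not part of this commit.

```js
// Hypothetical usage sketch (not part of this commit).
// Extra constructor options are forwarded to LLMSession.fromProvider()
// alongside inferenceAPIHost and model.
const session = new Session('mistral', {
  apiKey: Deno.env.get('LLM_API_KEY'), // provider-specific option; name assumed
});

// `mode` must be a key of `providers`, otherwise run() throws the TypeError above.
const output = await session.run(
  { messages: [{ role: 'user', content: 'Hello!' }] },
  { mode: 'openaicompatible', stream: false, timeout: 60 },
);
```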