@@ -1,5 +1,6 @@
 import 'ext:ai/onnxruntime/onnx.js';
 import { parseJSON, parseJSONOverEventStream } from './llm/utils/json_parser.ts';
+import { LLMSession } from './llm/llm_session.ts';

 const core = globalThis.Deno.core;

@@ -13,10 +14,10 @@ class Session {
    this.model = model;
    this.is_ext_inference_api = false;

-    if (model === "gte-small") {
+    if (model === 'gte-small') {
      this.init = core.ops.op_ai_init_model(model);
    } else {
-      this.inferenceAPIHost = core.ops.op_get_env("AI_INFERENCE_API_HOST");
+      this.inferenceAPIHost = core.ops.op_get_env('AI_INFERENCE_API_HOST');
      this.is_ext_inference_api = !!this.inferenceAPIHost; // only enable external inference API if env variable is set
    }
  }
@@ -26,16 +27,30 @@ class Session {
     if (this.is_ext_inference_api) {
       const stream = opts.stream ?? false;

+      /** @type {'ollama' | 'openaicompatible'} */
+      const mode = opts.mode ?? 'ollama';
+
+      if (mode === 'ollama') {
+        // Using the new LLMSession API
+        const llmSession = LLMSession.fromProvider('ollama', {
+          inferenceAPIHost: this.inferenceAPIHost,
+          model: this.model,
+        });
+
+        return await llmSession.run({
+          prompt,
+          stream,
+          signal: opts.signal,
+          timeout: opts.timeout,
+        });
+      }
+
       // default timeout 60s
-      const timeout = typeof opts.timeout === "number" ? opts.timeout : 60;
+      const timeout = typeof opts.timeout === 'number' ? opts.timeout : 60;
       const timeoutMs = timeout * 1000;

-      /** @type {'ollama' | 'openaicompatible'} */
-      const mode = opts.mode ?? "ollama";
-
       switch (mode) {
-        case "ollama":
-        case "openaicompatible":
+        case 'openaicompatible':
           break;

         default:
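Note on the hunk above: the 'ollama' path is now delegated to the new LLMSession abstraction instead of the inline fetch/parse code that remains below for the OpenAI-compatible path. The real implementation lives in ./llm/llm_session.ts, which is not part of this diff; the following is only a minimal sketch of the interface implied by the two call sites (LLMSession.fromProvider and llmSession.run), with hypothetical names for everything not shown here.

// Hypothetical sketch only -- the actual ./llm/llm_session.ts is not shown in this diff.
interface ProviderOptions {
  inferenceAPIHost: string;
  model: string;
}

interface RunOptions {
  prompt: unknown;
  stream: boolean;
  signal?: AbortSignal;
  timeout?: number; // seconds, matching the 60s default used below
}

export class LLMSession {
  private constructor(
    private provider: 'ollama',
    private opts: ProviderOptions,
  ) {}

  // Matches the call site in the diff: LLMSession.fromProvider('ollama', { ... })
  static fromProvider(provider: 'ollama', opts: ProviderOptions): LLMSession {
    return new LLMSession(provider, opts);
  }

  // Matches the call site in the diff: llmSession.run({ prompt, stream, signal, timeout })
  async run(opts: RunOptions): Promise<unknown> {
    // Presumably performs the work removed from this file: POST to the
    // provider endpoint on inferenceAPIHost (Ollama's /api/generate),
    // honoring stream, signal, and timeout.
    throw new Error('sketch only');
  }
}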
@@ -48,15 +63,15 @@ class Session {

       const signal = AbortSignal.any(signals);

-      const path = mode === "ollama" ? "/api/generate" : "/v1/chat/completions";
-      const body = mode === "ollama" ? { prompt } : prompt;
+      const path = '/v1/chat/completions';
+      const body = prompt;

       const res = await fetch(
         new URL(path, this.inferenceAPIHost),
         {
-          method: "POST",
+          method: 'POST',
           headers: {
-            "Content-Type": "application/json",
+            'Content-Type': 'application/json',
           },
           body: JSON.stringify({
             model: this.model,
@@ -74,20 +89,16 @@ class Session {
       }

       if (!res.body) {
-        throw new Error("Missing body");
+        throw new Error('Missing body');
       }

-      const parseGenFn = mode === "ollama"
-        ? parseJSON
-        : stream === true
-        ? parseJSONOverEventStream
-        : parseJSON;
+      const parseGenFn = stream === true ? parseJSONOverEventStream : parseJSON;
       const itr = parseGenFn(res.body, signal);

       if (stream) {
         return (async function* () {
           for await (const message of itr) {
-            if ("error" in message) {
+            if ('error' in message) {
               if (message.error instanceof Error) {
                 throw message.error;
               } else {
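Note on the parser selection above: with the Ollama branch gone, the external-API path always speaks the OpenAI-compatible protocol, so the choice reduces to SSE parsing for streamed responses and plain JSON parsing otherwise. The helpers come from ./llm/utils/json_parser.ts and are not shown in this diff; the sketch below is only an assumed illustration of the streaming contract (an async generator yielding one parsed object per "data:" frame), not the real implementation.

// Rough sketch of the assumed parseJSONOverEventStream contract; the real helper
// lives in ./llm/utils/json_parser.ts and is not part of this diff.
async function* parseJSONOverEventStreamSketch(
  body: ReadableStream<Uint8Array>,
  signal: AbortSignal,
): AsyncGenerator<unknown> {
  const reader = body.pipeThrough(new TextDecoderStream()).getReader();
  let buffer = '';

  while (!signal.aborted) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += value;

    // SSE frames are separated by blank lines; payload lines start with "data: ".
    let idx: number;
    while ((idx = buffer.indexOf('\n\n')) !== -1) {
      const frame = buffer.slice(0, idx);
      buffer = buffer.slice(idx + 2);
      for (const line of frame.split('\n')) {
        if (!line.startsWith('data: ')) continue;
        const data = line.slice('data: '.length);
        if (data === '[DONE]') return; // common OpenAI-compatible stream terminator
        yield JSON.parse(data);
      }
    }
  }
}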
@@ -98,20 +109,12 @@ class Session {
             yield message;

             switch (mode) {
-              case "ollama": {
-                if (message.done) {
-                  return;
-                }
-
-                break;
-              }
-
-              case "openaicompatible": {
+              case 'openaicompatible': {
                 const finishReason = message.choices[0].finish_reason;

                 if (finishReason) {
-                  if (finishReason !== "stop") {
-                    throw new Error("Expected a completed response.");
+                  if (finishReason !== 'stop') {
+                    throw new Error('Expected a completed response.');
                   }

                   return;
@@ -121,18 +124,18 @@ class Session {
               }

               default:
-                throw new Error("unreachable");
+                throw new Error('unreachable');
             }
           }

           throw new Error(
-            "Did not receive done or success response in stream.",
+            'Did not receive done or success response in stream.',
           );
         })();
       } else {
         const message = await itr.next();

-        if (message.value && "error" in message.value) {
+        if (message.value && 'error' in message.value) {
           const error = message.value.error;

           if (error instanceof Error) {
@@ -142,12 +145,10 @@ class Session {
           }
         }

-        const finish = mode === "ollama"
-          ? message.value.done
-          : message.value.choices[0].finish_reason === "stop";
+        const finish = message.value.choices[0].finish_reason === 'stop';

        if (finish !== true) {
-          throw new Error("Expected a completed response.");
+          throw new Error('Expected a completed response.');
        }

        return message.value;
@@ -172,8 +173,7 @@ class Session {
 }

 const MAIN_WORKER_API = {
-  tryCleanupUnusedSession: () =>
-    /* async */ core.ops.op_ai_try_cleanup_unused_session(),
+  tryCleanupUnusedSession: () => /* async */ core.ops.op_ai_try_cleanup_unused_session(),
 };

 const USER_WORKER_API = {
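Taken together, the changes mean Session.run itself only speaks the OpenAI-compatible protocol (POST to /v1/chat/completions, finish_reason === 'stop') and defers the Ollama mode to LLMSession. Below is a hedged usage sketch from a user worker: the Supabase.ai.Session global follows the existing edge-runtime convention, the model name 'llama3' is hypothetical, and the exact prompt/response shapes depend on the OpenAI-compatible server reachable at AI_INFERENCE_API_HOST.

// Hedged usage sketch; not part of this diff.
declare const Supabase: any; // global provided by the edge runtime

const session = new Supabase.ai.Session('llama3'); // hypothetical model name

const output = await session.run(
  // In 'openaicompatible' mode the prompt is forwarded as the request body,
  // so an OpenAI-style messages payload is assumed here.
  { messages: [{ role: 'user', content: 'Summarize this PR in one sentence.' }] },
  { mode: 'openaicompatible', stream: false, timeout: 30 },
);

// Non-streaming responses resolve once choices[0].finish_reason === 'stop'.
console.log(output.choices[0].message.content);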