@@ -27,7 +27,9 @@ import {
2727 Availability ,
2828 LanguageModel ,
2929 LanguageModelCreateOptions ,
30- LanguageModelMessageContent
30+ LanguageModelExpectedInput ,
31+ LanguageModelMessageContent ,
32+ LanguageModelMessageType
3133} from '../types/language-model' ;
3234
3335/**
@@ -44,9 +46,7 @@ export class ChromeAdapter {
4446 private languageModelProvider ?: LanguageModel ,
4547 private mode ?: InferenceMode ,
4648 private onDeviceParams : LanguageModelCreateOptions = { }
47- ) {
48- this . addImageTypeAsExpectedInput ( ) ;
49- }
49+ ) { }
5050
5151 /**
5252 * Checks if a given request can be made on-device.
@@ -68,8 +68,10 @@ export class ChromeAdapter {
6868 return false ;
6969 }
7070
71+ const expectedInputs = ChromeAdapter . extractExpectedInputs ( request ) ;
72+
7173 // Triggers out-of-band download so model will eventually become available.
72- const availability = await this . downloadIfAvailable ( ) ;
74+ const availability = await this . downloadIfAvailable ( expectedInputs ) ;
7375
7476 if ( this . mode === 'only_on_device' ) {
7577 return true ;
@@ -129,6 +131,33 @@ export class ChromeAdapter {
129131 ) ;
130132 }
131133
/**
 * Maps
 * <a href="https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#blob">
 * Vertex's input mime types</a> to
 * <a href="https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#full-api-surface-in-web-idl">
 * Chrome's expected types</a>.
 *
 * <p>Chrome's API checks availability by type. It's tedious to specify the types in advance, so
 * this method infers the types from the request's inline data parts.</p>
 *
 * @param request - Request whose `contents[].parts[].inlineData.mimeType` values are inspected.
 * Parts without `inlineData` contribute nothing.
 * @returns One `{ type }` entry per distinct top-level mime type (the text before the first `/`),
 * in first-seen order. Empty when the request carries no inline data.
 */
private static extractExpectedInputs(
  request: GenerateContentRequest
): LanguageModelExpectedInput[] {
  // Deduplicate on the type string. A Set of `{ type }` objects compares members by reference,
  // so every freshly created literal would be kept and repeated types would never collapse.
  const types = new Set<LanguageModelMessageType>();
  for (const content of request.contents) {
    for (const part of content.parts) {
      if (part.inlineData) {
        // NOTE(review): the cast is unchecked; a mime type such as "application/pdf" yields
        // "application" here. Confirm upstream validation restricts inputs to supported types.
        const type = part.inlineData.mimeType.split(
          '/'
        )[0] as LanguageModelMessageType;
        types.add(type);
      }
    }
  }
  return Array.from(types, type => ({ type }));
}
160+
132161 /**
133162 * Asserts inference for the given request can be performed by an on-device model.
134163 */
@@ -164,12 +193,20 @@ export class ChromeAdapter {
164193 /**
165194 * Encapsulates logic to get availability and download a model if one is downloadable.
166195 */
167- private async downloadIfAvailable ( ) : Promise < Availability | undefined > {
196+ private async downloadIfAvailable (
197+ expectedInputs : LanguageModelExpectedInput [ ]
198+ ) : Promise < Availability | undefined > {
199+ // Side-effect: updates construction-time params with request-time params.
200+ // This is required because params are referenced through multiple flows.
201+ Object . assign ( this . onDeviceParams , { expectedInputs } ) ;
202+
168203 const availability = await this . languageModelProvider ?. availability (
169204 this . onDeviceParams
170205 ) ;
171206
172207 if ( availability === Availability . downloadable ) {
208+ // Side-effect: triggers out-of-band model download.
209+ // This is required because Chrome manages the model download.
173210 this . download ( ) ;
174211 }
175212
@@ -252,11 +289,6 @@ export class ChromeAdapter {
252289 return newSession ;
253290 }
254291
255- private addImageTypeAsExpectedInput ( ) : void {
256- // Defaults to support image inputs for convenience.
257- this . onDeviceParams . expectedInputs ??= [ { type : 'image' } ] ;
258- }
259-
260292 /**
261293 * Formats string returned by Chrome as a {@link Response} returned by Vertex.
262294 */
0 commit comments