Skip to content

Commit e1c6856

Browse files
Fix Embedder Schemas & Migrate Default Embedding Model
- Fixed missing output schema in Dev UI for embedder actions by explicitly setting input_schema and output_schema on Action objects.
- Added default sampleInput to embedder actions for easier testing.
- Resolved a deadlock in registry.py affecting dotprompt schema resolution (switched to async).
- Migrated default embedding model references from text-embedding-004 to gemini-embedding-001 across plugins, samples, and tests due to model deprecation/availability.
- Fixed a redundant cast lint warning in _aio.py.
1 parent 8843661 commit e1c6856

File tree

21 files changed

+52
-41
lines changed

21 files changed

+52
-41
lines changed

py/engdoc/parity-analysis/plugin_api_consistency.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ model='ollama/llama3'
237237

238238
**JS Pattern:**
239239
```typescript
240-
const embedder = googleAI.embedder('text-embedding-004');
240+
const embedder = googleAI.embedder('gemini-embedding-001');
241241
```
242242

243243
**Python:** ❌ No equivalent

py/packages/genkit/src/genkit/ai/_aio.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ async def embed(
619619
{version, ...config, ...options} (options take precedence).
620620
621621
Args:
622-
embedder: Embedder name (e.g., 'googleai/text-embedding-004') or
622+
embedder: Embedder name (e.g., 'googleai/gemini-embedding-001') or
623623
an EmbedderRef with configuration.
624624
content: A single string, Document, or DocumentData to embed.
625625
metadata: Optional metadata to apply to the document. Only used
@@ -634,25 +634,25 @@ async def embed(
634634
ValueError: If content is not specified.
635635
636636
Example - Basic string embedding:
637-
>>> embeddings = await ai.embed(embedder='googleai/text-embedding-004', content='Hello, world!')
637+
>>> embeddings = await ai.embed(embedder='googleai/gemini-embedding-001', content='Hello, world!')
638638
>>> print(len(embeddings[0].embedding)) # Vector dimensions
639639
640640
Example - With metadata:
641641
>>> embeddings = await ai.embed(
642-
... embedder='googleai/text-embedding-004',
642+
... embedder='googleai/gemini-embedding-001',
643643
... content='Product description',
644644
... metadata={'category': 'electronics'},
645645
... )
646646
647647
Example - With embedder options:
648648
>>> embeddings = await ai.embed(
649-
... embedder='googleai/text-embedding-004',
649+
... embedder='googleai/gemini-embedding-001',
650650
... content='Search query',
651651
... options={'task_type': 'RETRIEVAL_QUERY'},
652652
... )
653653
654654
Example - Using EmbedderRef:
655-
>>> ref = create_embedder_ref('googleai/text-embedding-004', config={'task_type': 'CLUSTERING'})
655+
>>> ref = create_embedder_ref('googleai/gemini-embedding-001', config={'task_type': 'CLUSTERING'})
656656
>>> embeddings = await ai.embed(embedder=ref, content='Text')
657657
"""
658658
embedder_name = self._resolve_embedder_name(embedder)
@@ -706,7 +706,7 @@ async def embed_many(
706706
and passes options directly. This matches the JS canonical behavior.
707707
708708
Args:
709-
embedder: Embedder name (e.g., 'googleai/text-embedding-004') or
709+
embedder: Embedder name (e.g., 'googleai/gemini-embedding-001') or
710710
an EmbedderRef.
711711
content: List of strings, Documents, or DocumentData to embed.
712712
metadata: Optional metadata to apply to all items. Only used when
@@ -722,22 +722,22 @@ async def embed_many(
722722
723723
Example - Basic batch embedding:
724724
>>> embeddings = await ai.embed_many(
725-
... embedder='googleai/text-embedding-004',
725+
... embedder='googleai/gemini-embedding-001',
726726
... content=['Doc 1', 'Doc 2', 'Doc 3'],
727727
... )
728728
>>> for i, emb in enumerate(embeddings):
729729
... print(f'Doc {i}: {len(emb.embedding)} dims')
730730
731731
Example - With shared metadata:
732732
>>> embeddings = await ai.embed_many(
733-
... embedder='googleai/text-embedding-004',
733+
... embedder='googleai/gemini-embedding-001',
734734
... content=['text1', 'text2'],
735735
... metadata={'batch_id': 'batch-001'},
736736
... )
737737
738738
Example - With options (EmbedderRef config is NOT extracted):
739739
>>> embeddings = await ai.embed_many(
740-
... embedder='googleai/text-embedding-004',
740+
... embedder='googleai/gemini-embedding-001',
741741
... content=documents,
742742
... options={'task_type': 'RETRIEVAL_DOCUMENT'},
743743
... )

py/packages/genkit/src/genkit/blocks/embedding.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
| | vectors. Contains 'embedding' field and metadata. |
3232
+-------------------+------------------------------------------------------+
3333
| Embedder | A model/service that converts text to embeddings. |
34-
| | Examples: 'googleai/text-embedding-004'. Registered |
34+
| | Examples: 'googleai/gemini-embedding-001'. Registered |
3535
| | as actions, invoked via embed() and embed_many(). |
3636
+-------------------+------------------------------------------------------+
3737
| EmbedderRef | Reference bundling embedder name with optional |
@@ -68,14 +68,14 @@
6868
- ai.embed_many(): Embed multiple pieces of content in batch
6969
7070
Example - Single embedding:
71-
>>> embeddings = await ai.embed(embedder='googleai/text-embedding-004', content='Hello, world!')
71+
>>> embeddings = await ai.embed(embedder='googleai/gemini-embedding-001', content='Hello, world!')
7272
>>> vector = embeddings[0].embedding
7373
7474
Example - Batch embedding:
75-
>>> embeddings = await ai.embed_many(embedder='googleai/text-embedding-004', content=['Doc 1', 'Doc 2', 'Doc 3'])
75+
>>> embeddings = await ai.embed_many(embedder='googleai/gemini-embedding-001', content=['Doc 1', 'Doc 2', 'Doc 3'])
7676
7777
Example - Using EmbedderRef with configuration:
78-
>>> ref = create_embedder_ref('googleai/text-embedding-004', config={'task_type': 'CLUSTERING'}, version='v1')
78+
>>> ref = create_embedder_ref('googleai/gemini-embedding-001', config={'task_type': 'CLUSTERING'}, version='v1')
7979
>>> embeddings = await ai.embed(embedder=ref, content='My text')
8080
8181
Note on embed() vs embed_many():
@@ -195,12 +195,16 @@ def embedder_action_metadata(
195195

196196
embedder_info['customOptions'] = options.config_schema if options.config_schema else None
197197

198+
# Default sample input for easier testing in Dev UI
199+
sample_input: dict[str, object] = {'input': [{'content': [{'text': 'Hello, world!'}]}]}
200+
198201
return ActionMetadata(
199202
kind=cast(ActionKind, ActionKind.EMBEDDER),
200203
name=name,
201204
input_json_schema=to_json_schema(EmbedRequest),
202205
output_json_schema=to_json_schema(EmbedResponse),
203206
metadata=embedder_metadata_dict,
207+
sample_input=sample_input,
204208
)
205209

206210

py/packages/genkit/src/genkit/blocks/prompt.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -851,9 +851,11 @@ class RecipeInput(BaseModel):
851851
middleware = effective_opts.get('use') or self._use
852852
context = effective_opts.get('context')
853853

854+
rendered_options = await self.render(input=input, opts=effective_opts)
855+
854856
result = await generate_action(
855857
self._registry,
856-
await self.render(input=input, opts=effective_opts),
858+
rendered_options,
857859
on_chunk=on_chunk,
858860
middleware=middleware,
859861
context=context if context else ActionRunContext._current_context(), # pyright: ignore[reportPrivateUsage]

py/packages/genkit/src/genkit/core/action/_action.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@
9292
from functools import cached_property
9393
from typing import Any, Generic, Protocol, cast, get_type_hints
9494

95-
from pydantic import BaseModel, TypeAdapter, ValidationError
95+
from pydantic import BaseModel, ConfigDict, TypeAdapter, ValidationError
96+
from pydantic.alias_generators import to_camel
9697
from typing_extensions import Never, TypeVar
9798

9899
from genkit.aio import Channel, ensure_async
@@ -524,6 +525,8 @@ def _initialize_io_schemas(
524525
class ActionMetadata(BaseModel):
525526
"""Metadata for actions."""
526527

528+
model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
529+
527530
kind: ActionKind
528531
name: str
529532
description: str | None = None
@@ -533,6 +536,7 @@ class ActionMetadata(BaseModel):
533536
output_json_schema: dict[str, object] | None = None
534537
stream_schema: object | None = None
535538
metadata: dict[str, object] | None = None
539+
sample_input: dict[str, object] | None = None
536540

537541

538542
_SyncTracingWrapper = Callable[[object | None, ActionRunContext], ActionResponse[Any]]

py/packages/genkit/src/genkit/core/reflection.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ def _build_actions_payload(
122122
'inputSchema': getattr(meta, 'input_json_schema', None),
123123
'outputSchema': getattr(meta, 'output_json_schema', None),
124124
'metadata': getattr(meta, 'metadata', None),
125+
'sampleInput': getattr(meta, 'sample_input', None),
125126
}
126127

127128
if key not in actions:

py/plugins/dev-local-vectorstore/src/genkit/plugins/dev_local_vectorstore/plugin_api.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def define_dev_local_vector_store(
4646
Args:
4747
ai: The Genkit instance to register the retriever and indexer with.
4848
name: Name of the retriever and indexer.
49-
embedder: The embedder to use (e.g., 'vertexai/text-embedding-004').
49+
embedder: The embedder to use (e.g., 'vertexai/gemini-embedding-001').
5050
embedder_options: Optional configuration to pass to the embedder.
5151
5252
Returns:

py/plugins/firebase/src/genkit/plugins/firebase/firestore.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def define_firestore_vector_store(
6363
Args:
6464
ai: The Genkit instance to register the retriever with.
6565
name: Name of the retriever.
66-
embedder: The embedder to use (e.g., 'vertexai/text-embedding-004').
66+
embedder: The embedder to use (e.g., 'vertexai/gemini-embedding-001').
6767
embedder_options: Optional configuration to pass to the embedder.
6868
collection: The name of the Firestore collection to query.
6969
vector_field: The name of the field containing the vector embeddings.

py/plugins/google-genai/src/genkit/plugins/google_genai/google.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ class GoogleAI(Plugin):
283283
+------------------+-------------------+--------------------------------+
284284
| Gemini/Gemma | MODEL | googleai/gemini-2.0-flash-001 |
285285
| Imagen | MODEL | googleai/imagen-3.0-generate |
286-
| Embedders | EMBEDDER | googleai/text-embedding-004 |
286+
| Embedders | EMBEDDER | googleai/gemini-embedding-001 |
287287
| Veo (video) | BACKGROUND_MODEL | googleai/veo-2.0-generate-001 |
288288
+------------------+-------------------+--------------------------------+
289289

py/plugins/google-genai/test/google_plugin_test.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ async def test_googleai_initialize(mock_client_cls: MagicMock) -> None:
137137
m1.description = ' Gemini Pro '
138138

139139
m2 = MagicMock()
140-
m2.name = 'models/text-embedding-004'
140+
m2.name = 'models/gemini-embedding-001'
141141
m2.supported_actions = ['embedContent']
142142
m2.description = ' Embedding '
143143

@@ -264,7 +264,7 @@ class MockModel:
264264

265265
models_return_value = [
266266
MockModel(supported_actions=['generateContent'], name='models/gemini-pro'),
267-
MockModel(supported_actions=['embedContent'], name='models/text-embedding-004'),
267+
MockModel(supported_actions=['embedContent'], name='models/gemini-embedding-001'),
268268
MockModel(supported_actions=['generateContent'], name='models/gemini-2.0-flash-tts'), # TTS
269269
]
270270

@@ -279,7 +279,7 @@ class MockModel:
279279
assert action1 is not None
280280

281281
# Check Embedder
282-
action2 = next(a for a in result if a.name == googleai_name('text-embedding-004'))
282+
action2 = next(a for a in result if a.name == googleai_name('gemini-embedding-001'))
283283
assert action2 is not None
284284
assert action2.kind == ActionKind.EMBEDDER
285285

@@ -490,7 +490,7 @@ async def test_vertexai_initialize(vertexai_plugin_instance: VertexAI) -> None:
490490
m1.supported_actions = ['generateContent']
491491

492492
m2 = MagicMock()
493-
m2.name = 'publishers/google/models/text-embedding-004'
493+
m2.name = 'publishers/google/models/gemini-embedding-001'
494494
m2.supported_actions = ['embedContent']
495495

496496
plugin._client.models.list.return_value = [m1, m2] # type: ignore
@@ -661,7 +661,7 @@ class MockModel:
661661

662662
[
663663
MockModel(name='publishers/google/models/gemini-1.5-flash'),
664-
MockModel(name='publishers/google/models/text-embedding-004'),
664+
MockModel(name='publishers/google/models/gemini-embedding-001'),
665665
MockModel(name='publishers/google/models/imagen-3.0-generate-001'),
666666
MockModel(name='publishers/google/models/veo-2.0-generate-001'),
667667
]
@@ -674,7 +674,7 @@ class MockModel:
674674
m1.description = 'Gemini model'
675675

676676
m2 = MagicMock()
677-
m2.name = 'publishers/google/models/text-embedding-004'
677+
m2.name = 'publishers/google/models/gemini-embedding-001'
678678
m2.supported_actions = ['embedContent']
679679
m2.description = 'Embedder'
680680

@@ -698,7 +698,7 @@ class MockModel:
698698
assert action1 is not None
699699

700700
# Verify Embedder
701-
action2 = next(a for a in result if a.name == vertexai_name('text-embedding-004'))
701+
action2 = next(a for a in result if a.name == vertexai_name('gemini-embedding-001'))
702702
assert action2 is not None
703703

704704
# Verify Imagen

0 commit comments

Comments
 (0)