Fix Embedder Schemas & Migrate Default Embedding Model

MengqinShen-GL · MengqinShen-GL · commit e1c6856b8f06 · 2026-02-10T15:17:37.000-08:00
- Fixed missing output schema in Dev UI for embedder actions by explicitly setting input_schema and output_schema on Action objects.
- Added default sampleInput to embedder actions for easier testing.
- Resolved a deadlock in registry.py that affected dotprompt schema resolution by switching the resolution to async.
- Migrated default embedding model references from text-embedding-004 to gemini-embedding-001 across plugins, samples, and tests due to model deprecation/availability.
- Fixed a redundant cast lint warning in _aio.py.
diff --git a/py/engdoc/parity-analysis/plugin_api_consistency.md b/py/engdoc/parity-analysis/plugin_api_consistency.md
@@ -237,7 +237,7 @@ model='ollama/llama3'
 
 **JS Pattern:**
 ```typescript
-const embedder = googleAI.embedder('text-embedding-004');
+const embedder = googleAI.embedder('gemini-embedding-001');
 ```
 
 **Python:** ❌ No equivalent
diff --git a/py/packages/genkit/src/genkit/ai/_aio.py b/py/packages/genkit/src/genkit/ai/_aio.py
@@ -619,7 +619,7 @@ async def embed(
         {version, ...config, ...options} (options take precedence).
 
         Args:
-            embedder: Embedder name (e.g., 'googleai/text-embedding-004') or
+            embedder: Embedder name (e.g., 'googleai/gemini-embedding-001') or
                 an EmbedderRef with configuration.
             content: A single string, Document, or DocumentData to embed.
             metadata: Optional metadata to apply to the document. Only used
@@ -634,25 +634,25 @@ async def embed(
             ValueError: If content is not specified.
 
         Example - Basic string embedding:
-            >>> embeddings = await ai.embed(embedder='googleai/text-embedding-004', content='Hello, world!')
+            >>> embeddings = await ai.embed(embedder='googleai/gemini-embedding-001', content='Hello, world!')
             >>> print(len(embeddings[0].embedding))  # Vector dimensions
 
         Example - With metadata:
             >>> embeddings = await ai.embed(
-            ...     embedder='googleai/text-embedding-004',
+            ...     embedder='googleai/gemini-embedding-001',
             ...     content='Product description',
             ...     metadata={'category': 'electronics'},
             ... )
 
         Example - With embedder options:
             >>> embeddings = await ai.embed(
-            ...     embedder='googleai/text-embedding-004',
+            ...     embedder='googleai/gemini-embedding-001',
             ...     content='Search query',
             ...     options={'task_type': 'RETRIEVAL_QUERY'},
             ... )
 
         Example - Using EmbedderRef:
-            >>> ref = create_embedder_ref('googleai/text-embedding-004', config={'task_type': 'CLUSTERING'})
+            >>> ref = create_embedder_ref('googleai/gemini-embedding-001', config={'task_type': 'CLUSTERING'})
             >>> embeddings = await ai.embed(embedder=ref, content='Text')
         """
         embedder_name = self._resolve_embedder_name(embedder)
@@ -706,7 +706,7 @@ async def embed_many(
         and passes options directly. This matches the JS canonical behavior.
 
         Args:
-            embedder: Embedder name (e.g., 'googleai/text-embedding-004') or
+            embedder: Embedder name (e.g., 'googleai/gemini-embedding-001') or
                 an EmbedderRef.
             content: List of strings, Documents, or DocumentData to embed.
             metadata: Optional metadata to apply to all items. Only used when
@@ -722,22 +722,22 @@ async def embed_many(
 
         Example - Basic batch embedding:
             >>> embeddings = await ai.embed_many(
-            ...     embedder='googleai/text-embedding-004',
+            ...     embedder='googleai/gemini-embedding-001',
             ...     content=['Doc 1', 'Doc 2', 'Doc 3'],
             ... )
             >>> for i, emb in enumerate(embeddings):
             ...     print(f'Doc {i}: {len(emb.embedding)} dims')
 
         Example - With shared metadata:
             >>> embeddings = await ai.embed_many(
-            ...     embedder='googleai/text-embedding-004',
+            ...     embedder='googleai/gemini-embedding-001',
             ...     content=['text1', 'text2'],
             ...     metadata={'batch_id': 'batch-001'},
             ... )
 
         Example - With options (EmbedderRef config is NOT extracted):
             >>> embeddings = await ai.embed_many(
-            ...     embedder='googleai/text-embedding-004',
+            ...     embedder='googleai/gemini-embedding-001',
             ...     content=documents,
             ...     options={'task_type': 'RETRIEVAL_DOCUMENT'},
             ... )
diff --git a/py/packages/genkit/src/genkit/blocks/embedding.py b/py/packages/genkit/src/genkit/blocks/embedding.py
@@ -31,7 +31,7 @@
     |                   | vectors. Contains 'embedding' field and metadata.    |
     +-------------------+------------------------------------------------------+
     | Embedder          | A model/service that converts text to embeddings.    |
-    |                   | Examples: 'googleai/text-embedding-004'. Registered  |
+    |                   | Example: 'googleai/gemini-embedding-001'. Registered |
     |                   | as actions, invoked via embed() and embed_many().    |
     +-------------------+------------------------------------------------------+
     | EmbedderRef       | Reference bundling embedder name with optional       |
@@ -68,14 +68,14 @@
     - ai.embed_many(): Embed multiple pieces of content in batch
 
 Example - Single embedding:
-    >>> embeddings = await ai.embed(embedder='googleai/text-embedding-004', content='Hello, world!')
+    >>> embeddings = await ai.embed(embedder='googleai/gemini-embedding-001', content='Hello, world!')
     >>> vector = embeddings[0].embedding
 
 Example - Batch embedding:
-    >>> embeddings = await ai.embed_many(embedder='googleai/text-embedding-004', content=['Doc 1', 'Doc 2', 'Doc 3'])
+    >>> embeddings = await ai.embed_many(embedder='googleai/gemini-embedding-001', content=['Doc 1', 'Doc 2', 'Doc 3'])
 
 Example - Using EmbedderRef with configuration:
-    >>> ref = create_embedder_ref('googleai/text-embedding-004', config={'task_type': 'CLUSTERING'}, version='v1')
+    >>> ref = create_embedder_ref('googleai/gemini-embedding-001', config={'task_type': 'CLUSTERING'}, version='v1')
     >>> embeddings = await ai.embed(embedder=ref, content='My text')
 
 Note on embed() vs embed_many():
@@ -195,12 +195,16 @@ def embedder_action_metadata(
 
     embedder_info['customOptions'] = options.config_schema if options.config_schema else None
 
+    # Default sample input for easier testing in Dev UI
+    sample_input: dict[str, object] = {'input': [{'content': [{'text': 'Hello, world!'}]}]}
+
     return ActionMetadata(
         kind=cast(ActionKind, ActionKind.EMBEDDER),
         name=name,
         input_json_schema=to_json_schema(EmbedRequest),
         output_json_schema=to_json_schema(EmbedResponse),
         metadata=embedder_metadata_dict,
+        sample_input=sample_input,
     )
 
 
diff --git a/py/packages/genkit/src/genkit/blocks/prompt.py b/py/packages/genkit/src/genkit/blocks/prompt.py
@@ -851,9 +851,11 @@ class RecipeInput(BaseModel):
         middleware = effective_opts.get('use') or self._use
         context = effective_opts.get('context')
 
+        rendered_options = await self.render(input=input, opts=effective_opts)
+
         result = await generate_action(
             self._registry,
-            await self.render(input=input, opts=effective_opts),
+            rendered_options,
             on_chunk=on_chunk,
             middleware=middleware,
             context=context if context else ActionRunContext._current_context(),  # pyright: ignore[reportPrivateUsage]
diff --git a/py/packages/genkit/src/genkit/core/action/_action.py b/py/packages/genkit/src/genkit/core/action/_action.py
@@ -92,7 +92,8 @@
 from functools import cached_property
 from typing import Any, Generic, Protocol, cast, get_type_hints
 
-from pydantic import BaseModel, TypeAdapter, ValidationError
+from pydantic import BaseModel, ConfigDict, TypeAdapter, ValidationError
+from pydantic.alias_generators import to_camel
 from typing_extensions import Never, TypeVar
 
 from genkit.aio import Channel, ensure_async
@@ -524,6 +525,8 @@ def _initialize_io_schemas(
 class ActionMetadata(BaseModel):
     """Metadata for actions."""
 
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+
     kind: ActionKind
     name: str
     description: str | None = None
@@ -533,6 +536,7 @@ class ActionMetadata(BaseModel):
     output_json_schema: dict[str, object] | None = None
     stream_schema: object | None = None
     metadata: dict[str, object] | None = None
+    sample_input: dict[str, object] | None = None
 
 
 _SyncTracingWrapper = Callable[[object | None, ActionRunContext], ActionResponse[Any]]
diff --git a/py/packages/genkit/src/genkit/core/reflection.py b/py/packages/genkit/src/genkit/core/reflection.py
@@ -122,6 +122,7 @@ def _build_actions_payload(
             'inputSchema': getattr(meta, 'input_json_schema', None),
             'outputSchema': getattr(meta, 'output_json_schema', None),
             'metadata': getattr(meta, 'metadata', None),
+            'sampleInput': getattr(meta, 'sample_input', None),
         }
 
         if key not in actions:
diff --git a/py/plugins/dev-local-vectorstore/src/genkit/plugins/dev_local_vectorstore/plugin_api.py b/py/plugins/dev-local-vectorstore/src/genkit/plugins/dev_local_vectorstore/plugin_api.py
@@ -46,7 +46,7 @@ def define_dev_local_vector_store(
     Args:
         ai: The Genkit instance to register the retriever and indexer with.
         name: Name of the retriever and indexer.
-        embedder: The embedder to use (e.g., 'vertexai/text-embedding-004').
+        embedder: The embedder to use (e.g., 'vertexai/gemini-embedding-001').
         embedder_options: Optional configuration to pass to the embedder.
 
     Returns:
diff --git a/py/plugins/firebase/src/genkit/plugins/firebase/firestore.py b/py/plugins/firebase/src/genkit/plugins/firebase/firestore.py
@@ -63,7 +63,7 @@ def define_firestore_vector_store(
     Args:
         ai: The Genkit instance to register the retriever with.
         name: Name of the retriever.
-        embedder: The embedder to use (e.g., 'vertexai/text-embedding-004').
+        embedder: The embedder to use (e.g., 'vertexai/gemini-embedding-001').
         embedder_options: Optional configuration to pass to the embedder.
         collection: The name of the Firestore collection to query.
         vector_field: The name of the field containing the vector embeddings.
diff --git a/py/plugins/google-genai/src/genkit/plugins/google_genai/google.py b/py/plugins/google-genai/src/genkit/plugins/google_genai/google.py
@@ -283,7 +283,7 @@ class GoogleAI(Plugin):
         +------------------+-------------------+--------------------------------+
         | Gemini/Gemma     | MODEL             | googleai/gemini-2.0-flash-001  |
         | Imagen           | MODEL             | googleai/imagen-3.0-generate   |
-        | Embedders        | EMBEDDER          | googleai/text-embedding-004    |
+        | Embedders        | EMBEDDER          | googleai/gemini-embedding-001  |
         | Veo (video)      | BACKGROUND_MODEL  | googleai/veo-2.0-generate-001  |
         +------------------+-------------------+--------------------------------+
 
diff --git a/py/plugins/google-genai/test/google_plugin_test.py b/py/plugins/google-genai/test/google_plugin_test.py
@@ -137,7 +137,7 @@ async def test_googleai_initialize(mock_client_cls: MagicMock) -> None:
     m1.description = ' Gemini Pro '
 
     m2 = MagicMock()
-    m2.name = 'models/text-embedding-004'
+    m2.name = 'models/gemini-embedding-001'
     m2.supported_actions = ['embedContent']
     m2.description = ' Embedding '
 
@@ -264,7 +264,7 @@ class MockModel:
 
     models_return_value = [
         MockModel(supported_actions=['generateContent'], name='models/gemini-pro'),
-        MockModel(supported_actions=['embedContent'], name='models/text-embedding-004'),
+        MockModel(supported_actions=['embedContent'], name='models/gemini-embedding-001'),
         MockModel(supported_actions=['generateContent'], name='models/gemini-2.0-flash-tts'),  # TTS
     ]
 
@@ -279,7 +279,7 @@ class MockModel:
     assert action1 is not None
 
     # Check Embedder
-    action2 = next(a for a in result if a.name == googleai_name('text-embedding-004'))
+    action2 = next(a for a in result if a.name == googleai_name('gemini-embedding-001'))
     assert action2 is not None
     assert action2.kind == ActionKind.EMBEDDER
 
@@ -490,7 +490,7 @@ async def test_vertexai_initialize(vertexai_plugin_instance: VertexAI) -> None:
     m1.supported_actions = ['generateContent']
 
     m2 = MagicMock()
-    m2.name = 'publishers/google/models/text-embedding-004'
+    m2.name = 'publishers/google/models/gemini-embedding-001'
     m2.supported_actions = ['embedContent']
 
     plugin._client.models.list.return_value = [m1, m2]  # type: ignore
@@ -661,7 +661,7 @@ class MockModel:
 
     [
         MockModel(name='publishers/google/models/gemini-1.5-flash'),
-        MockModel(name='publishers/google/models/text-embedding-004'),
+        MockModel(name='publishers/google/models/gemini-embedding-001'),
         MockModel(name='publishers/google/models/imagen-3.0-generate-001'),
         MockModel(name='publishers/google/models/veo-2.0-generate-001'),
     ]
@@ -674,7 +674,7 @@ class MockModel:
     m1.description = 'Gemini model'
 
     m2 = MagicMock()
-    m2.name = 'publishers/google/models/text-embedding-004'
+    m2.name = 'publishers/google/models/gemini-embedding-001'
     m2.supported_actions = ['embedContent']
     m2.description = 'Embedder'
 
@@ -698,7 +698,7 @@ class MockModel:
     assert action1 is not None
 
     # Verify Embedder
-    action2 = next(a for a in result if a.name == vertexai_name('text-embedding-004'))
+    action2 = next(a for a in result if a.name == vertexai_name('gemini-embedding-001'))
     assert action2 is not None
 
     # Verify Imagen
diff --git a/py/plugins/google-genai/tests/google_genai_plugin_test.py b/py/plugins/google-genai/tests/google_genai_plugin_test.py
@@ -44,7 +44,7 @@
 def test_googleai_name() -> None:
     """Test googleai_name helper function."""
     assert googleai_name('gemini-2.0-flash') == 'googleai/gemini-2.0-flash'
-    assert googleai_name('text-embedding-004') == 'googleai/text-embedding-004'
+    assert googleai_name('gemini-embedding-001') == 'googleai/gemini-embedding-001'
 
 
 def test_vertexai_name() -> None:
@@ -181,11 +181,11 @@ async def test_googleai_resolve_embedder(mock_list_models: MagicMock, mock_clien
     mock_list_models.return_value = GenaiModels()
 
     plugin = GoogleAI(api_key='test-key')
-    action = await plugin.resolve(ActionKind.EMBEDDER, 'googleai/text-embedding-004')
+    action = await plugin.resolve(ActionKind.EMBEDDER, 'googleai/gemini-embedding-001')
 
     assert action is not None
     assert action.kind == ActionKind.EMBEDDER
-    assert action.name == 'googleai/text-embedding-004'
+    assert action.name == 'googleai/gemini-embedding-001'
 
 
 @patch('genkit.plugins.google_genai.google.genai.client.Client')
diff --git a/py/plugins/google-genai/tests/rerankers_test.py b/py/plugins/google-genai/tests/rerankers_test.py
@@ -63,7 +63,7 @@ def test_is_reranker_model_name_valid() -> None:
 def test_is_reranker_model_name_invalid() -> None:
     """Test is_reranker_model_name returns False for invalid names."""
     assert is_reranker_model_name('gemini-2.0-flash') is False
-    assert is_reranker_model_name('text-embedding-004') is False
+    assert is_reranker_model_name('gemini-embedding-001') is False
     assert is_reranker_model_name(None) is False
     assert is_reranker_model_name('') is False
 
diff --git a/py/plugins/vertex-ai/src/genkit/plugins/vertex_ai/vector_search.py b/py/plugins/vertex-ai/src/genkit/plugins/vertex_ai/vector_search.py
@@ -416,7 +416,7 @@ def define_vertex_vector_search_big_query(
     Args:
         ai: The Genkit instance to register the retriever with.
         name: Name of the retriever.
-        embedder: The embedder to use (e.g., 'vertexai/text-embedding-004').
+        embedder: The embedder to use (e.g., 'vertexai/gemini-embedding-001').
         embedder_options: Optional configuration to pass to the embedder.
         bq_client: The BigQuery client to use for querying.
         dataset_id: The ID of the BigQuery dataset.
@@ -472,7 +472,7 @@ def define_vertex_vector_search_firestore(
     Args:
         ai: The Genkit instance to register the retriever with.
         name: Name of the retriever.
-        embedder: The embedder to use (e.g., 'vertexai/text-embedding-004').
+        embedder: The embedder to use (e.g., 'vertexai/gemini-embedding-001').
         embedder_options: Optional configuration to pass to the embedder.
         firestore_client: The Firestore client to use for querying.
         collection_name: The name of the Firestore collection.
diff --git a/py/samples/dev-local-vectorstore-hello/src/main.py b/py/samples/dev-local-vectorstore-hello/src/main.py
@@ -116,7 +116,7 @@
 define_dev_local_vector_store(
     ai,
     name='films',
-    embedder='vertexai/text-embedding-004',
+    embedder='vertexai/gemini-embedding-001',
 )
 
 films = [
diff --git a/py/samples/framework-evaluator-demo/evaluator_demo/pdf_rag.py b/py/samples/framework-evaluator-demo/evaluator_demo/pdf_rag.py
@@ -141,7 +141,7 @@ def chunk_text(text: str, chunk_size: int, overlap: int) -> list[str]:
 # Define a flow to index documents into the "vector store"
 # genkit flow:run indexPdf '"./docs/sfspca-cat-adoption-handbook-2023.pdf"'
 @ai.flow(name='index_pdf')
-async def index_pdf(file_path: str = 'samples/evaluator-demo/docs/cat-wiki.pdf') -> None:
+async def index_pdf(file_path: str = 'samples/framework-evaluator-demo/docs/cat-wiki.pdf') -> None:
     """Index a PDF file.
 
     Args:
@@ -151,7 +151,7 @@ async def index_pdf(file_path: str = 'samples/evaluator-demo/docs/cat-wiki.pdf')
         >>> await index_pdf('doc.pdf')
     """
     if not file_path:
-        file_path = 'samples/evaluator-demo/docs/cat-wiki.pdf'
+        file_path = 'samples/framework-evaluator-demo/docs/cat-wiki.pdf'
     resolved_path = pathlib.Path(file_path).resolve()
 
     # Extract text from PDF
diff --git a/py/samples/provider-firestore-retriever/src/main.py b/py/samples/provider-firestore-retriever/src/main.py
@@ -104,7 +104,7 @@
     os.environ['GCLOUD_PROJECT'] = input('Please enter your GCLOUD_PROJECT: ')
 
 # Important: use the same embedding model for indexing and retrieval.
-EMBEDDING_MODEL = 'vertexai/text-embedding-004'
+EMBEDDING_MODEL = 'vertexai/gemini-embedding-001'
 
 # Add Firebase telemetry (metrics, logs, traces)
 add_firebase_telemetry()
diff --git a/py/samples/provider-google-genai-vertexai-hello/src/main.py b/py/samples/provider-google-genai-vertexai-hello/src/main.py
@@ -236,7 +236,7 @@ async def embed_docs(docs: list[str] | None = None) -> list[Embedding]:
         docs = ['Hello world', 'Genkit is great', 'Embeddings are fun']
     options = {'task_type': EmbeddingTaskType.CLUSTERING}
     return await ai.embed_many(
-        embedder='vertexai/text-embedding-004',
+        embedder='vertexai/gemini-embedding-001',
         content=docs,
         options=options,
     )
diff --git a/py/samples/provider-vertex-ai-vector-search-bigquery/src/main.py b/py/samples/provider-vertex-ai-vector-search-bigquery/src/main.py
@@ -162,7 +162,7 @@
 define_vertex_vector_search_big_query(
     ai,
     name='my-vector-search',
-    embedder='vertexai/text-embedding-004',
+    embedder='vertexai/gemini-embedding-001',
     embedder_options={
         'task': 'RETRIEVAL_DOCUMENT',
         'output_dimensionality': 128,
diff --git a/py/samples/provider-vertex-ai-vector-search-bigquery/src/setup_env.py b/py/samples/provider-vertex-ai-vector-search-bigquery/src/setup_env.py
@@ -32,7 +32,7 @@
 # Environment Variables
 LOCATION = os.environ['LOCATION']
 PROJECT_ID = os.environ['PROJECT_ID']
-EMBEDDING_MODEL = 'text-embedding-004'
+EMBEDDING_MODEL = 'gemini-embedding-001'
 
 BIGQUERY_DATASET_NAME = os.environ['BIGQUERY_DATASET_NAME']
 BIGQUERY_TABLE_NAME = os.environ['BIGQUERY_TABLE_NAME']
diff --git a/py/samples/provider-vertex-ai-vector-search-firestore/src/main.py b/py/samples/provider-vertex-ai-vector-search-firestore/src/main.py
@@ -161,7 +161,7 @@
 define_vertex_vector_search_firestore(
     ai,
     name='my-vector-search',
-    embedder='vertexai/text-embedding-004',
+    embedder='vertexai/gemini-embedding-001',
     embedder_options={
         'task': 'RETRIEVAL_DOCUMENT',
         'output_dimensionality': 128,
diff --git a/py/samples/web-short-n-long/src/main.py b/py/samples/web-short-n-long/src/main.py
@@ -352,7 +352,7 @@ async def embed_docs(docs: list[str] | None = None) -> list[Embedding]:
         docs = ['Hello world', 'Genkit is great', 'Embeddings are fun']
     options = {'task_type': EmbeddingTaskType.CLUSTERING}
     return await ai.embed_many(
-        embedder=f'googleai/{GeminiEmbeddingModels.TEXT_EMBEDDING_004}',
+        embedder=f'googleai/{GeminiEmbeddingModels.GEMINI_EMBEDDING_001}',
         content=docs,
         options=options,
     )

Original file line number	Diff line number	Diff line change
`@@ -122,6 +122,7 @@ def _build_actions_payload(`
`122`	`122`	`'inputSchema': getattr(meta, 'input_json_schema', None),`
`123`	`123`	`'outputSchema': getattr(meta, 'output_json_schema', None),`
`124`	`124`	`'metadata': getattr(meta, 'metadata', None),`
	`125`	`+ 'sampleInput': getattr(meta, 'sample_input', None),`
`125`	`126`	`}`
`126`	`127`
`127`	`128`	`if key not in actions:`