diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml
index 30a8063ea9..c13ed6cbeb 100644
--- a/.github/workflows/integration-auth-tests.yml
+++ b/.github/workflows/integration-auth-tests.yml
@@ -91,6 +91,9 @@ jobs:
               conversations:
                 table_name: openai_conversations
                 backend: sql_default
+            prompts:
+              namespace: prompts
+              backend: kv_default
             server:
               port: 8321
           EOF
diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml
index e1ca170f57..11f0cd5af8 100644
--- a/benchmarking/k8s-benchmark/stack-configmap.yaml
+++ b/benchmarking/k8s-benchmark/stack-configmap.yaml
@@ -115,13 +115,21 @@ data:
           db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:=llamastack}
           password: ${env.POSTGRES_PASSWORD:=llamastack}
-      references:
+      stores:
         metadata:
           backend: kv_default
           namespace: registry
         inference:
           backend: sql_default
           table_name: inference_store
+          max_write_queue_size: 10000
+          num_writers: 4
+        conversations:
+          backend: sql_default
+          table_name: openai_conversations
+        prompts:
+          backend: kv_default
+          namespace: prompts
       models:
       - metadata:
           embedding_dimension: 768
diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml
index 06a481f432..b201f4f19b 100644
--- a/benchmarking/k8s-benchmark/stack_run_config.yaml
+++ b/benchmarking/k8s-benchmark/stack_run_config.yaml
@@ -115,6 +115,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata:
diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx
index bf31568653..041567440f 100644
--- a/docs/docs/distributions/configuration.mdx
+++ b/docs/docs/distributions/configuration.mdx
@@ -63,13 +63,21 @@ storage:
     sql_default:
       type: sql_sqlite
       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
-  references:
+  stores:
     metadata:
       backend: kv_default
       namespace: registry
     inference:
       backend: sql_default
       table_name: inference_store
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      backend: sql_default
+      table_name: openai_conversations
+    prompts:
+      backend: kv_default
+      namespace: prompts
 models:
 - metadata: {}
   model_id: ${env.INFERENCE_MODEL}
diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml
index c71ab05d8b..255e39ac20 100644
--- a/docs/docs/distributions/k8s/stack-configmap.yaml
+++ b/docs/docs/distributions/k8s/stack-configmap.yaml
@@ -113,13 +113,21 @@ data:
           db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:=llamastack}
           password: ${env.POSTGRES_PASSWORD:=llamastack}
-      references:
+      stores:
         metadata:
           backend: kv_default
           namespace: registry
         inference:
           backend: sql_default
           table_name: inference_store
+          max_write_queue_size: 10000
+          num_writers: 4
+        conversations:
+          backend: sql_default
+          table_name: openai_conversations
+        prompts:
+          backend: kv_default
+          namespace: prompts
       models:
       - metadata:
           embedding_dimension: 768
diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml
index 3c74fd436a..0f317cd3b4 100644
--- a/docs/docs/distributions/k8s/stack_run_config.yaml
+++ b/docs/docs/distributions/k8s/stack_run_config.yaml
@@ -113,6 +113,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata:
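For orientation, the YAML `stores` block that these config hunks converge on maps one-to-one onto the pydantic references used later in this diff. A minimal sketch, assuming `kv_default` and `sql_default` are declared under `storage.backends`:

```python
# Sketch only: mirrors the ServerStoresConfig constructions that appear in
# llama_stack/core/stack.py and tests/unit/distribution/test_distribution.py below.
from llama_stack.core.storage.datatypes import (
    InferenceStoreReference,
    KVStoreReference,
    ServerStoresConfig,
    SqlStoreReference,
)

stores = ServerStoresConfig(
    metadata=KVStoreReference(backend="kv_default", namespace="registry"),
    inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
    conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
    # New in this change: prompts get their own KV reference instead of
    # borrowing the metadata store's backend.
    prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
)
```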
diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py
index 5f4775d878..96a94e38d8 100644
--- a/llama_stack/core/datatypes.py
+++ b/llama_stack/core/datatypes.py
@@ -582,6 +582,7 @@ def _ensure_backend(reference, expected_set, store_name: str) -> None:
         _ensure_backend(stores.inference, sql_backends, "storage.stores.inference")
         _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations")
         _ensure_backend(stores.responses, sql_backends, "storage.stores.responses")
+        _ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts")
 
         return self
diff --git a/llama_stack/core/prompts/prompts.py b/llama_stack/core/prompts/prompts.py
index 856397ca52..1e48bcc8cd 100644
--- a/llama_stack/core/prompts/prompts.py
+++ b/llama_stack/core/prompts/prompts.py
@@ -11,7 +11,6 @@
 from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts
 from llama_stack.core.datatypes import StackRunConfig
-from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
 
@@ -40,11 +39,10 @@ def __init__(self, config: PromptServiceConfig, deps: dict[Any, Any]):
         self.kvstore: KVStore
 
     async def initialize(self) -> None:
-        # Use metadata store backend with prompts-specific namespace
-        metadata_ref = self.config.run_config.storage.stores.metadata
-        if not metadata_ref:
-            raise ValueError("storage.stores.metadata must be configured in run config")
-        prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
+        # Use prompts store reference from run config
+        prompts_ref = self.config.run_config.storage.stores.prompts
+        if not prompts_ref:
+            raise ValueError("storage.stores.prompts must be configured in run config")
         self.kvstore = await kvstore_impl(prompts_ref)
 
     def _get_default_key(self, prompt_id: str) -> str:
diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py
index 4cf1d072dc..2d60bbeeec 100644
--- a/llama_stack/core/stack.py
+++ b/llama_stack/core/stack.py
@@ -540,6 +540,7 @@ def run_config_from_adhoc_config_spec(
             metadata=KVStoreReference(backend="kv_default", namespace="registry"),
             inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
             conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
+            prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
         ),
     ),
 )
diff --git a/llama_stack/core/storage/datatypes.py b/llama_stack/core/storage/datatypes.py
index 9df170e100..4b17b9ea92 100644
--- a/llama_stack/core/storage/datatypes.py
+++ b/llama_stack/core/storage/datatypes.py
@@ -271,6 +271,10 @@ class ServerStoresConfig(BaseModel):
         default=None,
         description="Responses store configuration (uses SQL backend)",
     )
+    prompts: KVStoreReference | None = Field(
+        default=None,
+        description="Prompts store configuration (uses KV backend)",
+    )
 
 
 class StorageConfig(BaseModel):
diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml
index ecf9eed3bb..ebb4de5f1b 100644
--- a/llama_stack/distributions/ci-tests/run.yaml
+++ b/llama_stack/distributions/ci-tests/run.yaml
@@ -247,6 +247,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields:
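The `prompts.py` hunk above is the behavioral core of this change: the service now resolves its KV store from `storage.stores.prompts` rather than deriving one from the metadata store's backend. A minimal sketch of the new resolution path (`run_config` is assumed to be a `StackRunConfig`):

```python
# Sketch of the resolution path introduced in PromptServiceImpl.initialize().
from llama_stack.providers.utils.kvstore import kvstore_impl

async def resolve_prompts_kvstore(run_config):
    prompts_ref = run_config.storage.stores.prompts
    if not prompts_ref:
        raise ValueError("storage.stores.prompts must be configured in run config")
    # datatypes.py above additionally validates that the reference points at a
    # KV backend (kv_backends), not a SQL one.
    return await kvstore_impl(prompts_ref)
```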
diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml
index 2563f2f4b2..e0da8060d8 100644
--- a/llama_stack/distributions/dell/run-with-safety.yaml
+++ b/llama_stack/distributions/dell/run-with-safety.yaml
@@ -109,6 +109,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml
index 7bada394fc..bc3117d88f 100644
--- a/llama_stack/distributions/dell/run.yaml
+++ b/llama_stack/distributions/dell/run.yaml
@@ -105,6 +105,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
index 01b5db4f97..2fa9d198b2 100644
--- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
+++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
@@ -122,6 +122,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml
index 87c33dde05..5c7f75ca8e 100644
--- a/llama_stack/distributions/meta-reference-gpu/run.yaml
+++ b/llama_stack/distributions/meta-reference-gpu/run.yaml
@@ -112,6 +112,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml
index c23d0f9cb2..1d57ad17ae 100644
--- a/llama_stack/distributions/nvidia/run-with-safety.yaml
+++ b/llama_stack/distributions/nvidia/run-with-safety.yaml
@@ -111,6 +111,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml
index 81e744d537..8c50b8bfb5 100644
--- a/llama_stack/distributions/nvidia/run.yaml
+++ b/llama_stack/distributions/nvidia/run.yaml
@@ -100,6 +100,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml
index 4fd0e199bb..912e48dd38 100644
--- a/llama_stack/distributions/open-benchmark/run.yaml
+++ b/llama_stack/distributions/open-benchmark/run.yaml
@@ -142,6 +142,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml
index 0d7ecff48d..dd1c2bc7fd 100644
--- a/llama_stack/distributions/postgres-demo/run.yaml
+++ b/llama_stack/distributions/postgres-demo/run.yaml
@@ -87,6 +87,9 @@ storage:
     conversations:
       table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models:
   - metadata: {}
diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml
index 92483c78e6..30cb5ff90a 100644
--- a/llama_stack/distributions/starter-gpu/run.yaml
+++ b/llama_stack/distributions/starter-gpu/run.yaml
@@ -250,6 +250,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields:
diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml
index 3b9d8f8907..98a85f0613 100644
--- a/llama_stack/distributions/starter/run.yaml
+++ b/llama_stack/distributions/starter/run.yaml
@@ -247,6 +247,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields:
diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py
index 64f21e6265..b27e807d0b 100644
--- a/llama_stack/distributions/template.py
+++ b/llama_stack/distributions/template.py
@@ -257,6 +257,10 @@ def run_config(
                 backend="sql_default",
                 table_name="openai_conversations",
             ).model_dump(exclude_none=True),
+            "prompts": KVStoreReference(
+                backend="kv_default",
+                namespace="prompts",
+            ).model_dump(exclude_none=True),
         }
 
         storage_config = dict(
diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml
index ca3c8402dc..8456115d26 100644
--- a/llama_stack/distributions/watsonx/run.yaml
+++ b/llama_stack/distributions/watsonx/run.yaml
@@ -115,6 +115,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 registered_resources:
   models: []
   shields: []
diff --git a/tests/external/run-byoa.yaml b/tests/external/run-byoa.yaml
index 4d63046c6b..62d6b18252 100644
--- a/tests/external/run-byoa.yaml
+++ b/tests/external/run-byoa.yaml
@@ -25,6 +25,9 @@ storage:
     conversations:
       table_name: openai_conversations
       backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
 external_apis_dir: ~/.llama/apis.d
 external_providers_dir: ~/.llama/providers.d
 server:
diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py
index 7b9f3ca0cc..0977a1e43e 100644
--- a/tests/unit/cli/test_stack_config.py
+++ b/tests/unit/cli/test_stack_config.py
@@ -44,6 +44,9 @@ def config_with_image_name_int():
           responses:
             backend: sql_default
             table_name: responses
+          prompts:
+            backend: kv_default
+            namespace: prompts
       providers:
         inference:
         - provider_id: provider1
diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py
index 3b0643a139..81fb039387 100644
--- a/tests/unit/distribution/test_distribution.py
+++ b/tests/unit/distribution/test_distribution.py
@@ -48,6 +48,7 @@ def _default_storage() -> StorageConfig:
             metadata=KVStoreReference(backend="kv_default", namespace="registry"),
             inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
             conversations=SqlStoreReference(backend="sql_default", table_name="conversations"),
+            prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
         ),
     )
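Why each `run.yaml` template above gains exactly the two keys shown (`namespace`, `backend`): `template.py` serializes the reference with `model_dump(exclude_none=True)`, so optional fields left at `None` are dropped. A small illustration, assuming the reference carries no other non-`None` defaults (key order may differ from the YAML):

```python
from llama_stack.core.storage.datatypes import KVStoreReference

ref = KVStoreReference(backend="kv_default", namespace="prompts")
print(ref.model_dump(exclude_none=True))
# expected along the lines of: {'backend': 'kv_default', 'namespace': 'prompts'}
```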
diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py
index fe30e1a77a..c876f2041b 100644
--- a/tests/unit/prompts/prompts/conftest.py
+++ b/tests/unit/prompts/prompts/conftest.py
@@ -18,7 +18,7 @@
     SqlStoreReference,
     StorageConfig,
 )
-from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends
+from llama_stack.providers.utils.kvstore import register_kvstore_backends
 
 
 @pytest.fixture
@@ -38,6 +38,7 @@ async def temp_prompt_store(tmp_path_factory):
             metadata=KVStoreReference(backend="kv_test", namespace="registry"),
             inference=InferenceStoreReference(backend="sql_test", table_name="inference"),
             conversations=SqlStoreReference(backend="sql_test", table_name="conversations"),
+            prompts=KVStoreReference(backend="kv_test", namespace="prompts"),
         ),
     )
     mock_run_config = StackRunConfig(
@@ -50,6 +51,6 @@
     store = PromptServiceImpl(config, deps={})
 
     register_kvstore_backends({"kv_test": storage.backends["kv_test"]})
-    store.kvstore = await kvstore_impl(KVStoreReference(backend="kv_test", namespace="prompts"))
+    await store.initialize()
 
     yield store
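With the fixture now exercising the real `initialize()` path instead of hand-wiring the kvstore, a hypothetical smoke test could look like the following (assumes pytest-asyncio and the async `set`/`get` of the `KVStore` protocol; the key name is illustrative, not the service's actual key scheme):

```python
import pytest

@pytest.mark.asyncio
async def test_prompts_kvstore_roundtrip(temp_prompt_store):
    # initialize() resolved storage.stores.prompts to the kv_test backend.
    await temp_prompt_store.kvstore.set("smoke-test", "ok")
    assert await temp_prompt_store.kvstore.get("smoke-test") == "ok"
```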