Skip to content

Commit 9f7d0d2

Browse files
committed
fix(server): add check for vector store document id
Signed-off-by: Radek Ježek <[email protected]>
1 parent 08fbbdf commit 9f7d0d2

File tree

4 files changed

+48
-6
lines changed

4 files changed

+48
-6
lines changed

apps/agentstack-sdk-py/src/agentstack_sdk/platform/vector_store.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
import typing
77
import uuid
8-
from typing import Literal
8+
from typing import Literal, Self
99

1010
import pydantic
1111

@@ -29,12 +29,24 @@ class VectorStoreDocument(pydantic.BaseModel):
2929
class VectorStoreItem(pydantic.BaseModel):
3030
id: str = pydantic.Field(default_factory=lambda: uuid.uuid4().hex)
3131
document_id: str
32-
document_type: typing.Literal["platform_file", "external"] = "platform_file"
32+
document_type: typing.Literal["platform_file", "external"] | None = "platform_file"
3333
model_id: str | typing.Literal["platform"] = "platform"
3434
text: str
3535
embedding: list[float]
3636
metadata: Metadata | None = None
3737

38+
@pydantic.model_validator(mode="after")
39+
def validate_document_id(self) -> Self:
40+
"""Validate that document_id is a valid UUID when document_type is platform_file."""
41+
if self.document_type == "platform_file":
42+
try:
43+
_ = uuid.UUID(self.document_id)
44+
except ValueError as ex:
45+
raise ValueError(
46+
f"document_id must be a valid UUID when document_type is platform_file, got: {self.document_id}"
47+
) from ex
48+
return self
49+
3850

3951
class VectorStoreSearchResult(pydantic.BaseModel):
4052
item: VectorStoreItem

apps/agentstack-server/src/agentstack_server/domain/models/vector_store.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,10 +2,10 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
from enum import StrEnum
5-
from typing import Literal
5+
from typing import Literal, Self
66
from uuid import UUID, uuid4
77

8-
from pydantic import AwareDatetime, BaseModel, Field
8+
from pydantic import AwareDatetime, BaseModel, Field, model_validator
99

1010
from agentstack_server.domain.models.common import Metadata
1111
from agentstack_server.utils.utils import utc_now
@@ -67,6 +67,18 @@ class VectorStoreItem(BaseModel):
6767
embedding: list[float]
6868
metadata: Metadata | None = None
6969

70+
@model_validator(mode="after")
71+
def validate_document_id(self) -> Self:
72+
"""Validate that document_id is a valid UUID when document_type is platform_file."""
73+
if self.document_type == DocumentType.PLATFORM_FILE:
74+
try:
75+
_ = UUID(self.document_id)
76+
except ValueError as ex:
77+
raise ValueError(
78+
f"document_id must be a valid UUID when document_type is platform_file, got: {self.document_id}"
79+
) from ex
80+
return self
81+
7082

7183
class VectorStoreSearchResult(BaseModel):
7284
"""Result of a vector store search operation containing full item data and similarity score."""

apps/agentstack-server/src/agentstack_server/infrastructure/vector_database/vector_db.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,14 @@
1717
String,
1818
Table,
1919
Text,
20+
case,
2021
)
2122
from sqlalchemy.dialects.postgresql import JSONB, insert
2223
from sqlalchemy.dialects.postgresql import UUID as SQL_UUID
2324
from sqlalchemy.ext.asyncio import AsyncConnection
2425

2526
from agentstack_server.domain.models.vector_store import (
27+
DocumentType,
2628
VectorStoreDocumentInfo,
2729
VectorStoreItem,
2830
VectorStoreSearchResult,
@@ -152,6 +154,7 @@ def _to_item(self, row: Row) -> VectorStoreItem:
152154
return VectorStoreItem(
153155
id=row.id,
154156
document_id=row.vector_store_document_id,
157+
document_type=row.document_type or DocumentType.EXTERNAL,
155158
embedding=row.embedding.to_list(),
156159
text=row.text,
157160
metadata=row.metadata,
@@ -177,7 +180,19 @@ async def similarity_search(
177180
# Select all columns plus the distance as a named column
178181
query = (
179182
table.select()
180-
.add_columns(table.c.embedding.cosine_distance(query_vector).label("distance"))
183+
.add_columns(
184+
table.c.embedding.cosine_distance(query_vector).label("distance"),
185+
case(
186+
(vector_store_documents_table.c.file_id.is_not(None), DocumentType.PLATFORM_FILE),
187+
else_=DocumentType.EXTERNAL,
188+
).label("document_type"),
189+
)
190+
.join(
191+
vector_store_documents_table,
192+
(table.c.vector_store_document_id == vector_store_documents_table.c.id)
193+
& (table.c.vector_store_id == vector_store_documents_table.c.vector_store_id),
194+
isouter=True,
195+
)
181196
.where(table.c.vector_store_id == collection_id)
182197
.order_by(table.c.embedding.cosine_distance(query_vector))
183198
.limit(limit)

apps/agentstack-server/tests/integration/vector_database/test_vector_db.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from sqlalchemy import text
99
from sqlalchemy.ext.asyncio import AsyncConnection
1010

11-
from agentstack_server.domain.models.vector_store import VectorStoreItem, VectorStoreSearchResult
11+
from agentstack_server.domain.models.vector_store import DocumentType, VectorStoreItem, VectorStoreSearchResult
1212
from agentstack_server.infrastructure.vector_database.vector_db import VectorDatabaseRepository
1313

1414
pytestmark = pytest.mark.integration
@@ -33,20 +33,23 @@ async def sample_vector_items(test_collection_id: UUID) -> list[VectorStoreItem]
3333
VectorStoreItem(
3434
id=uuid.uuid4(),
3535
document_id="doc_001",
36+
document_type=DocumentType.EXTERNAL,
3637
embedding=[1.0] * 128,
3738
text="The quick brown fox jumps over the lazy dog.",
3839
metadata={"source": "test_doc_1.txt", "chapter": "1"},
3940
),
4041
VectorStoreItem(
4142
id=uuid.uuid4(),
4243
document_id="doc_001",
44+
document_type=DocumentType.EXTERNAL,
4345
embedding=[2.0] * 128,
4446
text="Artificial intelligence is revolutionizing technology.",
4547
metadata={"source": "test_doc_1.txt", "chapter": "2"},
4648
),
4749
VectorStoreItem(
4850
id=uuid.uuid4(),
4951
document_id="doc_002",
52+
document_type=DocumentType.EXTERNAL,
5053
embedding=[3.0] * 128,
5154
text="Vector databases enable efficient similarity search.",
5255
metadata={"source": "test_doc_2.txt", "chapter": "1"},

0 commit comments

Comments
 (0)