Skip to content

Commit d98a8c9

Browse files
committed
- Refactored the MCP Server for better usability and extensibility
- Added ListIndex (beta)
- Minor other changes
1 parent 9736195 commit d98a8c9

File tree

15 files changed

+233
-21
lines changed

15 files changed

+233
-21
lines changed

packages/fetchcraft-core/src/examples/advanced/arbitrary_object_index.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from typing import List, Any, Dict
66

77
import openai
8-
from mongomock.mongo_client import MongoClient
98
from pydantic_ai import Tool
109
from pydantic_ai.models.openai import OpenAIChatModel
1110
from pydantic_ai.providers.openai import OpenAIProvider
@@ -15,7 +14,7 @@
1514
from fetchcraft.agents import PydanticAgent, RetrieverTool
1615
from fetchcraft.embeddings import OpenAIEmbeddings
1716
from fetchcraft.index.vector_index import VectorIndex
18-
from fetchcraft.node import Node, ObjectNode, DefaultObjectMapper, ObjectType
17+
from fetchcraft.node import Node, ObjectNode, DefaultObjectMapper, ObjectType, DocumentNode
1918
from fetchcraft.retriever import VectorIndexRetriever
2019
from fetchcraft.vector_store import QdrantVectorStore
2120

@@ -25,7 +24,6 @@
2524

2625
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "fetchcraft_objects")
2726

28-
mongo_client = MongoClient()
2927
client = QdrantClient(":memory:")
3028

3129

@@ -35,13 +33,7 @@ async def get_vector_index(index_id: str, nodes: List[Node]):
3533
api_key=OPENAI_API_KEY,
3634
base_url=OPENAI_BASE_URL
3735
)
38-
#
39-
# document_store = MongoDBDocumentStore(
40-
# client=mongo_client,
41-
# database_name=COLLECTION_NAME,
42-
# collection_name=COLLECTION_NAME
43-
# )
44-
#
36+
4537
vector_store = QdrantVectorStore(
4638
client=client,
4739
collection_name=COLLECTION_NAME,
@@ -50,10 +42,9 @@ async def get_vector_index(index_id: str, nodes: List[Node]):
5042

5143
index = VectorIndex(
5244
vector_store=vector_store,
53-
# doc_store=document_store,
5445
index_id=index_id
5546
)
56-
await index.add_nodes(DocumentNode, nodes)
47+
_ids = await index.add_nodes(doc=None, nodes=nodes)
5748

5849
return index
5950

packages/fetchcraft-core/src/examples/advanced/recursive_object_retrieval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ async def get_vector_index(index_id: str, nodes: List[Node]):
4646
# doc_store=document_store,
4747
index_id=index_id
4848
)
49-
await index.add_nodes(DocumentNode, nodes)
49+
await index.add_nodes(doc=None, nodes=nodes)
5050

5151
return index
5252

packages/fetchcraft-core/src/fetchcraft/document_store/mongodb_store.py

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
MongoDB document store implementation.
33
"""
44

5-
from typing import List, Optional, Dict, Any, Type
5+
from typing import List, Optional, Dict, Any, Type, Union
66
from pydantic import BaseModel, Field, ConfigDict
77

88
from .base import DocumentStore
@@ -413,7 +413,67 @@ async def get_documents_by_doc_id(self, doc_id: str) -> List[Node]:
413413
documents.append(doc_class(**doc_dict))
414414

415415
return documents
416-
416+
417+
async def find(
    self,
    query: Dict[str, Any],
    values: List[str],
    unique: bool = False,
) -> Union[List[Any], List[Dict[str, Any]]]:
    """
    Find arbitrary values from a MongoDB collection.

    Args:
        query: MongoDB filter query
        values: list of dot-path fields to return (e.g. ["metadata.source"])
        unique: whether to return unique values only (first occurrence wins,
            original order is kept)

    Returns:
        - If one value field is provided:
            List[Any] -- ``None`` marks documents lacking the field; with
            ``unique=True`` those ``None`` entries are dropped.
        - If multiple value fields are provided:
            List[Dict[str, Any]] keyed by the requested dot-paths.
    """
    import inspect

    def extract(doc: Dict[str, Any], path: str):
        """Safely extract dotted-path values."""
        current = doc
        for key in path.split("."):
            if not isinstance(current, dict) or key not in current:
                return None
            current = current[key]
        return current

    def freeze(value: Any):
        """Build a hashable stand-in so lists/dicts can be de-duplicated."""
        if isinstance(value, dict):
            pairs = sorted(value.items(), key=lambda item: str(item[0]))
            return ("dict", tuple((k, freeze(v)) for k, v in pairs))
        if isinstance(value, (list, tuple)):
            return ("list", tuple(freeze(v) for v in value))
        try:
            hash(value)
        except TypeError:
            return ("repr", repr(value))
        return value

    # Build projection
    projection = {field: 1 for field in values}
    projection["_id"] = 0

    single = len(values) == 1

    def to_row(doc: Dict[str, Any]):
        if single:
            return extract(doc, values[0])
        return {v: extract(doc, v) for v in values}

    # Driver ``find()`` methods normally hand back a cursor directly rather
    # than a coroutine, so only await when the returned object is awaitable.
    cursor = self.collection.find(query, projection)
    if inspect.isawaitable(cursor):
        cursor = await cursor

    results: List[Any] = []
    if hasattr(cursor, "__aiter__"):
        # Async drivers expose the cursor as an async iterator.
        async for doc in cursor:
            results.append(to_row(doc))
    else:
        for doc in cursor:
            results.append(to_row(doc))

    if not unique:
        return results

    seen = set()
    unique_results: List[Any] = []
    for item in results:
        if single and item is None:
            # Missing fields are not reported as a distinct value.
            continue
        key = freeze(item)
        if key not in seen:
            seen.add(key)
            unique_results.append(item)
    return unique_results
476+
417477
async def close(self):
418478
"""Close the MongoDB connection."""
419479
if self.client:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from fetchcraft.index.base import BaseIndex, IndexFactory
22
from fetchcraft.index.vector_index import VectorIndex
3+
from fetchcraft.index.list_index import ListIndex
34

45
__all__ = [
56
"BaseIndex",
67
"IndexFactory",
78
"VectorIndex",
9+
"ListIndex"
810
]

packages/fetchcraft-core/src/fetchcraft/index/base.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,38 @@ async def _resolve_to_top_parent(
126126
# Max depth reached, return current node
127127
return current_node
128128

129+
async def _resolve_parent_node(
    self,
    doc: Node,
    score: float
) -> tuple[Node, float]:
    """
    Resolve a single scored node to the node that should be returned.

    A SymNode carrying a ``parent_id`` is replaced by the result of
    ``_resolve_to_top_parent``; when that resolution yields nothing the
    original node is kept. Every other node is returned unchanged. The
    score is passed through untouched in all cases.

    This method keeps no state between calls, so it cannot de-duplicate
    parents shared by several nodes; callers that need that must track the
    returned ids themselves.

    Args:
        doc: The node to resolve.
        score: The score associated with ``doc``.

    Returns:
        A ``(node, score)`` tuple holding the resolved node.
    """
    # Only SymNodes with a parent reference need resolution.
    if doc.node_type == NodeType.SYMNODE and doc.parent_id:
        top_parent = await self._resolve_to_top_parent(doc)
        if top_parent:
            return top_parent, score
        # Resolution failed, fall back to the original node below.

    return doc, score
160+
129161
async def _resolve_parent_nodes(
130162
self,
131163
results: List[tuple[D, float]]
@@ -197,7 +229,7 @@ async def delete_documents(self, document_ids):
197229
pass
198230

199231
@abstractmethod
200-
async def as_retriever(self, top_k, resolve_parents, object_mapper: Optional[ObjectMapper] = None, **search_kwargs):
232+
def as_retriever(self, top_k: int = 4, resolve_parents: bool = True, object_mapper: Optional[ObjectMapper] = None, **search_kwargs):
201233
"""
202234
Create a retriever from this index.
203235
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import logging
2+
from typing import List, Optional, AsyncIterator, Tuple
3+
4+
from fetchcraft.index.base import BaseIndex, D
5+
from fetchcraft.kv_storage import KVStorage
6+
from fetchcraft.kv_storage.storage import InMemoryKVStorage
7+
from fetchcraft.node import Node, ObjectMapper
8+
from fetchcraft.retriever import ListIndexRetriever
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
class ListIndex(BaseIndex[Node]):
    """
    Index (beta) that keeps nodes in a ``KVStorage`` and returns all of them
    on search with a constant score of 1.0 -- the query is not used for
    ranking.
    """
    storage: KVStorage

    def __init__(self, index_id: Optional[str], storage: Optional[KVStorage] = None, **kwargs):
        """
        Args:
            index_id: Identifier forwarded to ``BaseIndex``.
            storage: Backend holding the nodes; an ``InMemoryKVStorage`` is
                created when omitted.
            **kwargs: Forwarded to ``BaseIndex``.
        """
        # Test against None explicitly so a storage object that happens to
        # be falsy is not silently swapped for a new in-memory one.
        _storage = storage if storage is not None else InMemoryKVStorage()
        super().__init__(index_id=index_id, storage=_storage, **kwargs)

    async def add_nodes(self, nodes: List[D], doc: Optional[D] = None, show_progress: bool = False):
        """
        Insert ``nodes`` into the storage.

        ``doc`` and ``show_progress`` are accepted but not used here. The
        positional order differs from ``VectorIndex.add_nodes``, so pass
        arguments by keyword.

        Returns:
            The ids of the inserted nodes.
        """
        self.storage.insert_nodes(nodes)
        return [node.id for node in nodes]

    async def search_by_text_iter(self, query: str, query_embedding: Optional[List[float]] = None, resolve_parents: bool = True, **kwargs) -> AsyncIterator[Tuple[D, float]]:
        """
        Yield every stored node together with a score of 1.0.

        Args:
            query: Unused; present for interface compatibility.
            query_embedding: Unused; present for interface compatibility.
            resolve_parents: When true, each node is passed through
                ``_resolve_parent_node`` and a resolved node is yielded only
                once, even if several stored nodes resolve to it.
        """
        seen_ids = set()
        for node in self.storage.get_all():
            score = 1.0
            if resolve_parents:
                node, score = await self._resolve_parent_node(node, score)
                # Several SymNodes may share one parent; emit it only once.
                if node.id in seen_ids:
                    continue
                seen_ids.add(node.id)
            yield node, score

    async def get_document(self, document_id):
        """Return the stored node whose id equals ``document_id``, or None."""
        for node in self.storage.get_all():
            if node.id == document_id:
                return node
        return None

    async def delete_documents(self, document_ids):
        """
        Deletion is not implemented: ``KVStorage`` exposes no removal
        operation. The call is a no-op and logs a warning instead of
        silently doing nothing.
        """
        logger.warning("ListIndex.delete_documents is not supported; nothing was deleted")

    def as_retriever(self, top_k: int = 4, resolve_parents: bool = True, object_mapper: Optional[ObjectMapper] = None, **search_kwargs):
        """Create a ``ListIndexRetriever`` bound to this index."""
        return ListIndexRetriever(index=self, top_k=top_k, resolve_parents=resolve_parents, object_mapper=object_mapper, **search_kwargs)

packages/fetchcraft-core/src/fetchcraft/index/vector_index.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,8 @@ async def add_nodes(self, doc: Optional[D], nodes: List[D], show_progress: bool
9999
:param doc: The source document
100100
"""
101101
existing_docs = {}
102-
if self._doc_store:
102+
if self._doc_store and doc:
103103
for node in nodes:
104-
# _docs = await self._doc_store.list_documents(filters={"metadata.source": node.metadata.get("source")})
105104
_docs = await self._doc_store.list_documents(filters={"persistent_key": node.persistent_key})
106105
existing_docs.update({doc.id: doc for doc in _docs})
107106

@@ -116,7 +115,10 @@ async def add_nodes(self, doc: Optional[D], nodes: List[D], show_progress: bool
116115

117116
try:
118117
if self._doc_store:
119-
await self._doc_store.add_documents([doc] + nodes)
118+
all_nodes = nodes
119+
if doc:
120+
all_nodes = [doc] + nodes
121+
await self._doc_store.add_documents(all_nodes)
120122
except Exception as e:
121123
logger.error(f"Error adding documents to document store: {e}")
122124
raise e
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from fetchcraft.kv_storage.storage import KVStorage
2+
3+
__all__ = [
4+
"KVStorage"
5+
]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from abc import abstractmethod, ABC
2+
from typing import *
3+
from pydantic import BaseModel
4+
5+
from fetchcraft.node import Node
6+
7+
8+
class KVStorage(BaseModel, ABC):
    """Abstract interface for a backend that stores nodes."""

    @abstractmethod
    def insert_nodes(self, nodes: List[Node]):
        """Insert the given nodes into the storage."""

    @abstractmethod
    def get_all(self) -> List[Node]:
        """Return all nodes held by the storage."""
17+
18+
19+
class InMemoryKVStorage(KVStorage):
    """``KVStorage`` backed by a dict that maps node id to node."""
    _storage: Dict[str, Any]

    def __init__(self):
        super().__init__()
        self._storage = dict()

    def insert_nodes(self, nodes: List[Node]):
        """Add the nodes, replacing any stored entry that has the same id."""
        # Build the full mapping first so the store is only touched once
        # every node id has been read.
        by_id = {}
        for entry in nodes:
            by_id[entry.id] = entry
        self._storage.update(by_id)

    def get_all(self) -> List[Node]:
        """Return the stored nodes as a new list."""
        return [*self._storage.values()]
31+

packages/fetchcraft-core/src/fetchcraft/node.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,7 @@ def from_retriever(cls, text: str, retriever: ObjectNodeMixin):
504504
return cls(
505505
text=text,
506506
object_type=ObjectType.VECTOR_INDEX_RETRIEVER,
507+
obj=retriever,
507508
data=retriever.to_json(),
508509
)
509510

@@ -534,8 +535,9 @@ class DefaultObjectMapper(ObjectMapper):
534535
object_map: Dict[str, Any] = {}
535536
factories: Dict[str, Any] = {}
536537

537-
def __init__(self, factories: Optional[Dict[str, Any]] = None):
538+
def __init__(self, factories: Optional[Dict[str, Any]] = None, object_map: Optional[Dict[str, Any]] = None):
538539
super().__init__()
540+
self.object_map = object_map or {}
539541
self.factories = factories or {}
540542

541543
async def resolve_object_node(self, node: ObjectNode, score: float, query: str, top_k: Optional[int] = None, **kwargs) -> List[Node]:

0 commit comments

Comments
 (0)