22
33from pydantic import BaseModel
44
5+ from minds .knowledge_bases .preprocessing import PreprocessingConfig
56from minds .rest_api import RestAPI
67
78
@@ -25,6 +26,8 @@ class KnowledgeBaseConfig(BaseModel):
2526 description : str
2627 vector_store_config : Optional [VectorStoreConfig ] = None
2728 embedding_config : Optional [EmbeddingConfig ] = None
29+ # Params to apply to retrieval pipeline.
30+ params : Optional [Dict ] = None
2831
2932
3033class KnowledgeBaseDocument (BaseModel ):
@@ -39,7 +42,7 @@ def __init__(self, name, api: RestAPI):
3942 self .name = name
4043 self .api = api
4144
42- def insert_from_select (self , query : str ):
45+ def insert_from_select (self , query : str , preprocessing_config : PreprocessingConfig = None ):
4346 '''
4447 Inserts select content of a connected datasource into this knowledge base
4548
@@ -48,9 +51,11 @@ def insert_from_select(self, query: str):
4851 update_request = {
4952 'query' : query
5053 }
54+ if preprocessing_config is not None :
55+ update_request ['preprocessing' ] = preprocessing_config .model_dump ()
5156 _ = self .api .put (f'/knowledge_bases/{ self .name } ' , data = update_request )
5257
53- def insert_documents (self , documents : List [KnowledgeBaseDocument ]):
58+ def insert_documents (self , documents : List [KnowledgeBaseDocument ], preprocessing_config : PreprocessingConfig = None ):
5459 '''
5560 Inserts documents directly into this knowledge base
5661
@@ -59,9 +64,11 @@ def insert_documents(self, documents: List[KnowledgeBaseDocument]):
5964 update_request = {
6065 'rows' : [d .model_dump () for d in documents ]
6166 }
67+ if preprocessing_config is not None :
68+ update_request ['preprocessing' ] = preprocessing_config .model_dump ()
6269 _ = self .api .put (f'/knowledge_bases/{ self .name } ' , data = update_request )
6370
64- def insert_urls (self , urls : List [str ]):
71+ def insert_urls (self , urls : List [str ], preprocessing_config : PreprocessingConfig = None ):
6572 '''
6673 Crawls URLs & inserts the retrieved webpages into this knowledge base
6774
@@ -70,9 +77,11 @@ def insert_urls(self, urls: List[str]):
7077 update_request = {
7178 'urls' : urls
7279 }
80+ if preprocessing_config is not None :
81+ update_request ['preprocessing' ] = preprocessing_config .model_dump ()
7382 _ = self .api .put (f'/knowledge_bases/{ self .name } ' , data = update_request )
7483
75- def insert_files (self , files : List [str ]):
84+ def insert_files (self , files : List [str ], preprocessing_config : PreprocessingConfig = None ):
7685 '''
7786 Inserts files that have already been uploaded to MindsDB into this knowledge base
7887
@@ -81,6 +90,8 @@ def insert_files(self, files: List[str]):
8190 update_request = {
8291 'files' : files
8392 }
93+ if preprocessing_config is not None :
94+ update_request ['preprocessing' ] = preprocessing_config .model_dump ()
8495 _ = self .api .put (f'/knowledge_bases/{ self .name } ' , data = update_request )
8596
8697
@@ -117,6 +128,8 @@ def create(self, config: KnowledgeBaseConfig) -> KnowledgeBase:
117128 if config .embedding_config .params is not None :
118129 embedding_data .update (config .embedding_config .params )
119130 create_request ['embedding_model' ] = embedding_data
131+ if config .params is not None :
132+ create_request ['params' ] = config .params
120133
121134 _ = self .api .post ('/knowledge_bases' , data = create_request )
122135 return self .get (config .name )
0 commit comments