
Commit b9bf9c5

Merge branch 'mindsdb:main' into patch-1
2 parents 722911d + 8f44e3b commit b9bf9c5

13 files changed, +612 -38 lines changed

.github/workflows/test_on_deploy.yml

Lines changed: 2 additions & 2 deletions
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8', '3.9','3.10', '3.11']
+        python-version: ['3.10']
     steps:
       - name: Checkout code
         uses: actions/checkout@v2
@@ -28,4 +28,4 @@ jobs:
       env:
         PYTHONPATH: ./
         API_KEY: ${{ secrets.API_KEY }}
-        BASE_URL: ${{ secrets.BASE_URL }}
+        BASE_URL: 'https://mdb.ai'

README.md

Lines changed: 11 additions & 2 deletions
@@ -149,5 +149,14 @@ client.datasources.drop('my_datasource')
 ```
 >Note: The SDK currently does not support automatically removing a data source if it is no longer connected to any mind.
 
-### Other SDKs
-#### [Command-Line](https://github.com/Better-Boy/minds-cli-sdk)
+### Community Supported SDKs
+
+- [Java-SDK](https://github.com/Better-Boy/minds-java-sdk)
+- [Ruby-SDK](https://github.com/tungnt1203/minds_ruby_sdk)
+- [Dart-SDK](https://github.com/ArnavK-09/mdb_dart)
+- [C# SDK](https://github.com/priyanshuverma-dev/Minds.SDK)
+- [Go SDK](https://github.com/Abiji-2020/minds-go-sdk)
+
+#### Command Line Tools
+- [Minds CLI](https://github.com/Better-Boy/minds-cli-sdk)
+

minds/__about__.py

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 __title__ = 'minds_sdk'
 __package_name__ = 'minds'
-__version__ = '1.0.8'
+__version__ = '1.2.0'
 __description__ = 'An AI-Data Mind is an LLM with the built-in power to answer data questions for Agents'
 __email__ = '[email protected]'
 __author__ = 'MindsDB Inc'

minds/client.py

Lines changed: 2 additions & 0 deletions
@@ -2,6 +2,7 @@
 from minds.rest_api import RestAPI
 
 from minds.datasources import Datasources
+from minds.knowledge_bases import KnowledgeBases
 from minds.minds import Minds
 
 
@@ -12,5 +13,6 @@ def __init__(self, api_key, base_url=None):
         self.api = RestAPI(api_key, base_url)
 
         self.datasources = Datasources(self)
+        self.knowledge_bases = KnowledgeBases(self)
 
         self.minds = Minds(self)
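
With this change the client exposes knowledge bases alongside datasources and minds. A minimal usage sketch, assuming the client class is Client as in the SDK README and using a placeholder API key:

from minds.client import Client

client = Client(api_key='YOUR_API_KEY')  # base_url is optional per the signature above

client.datasources       # existing Datasources manager
client.knowledge_bases   # KnowledgeBases manager added in this commit
client.minds             # existing Minds manager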

minds/datasources/datasources.py

Lines changed: 4 additions & 2 deletions
@@ -1,7 +1,7 @@
 from typing import List, Optional, Union
 
 from pydantic import BaseModel, Field
-
+import minds.utils as utils
 import minds.exceptions as exc
 
 class DatabaseConfig(BaseModel):
@@ -37,8 +37,10 @@ def create(self, ds_config: DatabaseConfig, update=False):
 
         name = ds_config.name
 
+        utils.validate_datasource_name(name)
+
         if update:
-            self.api.put('/datasources', data=ds_config.model_dump())
+            self.api.put(f'/datasources/{name}', data=ds_config.model_dump())
         else:
             self.api.post('/datasources', data=ds_config.model_dump())
         return self.get(name)
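
The create() change above validates the datasource name before any request and, for updates, targets the named resource. A hedged sketch of how this surfaces to callers; every DatabaseConfig field other than name is illustrative and not taken from this diff:

from minds.datasources.datasources import DatabaseConfig

config = DatabaseConfig(
    name='my_datasource',
    engine='postgres',                     # illustrative field, not shown in this diff
    connection_data={'host': 'examplehost', 'database': 'exampledb'},  # placeholders
)

client.datasources.create(config)               # POST /datasources after validate_datasource_name(name)
client.datasources.create(config, update=True)  # now PUT /datasources/my_datasource instead of PUT /datasources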

minds/exceptions.py

Lines changed: 4 additions & 0 deletions
@@ -20,4 +20,8 @@ class UnknownError(Exception):
 
 
 class MindNameInvalid(Exception):
+    ...
+
+
+class DatasourceNameInvalid(Exception):
     ...
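
The new DatasourceNameInvalid sits next to MindNameInvalid. Given the validate_datasource_name() call added in minds/datasources/datasources.py, a caller would presumably guard datasource creation like this (the exact raising site is an assumption, not shown in this diff):

import minds.exceptions as exc

try:
    client.datasources.create(config)
except exc.DatasourceNameInvalid:
    # assumption: raised by utils.validate_datasource_name for names the API rejects
    print('choose a different datasource name')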

minds/knowledge_bases/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+from .knowledge_bases import *
minds/knowledge_bases/knowledge_bases.py

Lines changed: 175 additions & 0 deletions
@@ -0,0 +1,175 @@
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel
+
+from minds.knowledge_bases.preprocessing import PreprocessingConfig
+from minds.rest_api import RestAPI
+
+
+class VectorStoreConfig(BaseModel):
+    '''Configuration for the underlying vector store for knowledge base embeddings'''
+    engine: str
+    connection_data: Dict[str, Any]
+    table: str = 'embeddings'
+
+
+class EmbeddingConfig(BaseModel):
+    '''Configuration for embeddings to use with underlying vector store for knowledge base'''
+    provider: str
+    model: str
+    params: Optional[Dict[str, Any]] = None
+
+
+class KnowledgeBaseConfig(BaseModel):
+    '''Configuration for a knowledge base'''
+    name: str
+    description: str
+    vector_store_config: Optional[VectorStoreConfig] = None
+    embedding_config: Optional[EmbeddingConfig] = None
+    # Params to apply to retrieval pipeline.
+    params: Optional[Dict] = None
+
+
+class KnowledgeBaseDocument(BaseModel):
+    '''Represents a document that can be inserted into a knowledge base'''
+    id: Union[int, str]
+    content: str
+    metadata: Optional[Dict[str, Any]] = {}
+
+
+class KnowledgeBase:
+    def __init__(self, name, api: RestAPI):
+        self.name = name
+        self.api = api
+
+    def insert_from_select(self, query: str, preprocessing_config: PreprocessingConfig = None):
+        '''
+        Inserts select content of a connected datasource into this knowledge base
+
+        :param query: The SQL SELECT query to use to retrieve content to be inserted
+        '''
+        update_request = {
+            'query': query
+        }
+        if preprocessing_config is not None:
+            update_request['preprocessing'] = preprocessing_config.model_dump()
+        _ = self.api.put(f'/knowledge_bases/{self.name}', data=update_request)
+
+    def insert_documents(self, documents: List[KnowledgeBaseDocument], preprocessing_config: PreprocessingConfig = None):
+        '''
+        Inserts documents directly into this knowledge base
+
+        :param documents: The documents to insert
+        '''
+        update_request = {
+            'rows': [d.model_dump() for d in documents]
+        }
+        if preprocessing_config is not None:
+            update_request['preprocessing'] = preprocessing_config.model_dump()
+        _ = self.api.put(f'/knowledge_bases/{self.name}', data=update_request)
+
+    def insert_urls(self, urls: List[str], preprocessing_config: PreprocessingConfig = None):
+        '''
+        Crawls URLs & inserts the retrieved webpages into this knowledge base
+
+        :param urls: Valid URLs to crawl & insert
+        '''
+        update_request = {
+            'urls': urls
+        }
+        if preprocessing_config is not None:
+            update_request['preprocessing'] = preprocessing_config.model_dump()
+        _ = self.api.put(f'/knowledge_bases/{self.name}', data=update_request)
+
+    def insert_files(self, files: List[str], preprocessing_config: PreprocessingConfig = None):
+        '''
+        Inserts files that have already been uploaded to MindsDB into this knowledge base
+
+        :param files: Names of preuploaded files to insert
+        '''
+        update_request = {
+            'files': files
+        }
+        if preprocessing_config is not None:
+            update_request['preprocessing'] = preprocessing_config.model_dump()
+        _ = self.api.put(f'/knowledge_bases/{self.name}', data=update_request)
+
+
+class KnowledgeBases:
+    def __init__(self, client):
+        self.api = client.api
+
+    def create(self, config: KnowledgeBaseConfig) -> KnowledgeBase:
+        '''
+        Create new knowledge base and return it
+
+        :param config: knowledge base configuration, properties:
+          - name: str, name of knowledge base
+          - description: str, description of the knowledge base. Used by minds to know what data can be retrieved.
+          - vector_store_config: VectorStoreConfig, configuration for embeddings vector store.
+          - embedding_config: EmbeddingConfig, configuration for embeddings.
+        :return: knowledge base object
+        '''
+        create_request = {
+            'name': config.name,
+            'description': config.description
+        }
+        if config.vector_store_config is not None:
+            vector_store_data = {
+                'engine': config.vector_store_config.engine,
+                'connection_data': config.vector_store_config.connection_data
+            }
+            create_request['vector_store'] = vector_store_data
+        if config.embedding_config is not None:
+            embedding_data = {
+                'provider': config.embedding_config.provider,
+                'name': config.embedding_config.model
+            }
+            if config.embedding_config.params is not None:
+                embedding_data.update(config.embedding_config.params)
+            create_request['embedding_model'] = embedding_data
+        if config.params is not None:
+            create_request['params'] = config.params
+
+        _ = self.api.post('/knowledge_bases', data=create_request)
+        return self.get(config.name)
+
+    def list(self) -> List[KnowledgeBase]:
+        '''
+        Returns list of knowledge bases
+
+        :return: iterable knowledge bases
+        '''
+
+        list_knowledge_bases_response = self.api.get('/knowledge_bases')
+        knowledge_bases = list_knowledge_bases_response.json()
+
+        all_knowledge_bases = []
+        for knowledge_base in knowledge_bases:
+            all_knowledge_bases.append(KnowledgeBase(knowledge_base['name'], self.api))
+        return all_knowledge_bases
+
+    def get(self, name: str) -> KnowledgeBase:
+        '''
+        Get knowledge base by name
+
+        :param name: name of knowledge base
+        :return: knowledge base object
+        '''
+
+        knowledge_base_response = self.api.get(f'/knowledge_bases/{name}')
+        knowledge_base = knowledge_base_response.json()
+        return KnowledgeBase(knowledge_base['name'], self.api)
+
+    def drop(self, name: str, force=False):
+        '''
+        Drop knowledge base by name
+
+        :param name: name of knowledge base
+        :param force: if True - remove from all minds, default: False
+        '''
+        data = None
+        if force:
+            data = {'cascade': True}
+
+        self.api.delete(f'/knowledge_bases/{name}', data=data)
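
Taken together, the new module gives the client a full knowledge base lifecycle. A usage sketch built only from the names defined above; the client class name follows the SDK README, and the knowledge base name, description, document contents and URL are placeholders:

from minds.client import Client
from minds.knowledge_bases import KnowledgeBaseConfig, KnowledgeBaseDocument

client = Client(api_key='YOUR_API_KEY')

kb = client.knowledge_bases.create(KnowledgeBaseConfig(
    name='support_articles',
    description='Product support articles used to answer customer questions',
))

kb.insert_documents([
    KnowledgeBaseDocument(id=1, content='How to reset a password...', metadata={'source': 'faq'}),
])
kb.insert_urls(['https://example.com/docs/getting-started'])
kb.insert_from_select('SELECT id, body FROM articles')  # placeholder query against a connected datasource

for existing in client.knowledge_bases.list():
    print(existing.name)

client.knowledge_bases.drop('support_articles', force=True)  # cascade removal from any minds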
minds/knowledge_bases/preprocessing.py

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+from typing import Any, Dict, List, Literal, Optional
+
+from pydantic import BaseModel, Field, model_validator
+
+
+DEFAULT_LLM_MODEL = 'gpt-4o'
+DEFAULT_LLM_MODEL_PROVIDER = 'openai'
+
+
+class TextChunkingConfig(BaseModel):
+    '''Configuration for chunking text content before they are inserted into a knowledge base'''
+    separators: List[str] = Field(
+        default=['\n\n', '\n', ' ', ''],
+        description='List of separators to use for splitting text, in order of priority'
+    )
+    chunk_size: int = Field(
+        default=1000,
+        description='The target size of each text chunk',
+        gt=0
+    )
+    chunk_overlap: int = Field(
+        default=200,
+        description='The number of characters to overlap between chunks',
+        ge=0
+    )
+
+
+class LLMConfig(BaseModel):
+    model_name: str = Field(default=DEFAULT_LLM_MODEL, description='LLM model to use for context generation')
+    provider: str = Field(default=DEFAULT_LLM_MODEL_PROVIDER, description='LLM model provider to use for context generation')
+    params: Dict[str, Any] = Field(default={}, description='Additional parameters to pass in when initializing the LLM')
+
+
+class ContextualConfig(BaseModel):
+    '''Configuration specific to contextual preprocessing'''
+    llm_config: LLMConfig = Field(
+        default=LLMConfig(),
+        description='LLM configuration to use for context generation'
+    )
+    context_template: Optional[str] = Field(
+        default=None,
+        description='Custom template for context generation'
+    )
+    chunk_size: int = Field(
+        default=1000,
+        description='The target size of each text chunk',
+        gt=0
+    )
+    chunk_overlap: int = Field(
+        default=200,
+        description='The number of characters to overlap between chunks',
+        ge=0
+    )
+
+
+class PreprocessingConfig(BaseModel):
+    '''Complete preprocessing configuration'''
+    type: Literal['contextual', 'text_chunking'] = Field(
+        default='text_chunking',
+        description='Type of preprocessing to apply'
+    )
+    contextual_config: Optional[ContextualConfig] = Field(
+        default=None,
+        description='Configuration for contextual preprocessing'
+    )
+    text_chunking_config: Optional[TextChunkingConfig] = Field(
+        default=None,
+        description='Configuration for text chunking preprocessing'
+    )
+
+    @model_validator(mode='after')
+    def validate_config_presence(self) -> 'PreprocessingConfig':
+        '''Ensure the appropriate config is present for the chosen type'''
+        if self.type == 'contextual' and not self.contextual_config:
+            self.contextual_config = ContextualConfig()
+        if self.type == 'text_chunking' and not self.text_chunking_config:
+            self.text_chunking_config = TextChunkingConfig()
+        return self
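
The insert_* methods in minds/knowledge_bases/knowledge_bases.py accept one of these configurations. A short sketch of building both preprocessing variants and passing one along; the chunk sizes simply repeat the declared defaults, and kb stands for a KnowledgeBase obtained as in the earlier sketch:

from minds.knowledge_bases.preprocessing import ContextualConfig, LLMConfig, PreprocessingConfig

# Plain text chunking; the model_validator fills in TextChunkingConfig() defaults.
text_chunking = PreprocessingConfig(type='text_chunking')

# Contextual preprocessing with an explicit LLM configuration.
contextual = PreprocessingConfig(
    type='contextual',
    contextual_config=ContextualConfig(
        llm_config=LLMConfig(model_name='gpt-4o', provider='openai'),
        chunk_size=1000,
        chunk_overlap=200,
    ),
)

kb.insert_from_select('SELECT id, body FROM articles', preprocessing_config=contextual)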
