Skip to content

Commit a4fed59

Browse files
committed
insert uses api endpoints instead of query
1 parent c56f0b3 commit a4fed59

File tree

2 files changed

+56
-37
lines changed

2 files changed

+56
-37
lines changed

mindsdb_sdk/knowledge_bases.py

Lines changed: 35 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -150,35 +150,48 @@ def insert(self, data: Union[pd.DataFrame, Query, dict]):
150150
"""
151151
Insert data to knowledge base
152152
153-
>>> # insert using query
154-
>>> my_kb.insert(server.databases.example_db.tables.houses_sales.filter(type='house'))
155153
>>> # using dataframe
156154
>>> my_kb.insert(pd.read_csv('house_sales.csv'))
157155
>>> # using dict
158156
>>> my_kb.insert({'type': 'house', 'date': '2020-02-02'})
159157
160-
Data will be if id (defined by id_column param, see create knowledge base) is already exists in knowledge base
161-
it will be replaced
158+
If the id already exists in the knowledge base:
159+
- it will be replaced
160+
- `id` column can be defined by id_column param, see create knowledge base
162161
163162
:param data: Dataframe or Query object or dict.
164163
"""
165164

165+
if isinstance(data, Query):
166+
# for back compatibility
167+
return self.insert_query(data)
168+
166169
if isinstance(data, dict):
167-
data = pd.DataFrame([data])
170+
data = [data]
171+
elif isinstance(data, pd.DataFrame):
172+
data = data.to_dict('records')
173+
else:
174+
raise ValueError("Unknown data type, accepted types: DataFrame, Query, dict")
175+
176+
return self.api.insert_into_knowledge_base(
177+
self.project.name,
178+
self.name,
179+
data={'rows': data}
180+
)
168181

169-
if isinstance(data, pd.DataFrame):
170-
# insert data
171-
data_split = data.to_dict('split')
182+
def insert_query(self, data: Query):
183+
"""
184+
Insert data to knowledge base using query
172185
173-
ast_query = Insert(
174-
table=self.table_name,
175-
columns=data_split['columns'],
176-
values=data_split['data']
177-
)
178-
sql = ast_query.to_string()
186+
>>> my_kb.insert_query(server.databases.example_db.tables.houses_sales.filter(type='house'))
179187
180-
else:
181-
# insert from select
188+
If the id (defined by id_column param, see create knowledge base) already exists in the knowledge base,
189+
it will be replaced
190+
191+
:param data: Query object.
192+
"""
193+
if is_saving():
194+
# generate insert from select query
182195
if data.database is not None:
183196
ast_query = Insert(
184197
table=self.table_name,
@@ -188,11 +201,15 @@ def insert(self, data: Union[pd.DataFrame, Query, dict]):
188201
else:
189202
sql = f'INSERT INTO {self.table_name.to_string()} ({data.sql})'
190203

191-
if is_saving():
192204
# don't execute it right now, return query object
193205
return Query(self, sql, self.database)
194206

195-
self.api.sql_query(sql, self.database)
207+
# the query has to be in the context of the mindsdb project
208+
self.api.insert_into_knowledge_base(
209+
self.project.name,
210+
self.name,
211+
data={'query': data.sql}
212+
)
196213

197214

198215
class KnowledgeBases(CollectionBase):

tests/test_sdk.py

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import mindsdb_sdk
1313

1414
from mindsdb_sdk.agents import Agent
15-
from mindsdb_sdk.connect import DEFAULT_LOCAL_API_URL
15+
from mindsdb_sdk.connect import DEFAULT_LOCAL_API_URL, DEFAULT_CLOUD_API_URL
1616
from mindsdb_sdk.skills import SQLSkill
1717
from mindsdb_sdk.connectors import rest_api
1818

@@ -1128,8 +1128,9 @@ def check_project_jobs(self, project, model, database, kb, mock_post):
11281128
call_stack_num=-2
11291129
)
11301130

1131+
@patch('requests.Session.put')
11311132
@patch('requests.Session.post')
1132-
def check_project_kb(self, project, model, database, mock_post):
1133+
def check_project_kb(self, project, model, database, mock_post, mock_put):
11331134

11341135
response_mock(mock_post, pd.DataFrame([{
11351136
'NAME': 'my_kb',
@@ -1162,34 +1163,35 @@ def check_project_kb(self, project, model, database, mock_post):
11621163
assert kb.storage.db.name == 'pvec'
11631164
assert kb.model.name == 'openai_emb'
11641165

1165-
# insert
1166+
# --- insert ---
11661167

1168+
# table
11671169
kb.insert(
11681170
database.tables.tbl2.filter(a=1)
11691171
)
1170-
check_sql_call(
1171-
mock_post,
1172-
f''' insert into {project.name}.{kb.name} (
1173-
select * from {database.name}.tbl2 where a=1
1174-
)'''
1175-
)
1172+
1173+
args, kwargs = mock_put.call_args
1174+
assert args[0] == f'{DEFAULT_CLOUD_API_URL}/api/projects/{project.name}/knowledge_bases/my_kb'
1175+
assert kwargs == {'json': {'knowledge_base': {'query': 'SELECT * FROM pg1.tbl2 WHERE a = 1'}}}
1176+
1177+
# query
11761178
kb.insert(
11771179
database.query('select * from tbl2 limit 1')
11781180
)
1179-
check_sql_call(
1180-
mock_post,
1181-
f''' insert into {project.name}.{kb.name} (
1182-
select * from {database.name} (select * from tbl2 limit 1)
1183-
)'''
1184-
)
1181+
args, kwargs = mock_put.call_args
1182+
assert args[0] == f'{DEFAULT_CLOUD_API_URL}/api/projects/{project.name}/knowledge_bases/my_kb'
1183+
assert kwargs == {'json': {'knowledge_base': {'query': 'select * from tbl2 limit 1'}}}
11851184

1185+
# dataframe
11861186
kb.insert(
11871187
pd.DataFrame([[1, 'Alice'], [2, 'Bob']], columns=['id', 'name'])
11881188
)
1189-
check_sql_call(
1190-
mock_post,
1191-
f'''INSERT INTO {project.name}.{kb.name}(id, name) VALUES (1, 'Alice'), (2, 'Bob')'''
1192-
)
1189+
1190+
args, kwargs = mock_put.call_args
1191+
assert args[0] == f'{DEFAULT_CLOUD_API_URL}/api/projects/{project.name}/knowledge_bases/my_kb'
1192+
assert kwargs == {'json': {
1193+
'knowledge_base': {'rows': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}]}
1194+
}}
11931195

11941196
# query
11951197
df = kb.find(query='dog', limit=5).fetch()

0 commit comments

Comments
 (0)