From 1d1a61ad1c480d487cc440df882f50f0ff4c4d91 Mon Sep 17 00:00:00 2001 From: Jacky Date: Sat, 22 May 2021 13:51:31 +1000 Subject: [PATCH 1/2] change APi endpoint and Update SDK --- utils/automate_api.py | 8 +-- vectorai/api/api.py | 141 ++++++++++++++++++------------------------ 2 files changed, 61 insertions(+), 88 deletions(-) diff --git a/utils/automate_api.py b/utils/automate_api.py index ca0aa316..247d29fb 100644 --- a/utils/automate_api.py +++ b/utils/automate_api.py @@ -2,13 +2,9 @@ import os from openapi_to_sdk.sdk_automation import PythonSDKBuilder - url="https://vectorai-development-api.azurewebsites.net" - url="https://vectorai-development-api.azurewebsites.net" - # url = "https://api.vctr.ai" + url = "https://vectorai-production-api.azurewebsites.net" sdk = PythonSDKBuilder( url=url, - # url="https://vectorai-development-api.azurewebsites.net", - # url='https://vecdb-aueast-api.azurewebsites.net', inherited_properties=['username', 'api_key', 'url'], decorators=[ 'retry()', @@ -16,9 +12,7 @@ override_param_defaults=dict( min_score=None, cursor=None, - # url='https://vecdb-aueast-api.azurewebsites.net', url=url, - # sort=False, sort_by_created_at_date=False, ), internal_functions=[ diff --git a/vectorai/api/api.py b/vectorai/api/api.py index e5f461b5..3310b7fc 100644 --- a/vectorai/api/api.py +++ b/vectorai/api/api.py @@ -1129,7 +1129,7 @@ def bulk_encode(self, encoders, documents, **kwargs): @retry() @return_curl_or_response('json') - def predict_knn_regression(self, collection_name, vector, search_field, target_field, impute_value, k=5, weighting=True, predict_operation="mean", **kwargs): + def predict_knn_regression(self, collection_name, vector, search_field, target_field, impute_value, k=5, weighting=True, predict_operation="mean", include_search_results=True, **kwargs): """Predict KNN regression. Predict with KNN regression using normal search. 
@@ -1145,6 +1145,7 @@ def predict_knn_regression(self, collection_name, vector, search_field, target_f weighting: weighting impute_value: What value to fill if target field is missing. predict_operation: How to predict using the vectors. +include_search_results: If True, returns the results as well. """ return requests.post( @@ -1160,6 +1161,7 @@ def predict_knn_regression(self, collection_name, vector, search_field, target_f weighting=weighting, impute_value=impute_value, predict_operation=predict_operation, + include_search_results=include_search_results, )) @retry() @@ -1241,6 +1243,10 @@ def filters(self, collection_name, filters=[], page=1, page_size=20, asc=False, These are the available conditions: "==", "!=", ">=", ">", "<", "<=" + +If you are looking to combine your filters with multiple ORs, simply add the following inside the query +`{"strict":"must_or"}`. + Args ======== @@ -2121,7 +2127,7 @@ def text_chunking(self, collection_name, text_field, chunk_field, insert_results @retry() @return_curl_or_response('json') - def text_chunking_encoder(self, collection_name, text_field, chunk_field, insert_results_to_seperate_collection_name, encoder_task="text", refresh=True, store_to_pipeline=True, **kwargs): + def text_chunking_encoder(self, collection_name, text_field, chunk_field, insert_results_to_seperate_collection_name, encoder_task="text", refresh=True, store_to_pipeline=True, alias="", **kwargs): """Chunk a text field and encode the chunks Split text into separate sentences. Encode each sentence to create chunkvectors. These are stored as \_chunkvector\_. The chunk field created is `field` + \_chunk\_. @@ -2137,6 +2143,7 @@ def text_chunking_encoder(self, collection_name, text_field, chunk_field, insert refresh: Whether to refresh the whole collection and re-encode all to vectors insert_results_to_seperate_collection_name: If specified the chunks will be inserted into a seperate collection. Default is None which means no seperate collection. 
store_to_pipeline: Whether to store the encoder to the chunking pipeline +alias: If alias is present, it will create change the name of the created vector field into field_{alias}_chunkvector_ """ return requests.post( @@ -2151,6 +2158,7 @@ def text_chunking_encoder(self, collection_name, text_field, chunk_field, insert refresh=refresh, insert_results_to_seperate_collection_name=insert_results_to_seperate_collection_name, store_to_pipeline=store_to_pipeline, + alias=alias, )) @retry() @@ -2178,6 +2186,31 @@ def process_pdf(self, collection_name, file_url, filename, **kwargs): filename=filename, )) + @retry() + @return_curl_or_response('json') + def bulk_process_pdf(self, collection_name, file_urls, filenames, **kwargs): + """Process multiple pdfs +Insert multiple PDFs into Vector AI: + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: What collection to insert the PDF into +file_urls: The file url blobs +filenames: The name of the PDF files + +""" + return requests.post( + url=self.url+'/collection/job/bulk_process_pdf', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + file_urls=file_urls, + filenames=filenames, + )) + @retry() @return_curl_or_response('json') def process_doc(self, collection_name, file_url, filename, **kwargs): @@ -2203,6 +2236,31 @@ def process_doc(self, collection_name, file_url, filename, **kwargs): filename=filename, )) + @retry() + @return_curl_or_response('json') + def bulk_process_doc(self, collection_name, file_urls, filenames, **kwargs): + """Process multiple doc or docx files +Insert multiple word docs into Vector AI + +Args +======== +username: Username +api_key: Api Key, you can request it from request_api_key +collection_name: What collection to insert the word doc into +file_urls: The file url blobs +filenames: The name of the Doc or DocX files + +""" + return requests.post( + 
url=self.url+'/collection/job/bulk_process_doc', + json=dict( + username=self.username, + api_key=self.api_key, + collection_name=collection_name, + file_urls=file_urls, + filenames=filenames, + )) + @retry() @return_curl_or_response('json') def copy_collection_from_another_user(self, collection_name, source_collection_name, source_username, source_api_key, **kwargs): @@ -4123,82 +4181,3 @@ def tag_documents_from_hub(self, tag_collection_name, vector_field, hub_username hub_api_key=hub_api_key, )) - @retry() - @return_curl_or_response('json') - def rank_comparator(self, ranked_list_1, ranked_list_2, **kwargs): - """Compare ranks between 2 results list. -Compare the ranks between 2 results list in VecDB - -Args -======== -username: Username -api_key: Api Key, you can request it from request_api_key -ranked_list_1: First ranked List -ranked_list_2: Second ranked list - -""" - return requests.post( - url=self.url+'/experimentation/rank_comparator', - json=dict( - username=self.username, - api_key=self.api_key, - ranked_list_1=ranked_list_1, - ranked_list_2=ranked_list_2, - )) - - @retry() - @return_curl_or_response('json') - def bias_indicator(self, anchor_documents, documents, metadata_field, vector_field, **kwargs): - """Compare bias of documents against anchor documents -Compare bias of documents against anchor documents - -Args -======== -username: Username -api_key: Api Key, you can request it from request_api_key -anchor_documents: Anchor documents to compare other documents against. 
-documents: Documents to compare against the anchor documents -metadata_field: Field from which the vector was derived -vector_field: Vector field to compare against - -""" - return requests.post( - url=self.url+'/experimentation/bias_indicator', - json=dict( - username=self.username, - api_key=self.api_key, - anchor_documents=anchor_documents, - documents=documents, - metadata_field=metadata_field, - vector_field=vector_field, - )) - - @retry() - @return_curl_or_response('json') - def cluster_comparator(self, collection_name, cluster_field, cluster_value, vector_field, alias, **kwargs): - """Compare clusters -Compare the clusters for cluster comparator - -Args -======== -username: Username -api_key: Api Key, you can request it from request_api_key -collection_name: the name of the collection -cluster_field: the cluster field -cluster_value: the cluster values by which to compare on -vector_field: The vector field that has been clustered -alias: The alias of the vector field - -""" - return requests.post( - url=self.url+'/experimentation/cluster_comparator', - json=dict( - username=self.username, - api_key=self.api_key, - collection_name=collection_name, - cluster_field=cluster_field, - cluster_value=cluster_value, - vector_field=vector_field, - alias=alias, - )) - From 59c0cadf848a3f5086f3a482021d5343abc967cd Mon Sep 17 00:00:00 2001 From: Jacky Date: Sat, 22 May 2021 13:53:41 +1000 Subject: [PATCH 2/2] update the VectorAI Production API --- vectorai/client.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vectorai/client.py b/vectorai/client.py index 635eeaf0..91d78f2e 100644 --- a/vectorai/client.py +++ b/vectorai/client.py @@ -31,9 +31,7 @@ class ViClient(ViWriteClient, ViAnalyticsClient): def __init__(self, username: str=None, api_key: str=None, - # Old API URL: https://vecdb-aueast-api.azurewebsites.net - # url: str="https://vectorai-development-api-vectorai-test-api.azurewebsites.net/", - url: 
str="https://vectorai-development-api.azurewebsites.net", + url: str = "https://vectorai-production-api.azurewebsites.net", analytics_url="https://vector-analytics.vctr.ai", verbose: bool = True) -> None: super().__init__(username, api_key, url) if username is None: