Skip to content

Commit 22223cc

Browse files
Embedder rework (#338)
* feat: embedder rework
* perf: deprecate Cohere and Python embedders
* chore: update submodules
* fix: search for embedders.json instead of reducer.pkl
* perf: update dump/load paths
* chore: update submodules
1 parent 4b5c343 commit 22223cc

File tree

5 files changed: 3 additions (+3) and 17 deletions (-17)

5 files changed

+3
-17
lines changed

controller/embedding/manager.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ def __recreate_or_extend_embedding(project_id: str, embedding_id: str) -> Embedd
230230

231231
if (
232232
new_embedding_item.platform == enums.EmbeddingPlatform.OPENAI.value
233-
or new_embedding_item.platform == enums.EmbeddingPlatform.COHERE.value
234233
or new_embedding_item.platform == enums.EmbeddingPlatform.AZURE.value
235234
):
236235
agreement_item = agreement.get_by_xfkey(

controller/embedding/terms.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,6 @@
1111
"terms": "Please note that by enabling this third-party API, you are stating that you accept its addition as a sub-processor under the terms of our Data Processing Agreement. Please be aware that the OpenAI API policies may conflict with your internal data and privacy policies. For more information please check: @@PLACEHOLDER@@. For questions you can contact us at [email protected].",
1212
"link": "https://openai.com/policies/api-data-usage-policies",
1313
},
14-
EmbeddingPlatform.COHERE.value:{
15-
"platform": EmbeddingPlatform.COHERE.value,
16-
"terms": "Please note that by enabling this third-party API, you are stating that you accept its addition as a sub-processor under the terms of our Data Processing Agreement. Please be aware that the Cohere API policies may conflict with your internal data and privacy policies. For more information please check: @@PLACEHOLDER@@. For questions you can contact us at [email protected].",
17-
"link": "https://cohere.com/terms-of-use",
18-
},
19-
EmbeddingPlatform.PYTHON.value: {
20-
"platform": EmbeddingPlatform.PYTHON.value,
21-
"terms": None,
22-
"link": None,
23-
},
2414
EmbeddingPlatform.AZURE.value: {
2515
"platform": EmbeddingPlatform.AZURE.value,
2616
"terms": "Please note that by enabling this third-party API, you are stating that you accept its addition as a sub-processor under the terms of our Data Processing Agreement. Please be aware that the Azure API policies may conflict with your internal data and privacy policies. For more information please check: @@PLACEHOLDER@@. For questions you can contact us at [email protected].",

controller/record/manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ def __check_and_prep_edit_records(
270270
continue
271271

272272
emb_path = os.path.join(
273-
"/inference", project_id, f"embedder-{str(embedding_item.id)}.pkl"
273+
"/inference", project_id, f"embedder-{str(embedding_item.id)}.json"
274274
)
275275
if not os.path.exists(emb_path):
276276
errors_found.append(

controller/transfer/project_transfer_manager.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -339,10 +339,7 @@ def __transform_embedding_by_name(embedding_name: str):
339339
attribute_name = splitted_name[0]
340340
embedding_type = splitted_name[1]
341341
model = "-".join(splitted_name[2:])
342-
if "bag-of-words" == model or "bag-of-characters" == model or "tf-idf" == model:
343-
platform = enums.EmbeddingPlatform.PYTHON.value
344-
else:
345-
platform = enums.EmbeddingPlatform.HUGGINGFACE.value
342+
platform = enums.EmbeddingPlatform.HUGGINGFACE.value
346343
name = f"{attribute_name}-{embedding_type}-{platform}-{model}"
347344
return platform, model, name
348345

submodules/model

0 commit comments

Comments
 (0)