Unstructured-IO
diff --git a/‎CHANGELOG.md‎
Lines changed: 2 additions & 1 deletion b/‎CHANGELOG.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎examples/embed/example_voyageai.py‎
Lines changed: 25 additions & 0 deletions b/‎examples/embed/example_voyageai.py‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎requirements/base.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements/base.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements/deps/constraints.txt‎
Lines changed: 4 additions & 1 deletion b/‎requirements/deps/constraints.txt‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎requirements/dev.txt‎
Lines changed: 3 additions & 3 deletions b/‎requirements/dev.txt‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎requirements/extra-docx.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements/extra-docx.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements/extra-odt.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements/extra-odt.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎requirements/extra-paddleocr.txt‎
Lines changed: 3 additions & 3 deletions b/‎requirements/extra-paddleocr.txt‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎requirements/extra-pdf-image.txt‎
Lines changed: 3 additions & 3 deletions b/‎requirements/extra-pdf-image.txt‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎requirements/huggingface.txt‎
Lines changed: 1 addition & 1 deletion b/‎requirements/huggingface.txt‎
Lines changed: 1 addition & 1 deletion
@@ -1,9 +1,10 @@
-## 0.14.3-dev4
+## 0.14.3-dev5
 
 ### Enhancements
 
 * **Move `category` field from Text class to Element class.**
 * **`partition_docx()` now supports pluggable picture sub-partitioners.** A subpartitioner that accepts a DOCX `Paragraph` and generates elements is now supported. This allows adding a custom sub-partitioner that extracts images and applies OCR or summarization for the image.
+* **Add VoyageAI embedder.** Adds a VoyageAI embedding encoder so elements can be embedded via Voyage AI.
 
 ### Features
 
 
@@ -0,0 +1,25 @@
+import os
+
+from unstructured.documents.elements import Text
+from unstructured.embed.voyageai import VoyageAIEmbeddingConfig, VoyageAIEmbeddingEncoder
+
+# To use Voyage AI, pass your Voyage AI API key
+# (obtained from https://dash.voyageai.com/)
+# as the ``api_key`` parameter.
+#
+# The ``model_name`` parameter is mandatory; see the available models
+# at https://docs.voyageai.com/docs/embeddings
+
+embedding_encoder = VoyageAIEmbeddingEncoder(
+    config=VoyageAIEmbeddingConfig(api_key=os.environ["VOYAGE_API_KEY"], model_name="voyage-law-2")
+)
+elements = embedding_encoder.embed_documents(
+    elements=[Text("This is sentence 1"), Text("This is sentence 2")],
+)
+query = "This is the query"
+query_embedding = embedding_encoder.embed_query(query=query)
+
+for element in elements:  # plain loop: printing is a side effect, no list needed
+    print(element, element.embeddings)
+print(query, query_embedding)
+print(embedding_encoder.is_unit_vector, embedding_encoder.num_of_dimensions)
@@ -86,7 +86,7 @@ tabulate==0.9.0
     # via -r ./base.in
 tqdm==4.66.4
     # via nltk
-typing-extensions==4.11.0
+typing-extensions==4.12.0
     # via
     #   -r ./base.in
     #   emoji
 
@@ -57,7 +57,10 @@ unstructured-client<=0.18.0
 
 fsspec==2024.5.0
 
-# python 3.12 support 
+# python 3.12 support
 numpy>=1.26.0
 wrapt>=1.14.0
 
+
+# NOTE(robinson): for compatibility with voyage embeddings
+langsmith==0.1.62
@@ -151,7 +151,7 @@ jsonschema-specifications==2023.12.1
     #   jsonschema
 jupyter==1.0.0
     # via -r ./dev.in
-jupyter-client==8.6.1
+jupyter-client==8.6.2
     # via
     #   ipykernel
     #   jupyter-console
@@ -185,7 +185,7 @@ jupyter-server==2.14.0
     #   notebook-shim
 jupyter-server-terminals==0.5.3
     # via jupyter-server
-jupyterlab==4.2.0
+jupyterlab==4.2.1
     # via notebook
 jupyterlab-pygments==0.3.0
     # via nbconvert
@@ -392,7 +392,7 @@ traitlets==5.14.3
     #   qtconsole
 types-python-dateutil==2.9.0.20240316
     # via arrow
-typing-extensions==4.11.0
+typing-extensions==4.12.0
     # via
     #   -c ./base.txt
     #   -c ./test.txt
 
@@ -12,7 +12,7 @@ python-docx==1.1.2
     # via
     #   -c ././deps/constraints.txt
     #   -r ./extra-docx.in
-typing-extensions==4.11.0
+typing-extensions==4.12.0
     # via
     #   -c ./base.txt
     #   python-docx
@@ -14,7 +14,7 @@ python-docx==1.1.2
     # via
     #   -c ././deps/constraints.txt
     #   -r ./extra-odt.in
-typing-extensions==4.11.0
+typing-extensions==4.12.0
     # via
     #   -c ./base.txt
     #   python-docx
@@ -8,7 +8,7 @@ attrdict==2.0.1
     # via unstructured-paddleocr
 babel==2.15.0
     # via flask-babel
-bce-python-sdk==0.9.10
+bce-python-sdk==0.9.11
     # via visualdl
 blinker==1.8.2
     # via flask
@@ -45,7 +45,7 @@ flask==3.0.3
     #   visualdl
 flask-babel==4.0.0
     # via visualdl
-fonttools==4.51.0
+fonttools==4.52.1
     # via matplotlib
 future==1.0.0
     # via bce-python-sdk
@@ -200,7 +200,7 @@ six==1.16.0
     #   imgaug
     #   python-dateutil
     #   visualdl
-tifffile==2024.5.10
+tifffile==2024.5.22
     # via scikit-image
 tqdm==4.66.4
     # via
 
@@ -39,7 +39,7 @@ filelock==3.14.0
     #   transformers
 flatbuffers==24.3.25
     # via onnxruntime
-fonttools==4.51.0
+fonttools==4.52.1
     # via matplotlib
 fsspec==2024.5.0
     # via
@@ -118,7 +118,7 @@ numpy==1.26.4
     #   transformers
 omegaconf==2.3.0
     # via effdet
-onnx==1.16.0
+onnx==1.16.1
     # via
     #   -r ./extra-pdf-image.in
     #   unstructured-inference
@@ -278,7 +278,7 @@ tqdm==4.66.4
     #   transformers
 transformers==4.41.1
     # via unstructured-inference
-typing-extensions==4.11.0
+typing-extensions==4.12.0
     # via
     #   -c ./base.txt
     #   huggingface-hub
 
@@ -102,7 +102,7 @@ tqdm==4.66.4
     #   transformers
 transformers==4.41.1
     # via -r ./huggingface.in
-typing-extensions==4.11.0
+typing-extensions==4.12.0
     # via
     #   -c ./base.txt
     #   huggingface-hub