Commit 0fd2910

feat: add llama-parse as dependency and bump llama-index to 0.9.48 (#279)
1 parent 2674d83 commit 0fd2910

16 files changed, +139 -71 lines

.github/workflows/_run_e2e_tests.yml

Lines changed: 2 additions & 0 deletions
@@ -88,6 +88,7 @@ jobs:
           ASTRA_DB_ID: "${{ steps.astra-db.outputs.db_id }}"
           OPENAI_API_KEY: "${{ secrets.E2E_TESTS_OPEN_AI_KEY }}"
           LANGCHAIN_API_KEY: "${{ secrets.E2E_TESTS_LANGCHAIN_API_KEY }}"
+          LLAMA_CLOUD_API_KEY: "${{ secrets.E2E_TESTS_LLAMA_CLOUD_API_KEY }}"
           GCLOUD_ACCOUNT_KEY_JSON: "${{ secrets.E2E_TESTS_GCLOUD_ACCOUNT_KEY_JSON }}"
         run: |
           source scripts/ci-common-env.sh
@@ -114,6 +115,7 @@ jobs:
           HUGGINGFACE_HUB_KEY: "${{ secrets.E2E_TESTS_HUGGINGFACE_HUB_KEY }}"
           NVIDIA_API_KEY: "${{ secrets.E2E_TESTS_NVIDIA_API_KEY }}"
           LANGCHAIN_API_KEY: "${{ secrets.E2E_TESTS_LANGCHAIN_API_KEY }}"
+          LLAMA_CLOUD_API_KEY: "${{ secrets.E2E_TESTS_LLAMA_CLOUD_API_KEY }}"
         run: |
           source scripts/ci-common-env.sh
           if [ "${{ inputs.suite-name == 'ragstack' }}" == "true" ]; then

pyproject.toml

Lines changed: 9 additions & 11 deletions
@@ -7,23 +7,22 @@ authors = ["DataStax"]
 readme = "PACKAGE_README.md"
 repository = "https://github.com/datastax/ragstack-ai"
 documentation = "https://docs.datastax.com/en/ragstack"
-packages = [
-    {include = "ragstack"}
-]
+packages = [{ include = "ragstack" }]

 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
 astrapy = "~0.7.0"
 cassio = "~0.1.3"
 unstructured = "^0.10"
-llama-index = { version = "0.9.34", extras = ["langchain"] }
-langchain = {version = "0.1.4"}
+llama-index = { version = "0.9.48", extras = ["langchain"] }
+llama-parse = { version = "0.1.4" }
+langchain = { version = "0.1.4" }
 langchain-core = "0.1.16"
 langchain-community = "0.0.16"
-langchain-openai = {version = "0.0.3"}
-langchain-google-genai = {version = "0.0.6", optional = true}
-langchain-google-vertexai = {version = "0.0.3", optional = true}
-langchain-nvidia-ai-endpoints = {version = "0.0.1", optional = true}
+langchain-openai = { version = "0.0.3" }
+langchain-google-genai = { version = "0.0.6", optional = true }
+langchain-google-vertexai = { version = "0.0.3", optional = true }
+langchain-nvidia-ai-endpoints = { version = "0.0.1", optional = true }

 [tool.poetry.extras]
 langchain-google = ["langchain-google-genai", "langchain-google-vertexai"]
@@ -39,5 +38,4 @@ requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

 [tool.poetry.dev-dependencies]
-yamllint = "^1.34.0"
-
+yamllint = "^1.34.0"
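
The pin pairs llama-parse 0.1.4 with llama-index 0.9.48. As a quick post-install sanity check (a minimal sketch, not part of this commit), the resolved versions can be read from package metadata:

    from importlib.metadata import version

    # Both pins should resolve together after `poetry install`.
    print(version("llama-index"))  # expected: 0.9.48
    print(version("llama-parse"))  # expected: 0.1.4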

ragstack-e2e-tests/.env.template

Lines changed: 4 additions & 1 deletion
@@ -29,4 +29,7 @@ VECTOR_DATABASE_TYPE=astradb
 # HUGGINGFACE_HUB_KEY=

 # Nvidia
-# NVIDIA_API_KEY=
+# NVIDIA_API_KEY=
+
+# LlamaIndex
+# LLAMA_CLOUD_API_KEY=
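
The template only documents the new key; the tests read it from the process environment at runtime. A minimal standard-library sketch of that contract, mirroring the suite's get_required_env helper (shown under test_utils below); require_env is a hypothetical stand-in name:

    import os

    # Hypothetical mirror of the suite's get_required_env: fail fast on
    # missing keys instead of surfacing as downstream auth errors.
    def require_env(name: str) -> str:
        value = os.environ.get(name)
        if not value:
            raise ValueError(f"Missing required environment variable: {name}")
        return value

    llama_key = require_env("LLAMA_CLOUD_API_KEY")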

ragstack-e2e-tests/e2e_tests/langchain/test_compatibility_rag.py

Lines changed: 1 addition & 7 deletions
@@ -1,5 +1,4 @@
 import logging
-import os
 from typing import List

 import pytest
@@ -15,6 +14,7 @@
     run_conversational_rag,
 )
 from e2e_tests.langchain.trulens import run_trulens_evaluation
+from e2e_tests.test_utils import get_local_resource_path

 from langchain.chat_models import ChatOpenAI, AzureChatOpenAI, ChatVertexAI, BedrockChat
 from langchain.embeddings import (
@@ -341,12 +341,6 @@ def embed_query(self, text: str) -> List[float]:
     assert "Coffee Machine Ultra Cool" in response.content


-def get_local_resource_path(filename: str):
-    dirname = os.path.dirname(__file__)
-    e2e_tests_dir = os.path.dirname(dirname)
-    return os.path.join(e2e_tests_dir, "resources", filename)
-
-
 @pytest.mark.parametrize("chat", ["vertex_gemini_pro_llm", "gemini_pro_llm"])
 def test_chat(chat, request, record_property):
     set_current_test_info(

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+import pytest
+
+from e2e_tests.conftest import (
+    get_vector_store_handler,
+)
+
+from e2e_tests.test_utils.vector_store_handler import (
+    VectorStoreImplementation,
+)
+
+
+@pytest.fixture
+def astra_db():
+    handler = get_vector_store_handler(VectorStoreImplementation.ASTRADB)
+    context = handler.before_test()
+    yield context
+    handler.after_test()
+
+
+@pytest.fixture
+def cassandra():
+    handler = get_vector_store_handler(VectorStoreImplementation.CASSANDRA)
+    context = handler.before_test()
+    yield context
+    handler.after_test()
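
This new 25-line module (its path is not captured in this view, but it carries the astra_db and cassandra fixtures the llama_index tests previously defined locally) lets tests choose a backend by fixture name. A sketch of the consumption pattern, matching the llama-parse test below; test_backend_roundtrip is a hypothetical example:

    import pytest

    @pytest.mark.parametrize("vector_store", ["cassandra", "astra_db"])
    def test_backend_roundtrip(vector_store, request):
        # Resolve the parametrized fixture name at runtime; setup ran in
        # handler.before_test(), teardown runs in handler.after_test().
        context = request.getfixturevalue(vector_store)
        store = context.new_llamaindex_vector_store(embedding_dimension=1536)
        assert store is not None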

ragstack-e2e-tests/e2e_tests/llama_index/test_compatibility_rag.py

Lines changed: 10 additions & 30 deletions
@@ -1,5 +1,4 @@
 import logging
-import os

 import pytest
 from langchain.embeddings import VertexAIEmbeddings, HuggingFaceInferenceAPIEmbeddings
@@ -29,32 +28,15 @@
 from e2e_tests.conftest import (
     set_current_test_info,
     get_required_env,
-    get_vector_store_handler,
 )
 from vertexai.vision_models import MultiModalEmbeddingModel, Image

+from e2e_tests.test_utils import get_local_resource_path
 from e2e_tests.test_utils.vector_store_handler import (
-    VectorStoreImplementation,
     VectorStoreTestContext,
 )


-@pytest.fixture
-def astra_db():
-    handler = get_vector_store_handler(VectorStoreImplementation.ASTRADB)
-    context = handler.before_test()
-    yield context
-    handler.after_test()
-
-
-@pytest.fixture
-def cassandra():
-    handler = get_vector_store_handler(VectorStoreImplementation.CASSANDRA)
-    context = handler.before_test()
-    yield context
-    handler.after_test()
-
-
 @pytest.fixture
 def openai_llm():
     return "openai", OpenAI(api_key=get_required_env("OPEN_AI_KEY"))
@@ -110,7 +92,7 @@ def bedrock_anthropic_llm():
         model="anthropic.claude-v2",
         aws_access_key_id=get_required_env("AWS_ACCESS_KEY_ID"),
         aws_secret_access_key=get_required_env("AWS_SECRET_ACCESS_KEY"),
-        aws_region_name=get_required_env("BEDROCK_AWS_REGION"),
+        region_name=get_required_env("BEDROCK_AWS_REGION"),
     )


@@ -120,7 +102,7 @@ def bedrock_meta_llm():
         model="meta.llama2-13b-chat-v1",
         aws_access_key_id=get_required_env("AWS_ACCESS_KEY_ID"),
         aws_secret_access_key=get_required_env("AWS_SECRET_ACCESS_KEY"),
-        aws_region_name=get_required_env("BEDROCK_AWS_REGION"),
+        region_name=get_required_env("BEDROCK_AWS_REGION"),
     )


@@ -138,12 +120,16 @@ def bedrock_titan_embedding():

 @pytest.fixture
 def bedrock_cohere_embedding():
+    import boto3
+
     return (
         "bedrock-cohere",
         1024,
-        BedrockEmbedding.from_credentials(
-            model_name="cohere.embed-english-v3",
-            aws_region=get_required_env("BEDROCK_AWS_REGION"),
+        BedrockEmbedding(
+            client=boto3.Session(
+                region_name=get_required_env("BEDROCK_AWS_REGION")
+            ).client("bedrock-runtime"),
+            model="cohere.embed-english-v3",
         ),
     )

@@ -339,12 +325,6 @@ def test_multimodal(vector_store, embedding, llm, request):
     assert "Coffee Machine Ultra Cool" in response


-def get_local_resource_path(filename: str):
-    dirname = os.path.dirname(__file__)
-    e2e_tests_dir = os.path.dirname(dirname)
-    return os.path.join(e2e_tests_dir, "resources", filename)
-
-
 @pytest.mark.parametrize(
     "chat",
     ["gemini_pro_llm", "vertex_gemini_pro_llm"],
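
Two Bedrock API updates ride along with the llama-index bump: the LLM constructors take region_name instead of aws_region_name, and BedrockEmbedding now receives an explicit bedrock-runtime client rather than going through from_credentials. A standalone sketch of the new embedding construction, with the region hard-coded purely for illustration:

    import boto3
    from llama_index.embeddings import BedrockEmbedding

    # Build the bedrock-runtime client explicitly; credentials come from
    # boto3's default resolution chain (env vars, profiles, instance role).
    session = boto3.Session(region_name="us-east-1")
    embedding = BedrockEmbedding(
        client=session.client("bedrock-runtime"),
        model="cohere.embed-english-v3",
    )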

Lines changed: 66 additions & 0 deletions
@@ -0,0 +1,66 @@
+import pytest
+
+try:
+    from llama_parse import LlamaParse
+except ImportError:
+    pytest.skip("llama_parse is not supported, skipping tests", allow_module_level=True)
+
+from llama_index import (
+    VectorStoreIndex,
+    StorageContext,
+    ServiceContext,
+)
+
+from llama_index.embeddings import OpenAIEmbedding
+from llama_index.llms import OpenAI
+
+from e2e_tests.conftest import (
+    set_current_test_info,
+    get_required_env,
+)
+from e2e_tests.test_utils import get_local_resource_path
+from e2e_tests.test_utils.vector_store_handler import (
+    VectorStoreTestContext,
+)
+
+
+@pytest.fixture
+def llama_parse_text():
+    return "text", LlamaParse(result_type="text")
+
+
+@pytest.fixture
+def llama_parse_markdown():
+    return "markdown", LlamaParse(result_type="markdown")
+
+
+@pytest.mark.parametrize("vector_store", ["cassandra", "astra_db"])
+@pytest.mark.parametrize(
+    "llama_parse_instance",
+    ["llama_parse_text", "llama_parse_markdown"],
+)
+def test_llama_parse(vector_store, llama_parse_instance, request):
+    vector_store_context: VectorStoreTestContext = request.getfixturevalue(vector_store)
+    lp_type, lp = request.getfixturevalue(llama_parse_instance)
+    llm = OpenAI(api_key=get_required_env("OPEN_AI_KEY"))
+    embedding = OpenAIEmbedding(api_key=get_required_env("OPEN_AI_KEY"))
+
+    set_current_test_info(
+        "llama_index::llama_parse",
+        f"{lp_type},{vector_store}",
+    )
+    vector_store = vector_store_context.new_llamaindex_vector_store(
+        embedding_dimension=1536
+    )
+    storage_context = StorageContext.from_defaults(vector_store=vector_store)
+    service_context = ServiceContext.from_defaults(llm=llm, embed_model=embedding)
+
+    file_path = get_local_resource_path("tree.pdf")
+    documents = lp.load_data(file_path)
+
+    index = VectorStoreIndex.from_documents(
+        documents, storage_context=storage_context, service_context=service_context
+    )
+
+    retriever = index.as_retriever()
+    assert len(retriever.retrieve("What was Eldenroot?")) > 0
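
Outside pytest, the parser this new test exercises can be driven directly. A minimal sketch, assuming LLAMA_CLOUD_API_KEY is exported (LlamaParse reads it from the environment) and a tree.pdf in the working directory:

    from llama_parse import LlamaParse

    # result_type is "text" or "markdown", matching the two fixtures above.
    parser = LlamaParse(result_type="markdown")
    documents = parser.load_data("tree.pdf")  # llama-index Document objects
    print(documents[0].text[:200])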

15.9 KB binary file not shown.

ragstack-e2e-tests/e2e_tests/test_utils/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -18,6 +18,12 @@ def get_required_env(name) -> str:
     return value


+def get_local_resource_path(filename: str):
+    dirname = os.path.dirname(__file__)
+    e2e_tests_dir = os.path.dirname(dirname)
+    return os.path.join(e2e_tests_dir, "resources", filename)
+
+
 def random_string() -> str:
     return str(uuid.uuid4()).split("-")[0]
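
With the helper promoted here, both compatibility suites import it instead of keeping private copies. Usage from any test module:

    from e2e_tests.test_utils import get_local_resource_path

    # Resolves relative to the e2e_tests package, so it works no matter
    # which directory pytest is invoked from.
    pdf_path = get_local_resource_path("tree.pdf")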
ragstack-e2e-tests/e2e_tests/test_utils/astradb_vector_store_handler.py

Lines changed: 1 addition & 11 deletions
@@ -114,18 +114,8 @@ def search_documents(self, vector: List[float], limit: int) -> List[str]:
         return docs


-def metaclass_resolver(*classes):
-    metaclass = tuple(set(type(cls) for cls in classes))
-    metaclass = (
-        metaclass[0]
-        if len(metaclass) == 1
-        else type("_".join(mcls.__name__ for mcls in metaclass), metaclass, {})
-    )  # class M_C
-    return metaclass("_".join(cls.__name__ for cls in classes), classes, {})
-
-
 class EnhancedAstraDBLlamaIndexVectorStore(
-    metaclass_resolver(EnhancedLlamaIndexVectorStore, AstraDBVectorStore)
+    AstraDBVectorStore, EnhancedLlamaIndexVectorStore
 ):

     def put_document(