= Migrate to RAGStack

Migrating existing LangChain or LlamaIndex applications to RAGStack is easy - just change your `requirements.txt` or `pyproject.toml` file to use `ragstack-ai`.
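
For example, a minimal sketch of the dependency change in a `requirements.txt` (the old pins and the RAGStack version shown here are illustrative; the equivalent change in a `pyproject.toml` is analogous):

[source,text]
----
# Before: packages pinned individually (example pins only)
# langchain==0.0.349
# llama-index==0.9.29

# After: a single RAGStack pin pulls in tested versions of these packages
ragstack-ai==0.6.0
----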

RAGStack contains the following packages as of version `0.6.0`. When RAGStack is installed, these packages are replaced with the stable, tested `ragstack-ai` versions listed here. For the latest list, see xref:ROOT:changelog.adoc[].

[%autowidth]
[cols="2*",options="header"]
|===
| Library | Version

| astrapy
| >=0.7.0,<0.8.0

| cassio
| >=0.1.3,<0.2.0

| langchain
| https://datastax.github.io/ragstack-ai/api_reference/0.6.0/langchain[==0.1.4]{external-link-icon}

| llama-index
| ==0.9.34

| unstructured
| >=0.10,<0.11
|===
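
To see the pinning in action, you can install RAGStack in a fresh environment and check which versions were resolved. The commands and filter pattern below are illustrative, not part of the original guide:

[source,bash]
----
# Illustrative check: the bundled packages resolve to the pinned versions in the table above.
pip install ragstack-ai==0.6.0
pip freeze | grep -E "^(ragstack-ai|langchain|llama-index|astrapy|cassio|unstructured)=="
----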

== Example LangChain migration

Here is a simple LangChain application that loads a dataset from HuggingFace and embeds the document objects in AstraDB.

.langchain-migration.py
[%collapsible%open]
====
[source,python]
// ... (the LangChain example script, installation steps, and pip list are elided here) ...
+
[source,bash]
----
python3 langchain-migration.py
----

...and you should see the same output as before, with no changes to your code required!

== Example LlamaIndex migration

Here is an application that uses LlamaIndex to index a set of documents.

.llama-migration.py
[%collapsible%open]
====
[source,python]
----
import os
from llama_index.llama_dataset import download_llama_dataset
from llama_index.vector_stores import AstraDBVectorStore
from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext

# Download and load dataset
dataset = download_llama_dataset("PaulGrahamEssayDataset", "./data")
documents = SimpleDirectoryReader("./data/source_files").load_data()

# Display basic information about the documents
print(f"Total documents: {len(documents)}")
first_doc = documents[0]
print(f"First document, id: {first_doc.doc_id}")
print(f"First document, hash: {first_doc.hash}")
print(f"First document, text ({len(first_doc.text)} characters):\n{'=' * 20}\n{first_doc.text[:360]} ...")

# Setup AstraDB Vector Store
astra_db_store = AstraDBVectorStore(
    token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
    api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
    collection_name="test",
    embedding_dimension=1536
)

# Create Storage Context and Index
storage_context = StorageContext.from_defaults(vector_store=astra_db_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Query the index
def execute_query(query_string, mode="default", top_k=3, mmr_prefetch_factor=None):
    retriever = index.as_retriever(
        vector_store_query_mode=mode,
        similarity_top_k=top_k,
        vector_store_kwargs={"mmr_prefetch_factor": mmr_prefetch_factor} if mmr_prefetch_factor else {}
    )

    nodes_with_scores = retriever.retrieve(query_string)

    print(query_string)
    print(f"Found {len(nodes_with_scores)} nodes.")
    for idx, node_with_score in enumerate(nodes_with_scores):
        print(f"    [{idx}] score = {node_with_score.score}")
        print(f"        id = {node_with_score.node.node_id}")
        print(f"        text = {node_with_score.node.text[:90]} ...")

# Execute queries
query_string_1 = "Why did the author choose to work on AI?"
execute_query(query_string_1)
execute_query(query_string_1, mode="mmr", mmr_prefetch_factor=4)
----
====
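
Before running the script you need credentials in your environment. A minimal sketch of the assumed variables follows: the two Astra values are read with `os.getenv` in the script above, while `OPENAI_API_KEY` is an assumption, needed because the default LlamaIndex embedding model calls OpenAI.

[source,bash]
----
# Illustrative placeholders -- substitute your own values.
export ASTRA_DB_APPLICATION_TOKEN="AstraCS:..."   # read by os.getenv in the script
export ASTRA_DB_API_ENDPOINT="https://<db-id>-<region>.apps.astra.datastax.com"
export OPENAI_API_KEY="sk-..."                    # assumed: default embeddings use OpenAI
----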

. This application requires installation of the following packages:
+
[source,bash]
----
pip install llama-index
----
+
. Your application is tested and working at `llama-index` version `0.9.29`. But then, LlamaIndex version `0.10.1` restructures the library, splitting every integration into its own PyPI package. Oh no, your application no longer works!
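+
Roughly, the kind of import change involved looks like this. This is a sketch only: the post-`0.10` module paths are an assumption based on the split packages and are not part of the original example.
+
[source,python]
----
# 0.9.x-style imports used by the application above -- these keep working on 0.9.x:
from llama_index import VectorStoreIndex
from llama_index.vector_stores import AstraDBVectorStore

# After the 0.10.x split, the same classes move into separate packages,
# approximately (e.g. the llama-index-vector-stores-astra-db package):
# from llama_index.core import VectorStoreIndex
# from llama_index.vector_stores.astra_db import AstraDBVectorStore
----
+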
. You decide to use RAGStack's pinned, tested version of LlamaIndex (currently `0.9.34`) instead of the latest version of LlamaIndex (`0.10.1`), to avoid sudden changes like this in the future.
. Install the `ragstack-ai` package with the `--upgrade-strategy="only-if-needed"` option.
This ensures pip will not upgrade any packages that are already installed, unless required by the `ragstack-ai` package.
+
[source,bash]
----
pip install ragstack-ai --upgrade-strategy="only-if-needed"
----
+
[NOTE]
====
If you're having trouble with your migration, try uninstalling your current LlamaIndex packages and reinstalling the `ragstack-ai` package.

[source,console]
----
pip uninstall llama-index-agent-openai llama-index-core llama-index-embeddings-openai llama-index-legacy llama-index-llms-openai llama-index-multi-modal-llms-openai llama-index-question-gen-openai llama-index-readers-file llama-index-program-openai
Successfully uninstalled llama-index-0.9.29
pip install ragstack-ai --upgrade-strategy="only-if-needed"
----
====
+
. Once the `ragstack-ai` package is installed, run `pip list` to see your current list of packages.
Notice that the installed version of `llama-index` is `0.9.34`.
+
.Pip list
[%collapsible%open]
====
[source,console]
----
Package             Version
------------------- ------------
aiohttp             3.9.1
aiosignal           1.3.1
annotated-types     0.6.0
anyio               4.2.0
astrapy             0.7.4
attrs               23.2.0
backoff             2.2.1
beautifulsoup4      4.12.3
cassandra-driver    3.29.0
cassio              0.1.4
certifi             2023.11.17
chardet             5.2.0
charset-normalizer  3.3.2
click               8.1.7
dataclasses-json    0.6.3
Deprecated          1.2.14
deprecation         2.1.0
distro              1.9.0
emoji               2.10.0
filetype            1.2.0
frozenlist          1.4.1
fsspec              2023.12.2
geomet              0.2.1.post1
greenlet            3.0.3
h11                 0.14.0
h2                  4.1.0
hpack               4.0.0
httpcore            1.0.2
httpx               0.25.2
hyperframe          6.0.1
idna                3.6
joblib              1.3.2
jsonpatch           1.33
jsonpointer         2.4
langchain           0.1.4
langchain-community 0.0.16
langchain-core      0.1.16
langchain-openai    0.0.3
langdetect          1.0.9
langsmith           0.0.83
llama-index         0.9.34
lxml                5.1.0
marshmallow         3.20.2
multidict           6.0.4
mypy-extensions     1.0.0
nest-asyncio        1.6.0
networkx            3.2.1
nltk                3.8.1
numpy               1.26.3
openai              1.9.0
packaging           23.2
pandas              2.2.0
pip                 23.3.1
pydantic            2.5.3
pydantic_core       2.14.6
python-dateutil     2.8.2
python-dotenv       1.0.1
python-iso639       2024.1.2
python-magic        0.4.27
pytz                2023.3.post1
PyYAML              6.0.1
ragstack-ai         0.6.0
rapidfuzz           3.6.1
regex               2023.12.25
requests            2.31.0
setuptools          68.2.2
six                 1.16.0
sniffio             1.3.0
soupsieve           2.5
SQLAlchemy          2.0.25
tabulate            0.9.0
tenacity            8.2.3
tiktoken            0.5.2
toml                0.10.2
tqdm                4.66.1
typing_extensions   4.9.0
typing-inspect      0.9.0
tzdata              2023.4
unstructured        0.10.30
urllib3             2.1.0
wrapt               1.16.0
yarl                1.9.4
----
====
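+
If you only want to confirm the LlamaIndex version rather than scan the full listing, a quick check (assuming a standard pip environment) is:
+
[source,bash]
----
pip show llama-index
----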
+
. Run your application...
+
[source,bash]
----
python3 llama-migration.py
----
+
...and you should see the same output as before, with no changes to your code required!