|
| 1 | += Migration |
| 2 | + |
| 3 | +Migrating an existing LangChain application to RAGStack is easy - just change your `requirements.txt` or `pyproject.toml` file to use `ragstack-ai`. |
| 4 | + |
| 5 | +RAGStack includes the packages listed below as of version 0.3.1. When RAGStack is installed, any existing versions of these packages are replaced with the stable, tested versions pinned by `ragstack-ai`. For the latest list, see xref:ROOT:changelog.adoc[]. |
| 6 | +[%autowidth] |
| 7 | +[cols="2*",options="header"] |
| 8 | +|=== |
| 9 | +| Library | Version |
| 10 | + |
| 11 | +| astrapy |
| 12 | +| >=0.6.2,<0.7.0 |
| 13 | + |
| 14 | +| cassio |
| 15 | +| >=0.1.3,<0.2.0 |
| 16 | + |
| 17 | +| langchain |
| 18 | +| ==0.0.349 |
| 19 | + |
| 20 | +| llama-index |
| 21 | +| ==0.9.14 |
| 22 | + |
| 23 | +| unstructured |
| 24 | +| >=0.10,<0.11 |
| 25 | + |
| 26 | +|=== |
| 27 | + |
| 28 | +== Example migration |
| 29 | + |
| 30 | +Here is a simple LangChain application that loads a dataset from Hugging Face and embeds the document objects in Astra DB. |
| 31 | + |
| 32 | +.migration.py |
| 33 | +[%collapsible%open] |
| 34 | +==== |
| 35 | +[source,python] |
| 36 | +---- |
| 37 | +import os |
| 38 | +from datasets import load_dataset |
| 39 | +from dotenv import load_dotenv |
| 40 | +from langchain.document_loaders import PyPDFDirectoryLoader |
| 41 | +from langchain.vectorstores.astradb import AstraDB |
| 42 | +from langchain.embeddings import OpenAIEmbeddings |
| 43 | +from langchain.schema import Document |
| 44 | +
|
| 45 | +load_dotenv() |
| 46 | +
|
| 47 | +ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN") |
| 48 | +ASTRA_DB_API_ENDPOINT = os.environ.get("ASTRA_DB_API_ENDPOINT") |
| 49 | +OPEN_AI_API_KEY = os.environ.get("OPENAI_API_KEY") |
| 50 | +ASTRA_DB_COLLECTION = os.environ.get("ASTRA_DB_COLLECTION") |
| 51 | +
|
| 52 | +embedding = OpenAIEmbeddings() |
| 53 | +vstore = AstraDB( |
| 54 | + embedding=embedding, |
| 55 | + collection_name=ASTRA_DB_COLLECTION, |
| 56 | + token=os.environ["ASTRA_DB_APPLICATION_TOKEN"], |
| 57 | + api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"], |
| 58 | +) |
| 59 | +print(vstore.astra_db.collection(ASTRA_DB_COLLECTION).find()) |
| 60 | +
|
| 61 | +philo_dataset = load_dataset("datastax/philosopher-quotes")["train"] |
| 62 | +print("An example entry:") |
| 63 | +print(philo_dataset[16]) |
| 64 | +
|
| 65 | +docs = [] |
| 66 | +for entry in philo_dataset: |
| 67 | + metadata = {"author": entry["author"]} |
| 68 | + if entry["tags"]: |
| 69 | + for tag in entry["tags"].split(";"): |
| 70 | + metadata[tag] = "y" |
| 71 | + doc = Document(page_content=entry["quote"], metadata=metadata) |
| 72 | + docs.append(doc) |
| 73 | +
|
| 74 | +inserted_ids = vstore.add_documents(docs) |
| 75 | +print(f"\nInserted {len(inserted_ids)} documents.") |
| 76 | +
|
| 77 | +print(vstore.astra_db.collection(ASTRA_DB_COLLECTION).find()) |
| 78 | +
|
| 79 | +vstore.clear() |
| 80 | +---- |
| 81 | +==== |
| 82 | + |
| 83 | +. This application requires installation of the following packages: |
| 84 | ++ |
| 85 | +[source,bash] |
| 86 | +---- |
| 87 | +pip install langchain datasets openai astrapy tiktoken python-dotenv |
| 88 | +---- |
| 89 | ++ |
| 90 | +. You decide you want to use RAGStack's pinned, tested version of LangChain (`langchain-0.0.349`) instead of the latest version of LangChain (`langchain-0.0.350`). |
| 91 | ++ |
| 92 | +. Install the `ragstack-ai` package with the `--upgrade-strategy="only-if-needed"` option. |
| 93 | +This ensures pip will not upgrade any packages that are already installed, unless required by the `ragstack-ai` package. |
| 94 | ++ |
| 95 | +[source,bash] |
| 96 | +---- |
| 97 | +pip install ragstack-ai --upgrade-strategy="only-if-needed" |
| 98 | +---- |
| 99 | ++ |
| 100 | +[NOTE] |
| 101 | +==== |
| 102 | +If you're having trouble with your migration, try uninstalling your current LangChain package and reinstalling the `ragstack-ai` package. |
| 103 | +[source,console] |
| 104 | +---- |
| 105 | +pip uninstall langchain |
| 106 | +Successfully uninstalled langchain-0.0.350 |
| 107 | +pip install ragstack-ai --upgrade-strategy="only-if-needed" |
| 108 | +---- |
| 109 | +==== |
| 110 | ++ |
| 111 | +. Once the `ragstack-ai` package is installed, run `pip list` to see your current list of packages. |
| 112 | +Notice that the installed version of langchain is `0.0.349`. |
| 113 | ++ |
| 114 | +.Pip list |
| 115 | +[%collapsible%open] |
| 116 | +==== |
| 117 | +[source,console] |
| 118 | +---- |
| 119 | +Package Version |
| 120 | +------------------- ------------ |
| 121 | +aiohttp 3.9.1 |
| 122 | +aiosignal 1.3.1 |
| 123 | +annotated-types 0.6.0 |
| 124 | +anyio 4.1.0 |
| 125 | +astrapy 0.6.2 |
| 126 | +attrs 23.1.0 |
| 127 | +backoff 2.2.1 |
| 128 | +beautifulsoup4 4.12.2 |
| 129 | +cassandra-driver 3.28.0 |
| 130 | +cassio 0.1.3 |
| 131 | +certifi 2023.11.17 |
| 132 | +chardet 5.2.0 |
| 133 | +charset-normalizer 3.3.2 |
| 134 | +click 8.1.7 |
| 135 | +dataclasses-json 0.6.3 |
| 136 | +datasets 2.15.0 |
| 137 | +Deprecated 1.2.14 |
| 138 | +dill 0.3.7 |
| 139 | +distro 1.8.0 |
| 140 | +emoji 2.9.0 |
| 141 | +filelock 3.13.1 |
| 142 | +filetype 1.2.0 |
| 143 | +frozenlist 1.4.0 |
| 144 | +fsspec 2023.10.0 |
| 145 | +geomet 0.2.1.post1 |
| 146 | +greenlet 3.0.2 |
| 147 | +h11 0.14.0 |
| 148 | +h2 4.1.0 |
| 149 | +hpack 4.0.0 |
| 150 | +httpcore 1.0.2 |
| 151 | +httpx 0.25.2 |
| 152 | +huggingface-hub 0.19.4 |
| 153 | +hyperframe 6.0.1 |
| 154 | +idna 3.6 |
| 155 | +joblib 1.3.2 |
| 156 | +jsonpatch 1.33 |
| 157 | +jsonpointer 2.4 |
| 158 | +langchain 0.0.349 |
| 159 | +langchain-community 0.0.1 |
| 160 | +langchain-core 0.0.13 |
| 161 | +langdetect 1.0.9 |
| 162 | +langsmith 0.0.69 |
| 163 | +llama-index 0.9.14 |
| 164 | +lxml 4.9.3 |
| 165 | +marshmallow 3.20.1 |
| 166 | +multidict 6.0.4 |
| 167 | +multiprocess 0.70.15 |
| 168 | +mypy-extensions 1.0.0 |
| 169 | +nest-asyncio 1.5.8 |
| 170 | +nltk 3.8.1 |
| 171 | +numpy 1.26.2 |
| 172 | +openai 1.3.8 |
| 173 | +packaging 23.2 |
| 174 | +pandas 2.1.4 |
| 175 | +pip 23.2.1 |
| 176 | +pyarrow 14.0.1 |
| 177 | +pyarrow-hotfix 0.6 |
| 178 | +pydantic 2.5.2 |
| 179 | +pydantic_core 2.14.5 |
| 180 | +python-dateutil 2.8.2 |
| 181 | +python-dotenv 1.0.0 |
| 182 | +python-iso639 2023.12.11 |
| 183 | +python-magic 0.4.27 |
| 184 | +pytz 2023.3.post1 |
| 185 | +PyYAML 6.0.1 |
| 186 | +ragstack-ai 0.3.1 |
| 187 | +rapidfuzz 3.5.2 |
| 188 | +regex 2023.10.3 |
| 189 | +requests 2.31.0 |
| 190 | +setuptools 65.5.0 |
| 191 | +six 1.16.0 |
| 192 | +sniffio 1.3.0 |
| 193 | +soupsieve 2.5 |
| 194 | +SQLAlchemy 2.0.23 |
| 195 | +tabulate 0.9.0 |
| 196 | +tenacity 8.2.3 |
| 197 | +tiktoken 0.5.2 |
| 198 | +tqdm 4.66.1 |
| 199 | +typing_extensions 4.9.0 |
| 200 | +typing-inspect 0.9.0 |
| 201 | +tzdata 2023.3 |
| 202 | +unstructured 0.10.30 |
| 203 | +urllib3 2.1.0 |
| 204 | +wrapt 1.16.0 |
| 205 | +xxhash 3.4.1 |
| 206 | +yarl 1.9.4 |
| 207 | +---- |
| 208 | +==== |
| 209 | ++ |
| 210 | +. Run your application... |
| 211 | ++ |
| 212 | +[source,bash] |
| 213 | +---- |
| 214 | +python3 migration.py |
| 215 | +---- |
| 216 | + |
| 217 | +...and you should see the same output as before, with no changes to your code required! |
0 commit comments