Skip to content

Commit 8c72159

Browse files
authored
Upgrade Chroma to the latest version + change to uv (#1111)
* Use uv in the AutoRAG package
* Change to the latest langchain-upstage library
* Fix error in prompt.py
* Don't use IncludeEnum from Chroma (deprecated)
* Use uv in test.yml
* Use uv run
1 parent 9a97fd5 commit 8c72159

File tree

7 files changed, with 8,489 additions (+8489) and 89 deletions (-89).

.github/workflows/test.yml

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -16,16 +16,10 @@ jobs:
1616
runs-on: ubuntu-latest
1717
steps:
1818
- uses: actions/checkout@v4
19-
- uses: actions/setup-python@v5
20-
with:
21-
python-version: '3.10'
2219
- uses: actions/setup-java@v4
2320
with:
2421
distribution: 'zulu'
2522
java-version: '17'
26-
- name: Upgrade pip
27-
run: |
28-
python3 -m pip install --upgrade pip
2923
- name: Update apt-get
3024
run: |
3125
sudo apt-get update
@@ -38,24 +32,30 @@ jobs:
3832
- name: Install tesseract
3933
run: |
4034
sudo apt-get install tesseract-ocr
35+
- name: Install uv
36+
uses: astral-sh/setup-uv@v5
37+
- name: Install Python 3.10
38+
run: uv python install 3.10
39+
- name: Install Venv
40+
run: uv venv
4141
- name: Install AutoRAG
4242
run: |
43-
pip install -e './autorag[ko,dev,parse,ja]'
43+
uv pip install -e './autorag[all]'
4444
- name: Install dependencies
4545
run: |
46-
pip install -r tests/requirements.txt
46+
uv pip install -r tests/requirements.txt
4747
- name: Upgrade pyOpenSSL
4848
run: |
49-
pip install --upgrade pyOpenSSL
49+
uv pip install --upgrade pyOpenSSL
5050
- name: Install NLTK and download model
5151
run: |
52-
pip install nltk
53-
python3 -c "import nltk; nltk.download('punkt_tab')"
54-
python3 -c "import nltk; nltk.download('averaged_perceptron_tagger_eng')"
52+
uv pip install nltk
53+
uv run python -c "import nltk; nltk.download('punkt_tab')"
54+
uv run python -c "import nltk; nltk.download('averaged_perceptron_tagger_eng')"
5555
- name: delete tests package
56-
run: python3 tests/delete_tests.py
56+
run: uv run python tests/delete_tests.py
5757
- name: Run AutoRAG tests
5858
env:
5959
PYTHONPATH: ${PYTHONPATH}:./autorag
6060
run: |
61-
python3 -m pytest -o log_cli=true --log-cli-level=INFO -n auto tests/autorag
61+
uv run python -m pytest -o log_cli=true --log-cli-level=INFO -n auto tests/autorag

autorag/autorag/data/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
DirectoryLoader,
1717
)
1818
from langchain_unstructured import UnstructuredLoader
19-
from langchain_upstage import UpstageDocumentParseLoader
19+
from langchain_upstage import UpstageLayoutAnalysisLoader
2020

2121
from llama_index.core.node_parser import (
2222
TokenTextSplitter,
@@ -59,7 +59,7 @@
5959
# 6. All files
6060
"directory": DirectoryLoader,
6161
"unstructured": UnstructuredLoader,
62-
"upstagedocumentparse": UpstageDocumentParseLoader,
62+
"upstagedocumentparse": UpstageLayoutAnalysisLoader,
6363
}
6464

6565
chunk_modules = {

autorag/autorag/data/qa/query/prompt.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -179,7 +179,7 @@
179179
role=MessageRole.USER,
180180
content="""Document 1: ヌエヴォ·ラレド自治体はメキシコのタマウリパス州にあります。
181181
シウダー・デポルティーバ(スポーツ・シティ)は、
182-
メキシコのヌエボ・ラレドにある複合スポーツ施設です。
182+
メキシコのヌエボ・ラレドにある複合スポーツ施設です。""",
183183
),
184184
ChatMessage(
185185
role=MessageRole.ASSISTANT,

autorag/autorag/vectordb/chroma.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
AsyncHttpClient,
1010
)
1111
from chromadb.api.models.AsyncCollection import AsyncCollection
12-
from chromadb.api.types import IncludeEnum, QueryResult
12+
from chromadb.api.types import QueryResult
1313

1414
from autorag.utils.util import apply_recursive
1515
from autorag.vectordb.base import BaseVectorStore
@@ -74,11 +74,9 @@ async def add(self, ids: List[str], texts: List[str]):
7474

7575
async def fetch(self, ids: List[str]) -> List[List[float]]:
7676
if isinstance(self.collection, AsyncCollection):
77-
fetch_result = await self.collection.get(
78-
ids, include=[IncludeEnum.embeddings]
79-
)
77+
fetch_result = await self.collection.get(ids, include=["embeddings"])
8078
else:
81-
fetch_result = self.collection.get(ids, include=[IncludeEnum.embeddings])
79+
fetch_result = self.collection.get(ids, include=["embeddings"])
8280
fetch_embeddings = fetch_result["embeddings"]
8381
return fetch_embeddings
8482

autorag/pyproject.toml

Lines changed: 67 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,73 @@ classifiers = [
2424
"Topic :: Software Development :: Libraries :: Python Modules",
2525
]
2626
urls = { Homepage = "https://github.com/Marker-Inc-Korea/AutoRAG" }
27-
dynamic = ["version", "dependencies"]
27+
dynamic = ["version"]
28+
dependencies = [
29+
"pydantic<2.10.0", # incompatible with llama index
30+
"numpy<2.0.0", # temporal not using numpy 2.0.0
31+
"pandas>=2.1.0",
32+
"tqdm",
33+
"tiktoken>=0.7.0", # for counting token
34+
"openai>=1.0.0",
35+
"rank_bm25", # for bm25 retrieval
36+
"pyyaml", # for yaml file
37+
"pyarrow", # for pandas with parquet
38+
"fastparquet", # for pandas with parquet
39+
"sacrebleu", # for bleu score
40+
"evaluate", # for meteor and other scores
41+
"rouge_score", # for rouge score
42+
"rich", # for pretty logging
43+
"click", # for cli
44+
"cohere>=5.8.0", # for cohere services
45+
"tokenlog>=0.0.2", # for token logging
46+
"aiohttp", # for async http requests
47+
"voyageai", # for voyageai reranker
48+
"mixedbread-ai", # for mixedbread-ai reranker
49+
"llama-index-llms-bedrock",
50+
"scikit-learn",
51+
"emoji",
52+
53+
# Vector DB
54+
"pymilvus>=2.3.0", # for using milvus vectordb
55+
"chromadb>=1.0.0", # for chroma vectordb
56+
"weaviate-client", # for weaviate vectordb
57+
"pinecone[grpc]", # for pinecone vectordb
58+
"couchbase", # for couchbase vectordb
59+
"qdrant-client", # for qdrant vectordb
60+
61+
# API server
62+
"quart",
63+
"pyngrok",
64+
65+
# LlamaIndex
66+
"llama-index",
67+
"llama-index-core",
68+
"llama-index-readers-file",
69+
"llama-index-embeddings-openai",
70+
"llama-index-embeddings-ollama",
71+
"llama-index-embeddings-openai-like",
72+
"llama-index-llms-openai",
73+
"llama-index-llms-openai-like",
74+
"llama-index-retrievers-bm25",
75+
76+
# WebUI
77+
"streamlit",
78+
"gradio",
79+
80+
# Langchain
81+
"langchain-core",
82+
"langchain-unstructured",
83+
"langchain-upstage",
84+
"langchain-community",
85+
86+
# autorag dashboard
87+
"panel",
88+
"seaborn",
89+
"ipykernel",
90+
"ipywidgets",
91+
"ipywidgets_bokeh",
92+
]
93+
2894

2995
[tool.poetry]
3096
name = "AutoRAG"
@@ -34,7 +100,6 @@ authors = ["Marker-Inc <vkehfdl1@gmail.com>"]
34100

35101
[tool.setuptools.dynamic]
36102
version = { file = ["autorag/VERSION"] }
37-
dependencies = { file = ["requirements.txt"] }
38103

39104
[tool.setuptools]
40105
include-package-data = true

autorag/requirements.txt

Lines changed: 0 additions & 65 deletions
This file was deleted.

0 commit comments

Comments
 (0)