Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.10'
- uses: actions/setup-java@v4
with:
distribution: 'zulu'
java-version: '17'
- name: Upgrade pip
run: |
python3 -m pip install --upgrade pip
- name: Update apt-get
run: |
sudo apt-get update
Expand All @@ -38,24 +32,30 @@ jobs:
- name: Install tesseract
run: |
sudo apt-get install tesseract-ocr
- name: Install uv
uses: astral-sh/setup-uv@v5
- name: Install Python 3.10
run: uv python install 3.10
- name: Install Venv
run: uv venv
- name: Install AutoRAG
run: |
pip install -e './autorag[ko,dev,parse,ja]'
uv pip install -e './autorag[all]'
- name: Install dependencies
run: |
pip install -r tests/requirements.txt
uv pip install -r tests/requirements.txt
- name: Upgrade pyOpenSSL
run: |
pip install --upgrade pyOpenSSL
uv pip install --upgrade pyOpenSSL
- name: Install NLTK and download model
run: |
pip install nltk
python3 -c "import nltk; nltk.download('punkt_tab')"
python3 -c "import nltk; nltk.download('averaged_perceptron_tagger_eng')"
uv pip install nltk
uv run python -c "import nltk; nltk.download('punkt_tab')"
uv run python -c "import nltk; nltk.download('averaged_perceptron_tagger_eng')"
- name: delete tests package
run: python3 tests/delete_tests.py
run: uv run python tests/delete_tests.py
- name: Run AutoRAG tests
env:
PYTHONPATH: ${PYTHONPATH}:./autorag
run: |
python3 -m pytest -o log_cli=true --log-cli-level=INFO -n auto tests/autorag
uv run python -m pytest -o log_cli=true --log-cli-level=INFO -n auto tests/autorag
4 changes: 2 additions & 2 deletions autorag/autorag/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
DirectoryLoader,
)
from langchain_unstructured import UnstructuredLoader
from langchain_upstage import UpstageDocumentParseLoader
from langchain_upstage import UpstageLayoutAnalysisLoader

from llama_index.core.node_parser import (
TokenTextSplitter,
Expand Down Expand Up @@ -59,7 +59,7 @@
# 6. All files
"directory": DirectoryLoader,
"unstructured": UnstructuredLoader,
"upstagedocumentparse": UpstageDocumentParseLoader,
"upstagedocumentparse": UpstageLayoutAnalysisLoader,
}

chunk_modules = {
Expand Down
2 changes: 1 addition & 1 deletion autorag/autorag/data/qa/query/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
role=MessageRole.USER,
content="""Document 1: ヌエヴォ·ラレド自治体はメキシコのタマウリパス州にあります。
シウダー・デポルティーバ(スポーツ・シティ)は、
メキシコのヌエボ・ラレドにある複合スポーツ施設です。
メキシコのヌエボ・ラレドにある複合スポーツ施設です。""",
),
ChatMessage(
role=MessageRole.ASSISTANT,
Expand Down
8 changes: 3 additions & 5 deletions autorag/autorag/vectordb/chroma.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
AsyncHttpClient,
)
from chromadb.api.models.AsyncCollection import AsyncCollection
from chromadb.api.types import IncludeEnum, QueryResult
from chromadb.api.types import QueryResult

from autorag.utils.util import apply_recursive
from autorag.vectordb.base import BaseVectorStore
Expand Down Expand Up @@ -74,11 +74,9 @@ async def add(self, ids: List[str], texts: List[str]):

async def fetch(self, ids: List[str]) -> List[List[float]]:
if isinstance(self.collection, AsyncCollection):
fetch_result = await self.collection.get(
ids, include=[IncludeEnum.embeddings]
)
fetch_result = await self.collection.get(ids, include=["embeddings"])
else:
fetch_result = self.collection.get(ids, include=[IncludeEnum.embeddings])
fetch_result = self.collection.get(ids, include=["embeddings"])
fetch_embeddings = fetch_result["embeddings"]
return fetch_embeddings

Expand Down
69 changes: 67 additions & 2 deletions autorag/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,73 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
]
urls = { Homepage = "https://github.com/Marker-Inc-Korea/AutoRAG" }
dynamic = ["version", "dependencies"]
dynamic = ["version"]
dependencies = [
"pydantic<2.10.0", # incompatible with llama index
"numpy<2.0.0", # temporarily not using numpy 2.0.0
"pandas>=2.1.0",
"tqdm",
"tiktoken>=0.7.0", # for counting tokens
"openai>=1.0.0",
"rank_bm25", # for bm25 retrieval
"pyyaml", # for yaml file
"pyarrow", # for pandas with parquet
"fastparquet", # for pandas with parquet
"sacrebleu", # for bleu score
"evaluate", # for meteor and other scores
"rouge_score", # for rouge score
"rich", # for pretty logging
"click", # for cli
"cohere>=5.8.0", # for cohere services
"tokenlog>=0.0.2", # for token logging
"aiohttp", # for async http requests
"voyageai", # for voyageai reranker
"mixedbread-ai", # for mixedbread-ai reranker
"llama-index-llms-bedrock",
"scikit-learn",
"emoji",

# Vector DB
"pymilvus>=2.3.0", # for using milvus vectordb
"chromadb>=1.0.0", # for chroma vectordb
"weaviate-client", # for weaviate vectordb
"pinecone[grpc]", # for pinecone vectordb
"couchbase", # for couchbase vectordb
"qdrant-client", # for qdrant vectordb

# API server
"quart",
"pyngrok",

# LlamaIndex
"llama-index",
"llama-index-core",
"llama-index-readers-file",
"llama-index-embeddings-openai",
"llama-index-embeddings-ollama",
"llama-index-embeddings-openai-like",
"llama-index-llms-openai",
"llama-index-llms-openai-like",
"llama-index-retrievers-bm25",

# WebUI
"streamlit",
"gradio",

# Langchain
"langchain-core",
"langchain-unstructured",
"langchain-upstage",
"langchain-community",

# autorag dashboard
"panel",
"seaborn",
"ipykernel",
"ipywidgets",
"ipywidgets_bokeh",
]


[tool.poetry]
name = "AutoRAG"
Expand All @@ -34,7 +100,6 @@ authors = ["Marker-Inc <vkehfdl1@gmail.com>"]

[tool.setuptools.dynamic]
version = { file = ["autorag/VERSION"] }
dependencies = { file = ["requirements.txt"] }

[tool.setuptools]
include-package-data = true
Expand Down
65 changes: 0 additions & 65 deletions autorag/requirements.txt

This file was deleted.

Loading