92 changes: 52 additions & 40 deletions .github/workflows/publish.yml
@@ -1,51 +1,63 @@
 name: Publish Python 🐍 distributions 📦 to PyPI
+permissions:
+  contents: read
 
 on:
-  push:
-    branches:
-      - main
+  release:
+    types: [published]
 
 jobs:
-  pypi-publish:
-    name: upload release to PyPI
-    runs-on: ubuntu-latest
-    # Specifying a GitHub environment is optional, but strongly encouraged
-    environment: release
-    permissions:
-      # IMPORTANT: this permission is mandatory for trusted publishing
-      id-token: write
-      contents: read
+  set-version:
+    runs-on: ubuntu-24.04
     steps:
       - uses: actions/checkout@v4
+
+      - name: Export tag
+        id: vars
+        run: echo tag=${GITHUB_REF#refs/*/} >> $GITHUB_OUTPUT
+        if: ${{ github.event_name == 'release' }}
+
+      - name: Update project version
+        run: |
+          sed -i "s/^version = \".*\"/version = \"$RELEASE_VERSION\"/" pyproject.toml
+        env:
+          RELEASE_VERSION: ${{ steps.vars.outputs.tag }}
+        if: ${{ github.event_name == 'release' }}
+
+      - name: Upload updated pyproject.toml
+        uses: actions/upload-artifact@v4
         with:
-          fetch-depth: 0
-      # retrieve your distributions here
-      - name: Set up Python
+          name: pyproject-toml
+          path: pyproject.toml
+
+  publish:
+    runs-on: ubuntu-latest
+    needs: [set-version]
+    steps:
+      - name: Check out
+        uses: actions/checkout@v4
+
+      - name: Set up the python environment
         uses: actions/setup-python@v5
         with:
-          python-version: '3.10'
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install setuptools wheel twine build setuptools-scm
-      - name: Get changed files
-        id: changed-files
-        uses: tj-actions/changed-files@v46
-      - name: Check for VERSION file change
-        id: version_changed
-        env:
-          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
-        run: |
-          echo "changed=false" >> $GITHUB_ENV
-          if echo "${ALL_CHANGED_FILES}" | grep -q 'VERSION'; then
-            echo "changed=true" >> $GITHUB_ENV
-          fi
-      - name: Build
-        if: env.changed == 'true'
-        run: |
-          python -m build
-      - name: Publish package distributions to PyPI
-        if: env.changed == 'true'
-        uses: pypa/gh-action-pypi-publish@release/v1
+          python-version: '3.11'
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
         with:
-          packages-dir: autorag/dist/
+          enable-cache: 'true'
+          cache-suffix: '3.11'
+      - name: Download updated pyproject.toml
+        uses: actions/download-artifact@v4
+        with:
+          name: pyproject-toml
+      - name: Install Python dependencies
+        run: uv sync --frozen --all-extras
+        shell: bash
+
+      - name: Build package
+        run: uv build
+
+      - name: Publish package
+        run: uv publish
+        env:
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
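As a side note, the tag-to-version handoff in the set-version job can be dry-run locally; the sketch below is illustrative only (hypothetical tag value, local pyproject.toml), not part of the workflow, which does the same thing with shell parameter expansion and sed.

# Illustrative dry run of the "Export tag" + "Update project version" steps.
import re

github_ref = "refs/tags/v0.4.2"  # hypothetical; in CI this comes from GITHUB_REF
release_version = github_ref.split("/", 2)[-1]  # mirrors ${GITHUB_REF#refs/*/} for tag refs

with open("pyproject.toml", encoding="utf-8") as f:
    pyproject = f.read()

# Same substitution as the sed call: rewrite the top-level `version = "..."` line.
pyproject = re.sub(
    r'^version = ".*"',
    f'version = "{release_version}"',
    pyproject,
    flags=re.MULTILINE,
)

with open("pyproject.toml", "w", encoding="utf-8") as f:
    f.write(pyproject)

print(release_version)  # e.g. v0.4.2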
29 changes: 23 additions & 6 deletions .github/workflows/test.yml
@@ -4,16 +4,37 @@ on:
push:
branches:
- main
- v0.4
pull_request:
branches:
- main
- v0.4

env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: postgres
POSTGRES_HOST: localhost
POSTGRES_PORT: 5432

jobs:
build:
runs-on: ubuntu-latest
services:
postgres:
image: postgres:16
env:
POSTGRES_USER: ${{ env.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ env.POSTGRES_PASSWORD }}
POSTGRES_DB: ${{ env.POSTGRES_DB }}
options: >-
--health-cmd pg_isready
--health-interval 5s
--health-timeout 5s
--health-retries 5
ports:
- 5432:5432
steps:
- uses: actions/checkout@v4
- uses: actions/setup-java@v4
@@ -39,9 +60,7 @@ jobs:
- name: Install Venv
run: uv venv
- name: Install AutoRAG
run: uv pip install -r pyproject.toml --all-extras -e .
- name: Install dev dependencies
run: uv pip install --group dev
run: uv sync --all-extras
- name: Upgrade pyOpenSSL
run: |
uv pip install --upgrade pyOpenSSL
@@ -50,8 +69,6 @@
uv pip install nltk
uv run python -c "import nltk; nltk.download('punkt_tab')"
uv run python -c "import nltk; nltk.download('averaged_perceptron_tagger_eng')"
- name: delete tests package
run: uv run python tests/delete_tests.py
- name: Run AutoRAG tests
run: |
uv run python -m pytest -o log_cli=true --log-cli-level=INFO -n auto tests/autorag
uv run python -m pytest -o log_cli=true --log-cli-level=INFO -n auto tests
2 changes: 2 additions & 0 deletions .gitignore
@@ -164,3 +164,5 @@ pytest.ini
.DS_Store
projects/tutorial_1
!projects/tutorial_1/config.yaml

.vscode/
1 change: 0 additions & 1 deletion autorag/VERSION

This file was deleted.

14 changes: 1 addition & 13 deletions autorag/__init__.py
@@ -1,27 +1,15 @@
import logging
import logging.config
import os
import sys
from random import random
from typing import List, Any
from typing import Any

from llama_index.core.embeddings.mock_embed_model import MockEmbedding
from llama_index.core.base.llms.types import CompletionResponse
from llama_index.core.llms.mock import MockLLM
from llama_index.llms.bedrock import Bedrock
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbeddingModelType

from llama_index.llms.openai import OpenAI
from llama_index.llms.openai_like import OpenAILike
from langchain_openai.embeddings import OpenAIEmbeddings
from rich.logging import RichHandler

version_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "VERSION")

with open(version_path, "r") as f:
__version__ = f.read().strip()


class LazyInit:
def __init__(self, factory, *args, **kwargs):
3 changes: 0 additions & 3 deletions autorag/cli.py
@@ -19,12 +19,9 @@

autorag_dir = os.path.dirname(os.path.realpath(__file__))
version_file = os.path.join(autorag_dir, "VERSION")
with open(version_file, "r") as f:
__version__ = f.read().strip()


@click.group()
@click.version_option(__version__)
def cli():
pass

3 changes: 2 additions & 1 deletion autorag/nodes/passagereranker/flag_embedding.py
@@ -38,7 +38,8 @@ def __init__(
self.model = FlagReranker(model_name_or_path=model_name, **model_params)

def __del__(self):
del self.model
if hasattr(self, "model"):
del self.model
empty_cuda_cache()
super().__del__()

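The hasattr guard matters because __del__ can run on a partially constructed object: if FlagReranker(...) raises inside __init__, self.model is never assigned, and an unconditional del self.model would raise AttributeError during garbage collection. Presumably that is the motivation here; a minimal sketch of the failure mode (illustrative class, not from the repo):

# Illustrative sketch: why __del__ benefits from a hasattr() guard.
class Loader:
    def __init__(self, should_fail: bool):
        if should_fail:
            raise RuntimeError("model failed to load")  # __init__ aborts early
        self.model = object()                           # never reached on failure

    def __del__(self):
        # Without this guard, __del__ on a half-built instance raises AttributeError
        # and Python emits an "Exception ignored in" warning during garbage collection.
        if hasattr(self, "model"):
            del self.model

try:
    Loader(should_fail=True)
except RuntimeError:
    pass  # __del__ still runs on the half-built instance when it is collected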
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -1,4 +1,4 @@
-e ./autorag
-e .
sphinx
furo
myst-parser
31 changes: 14 additions & 17 deletions docs/source/conf.py
@@ -10,29 +10,26 @@
copyright = "2024, Marker-Inc"
author = "Marker-Inc"

with open("../../autorag/autorag/VERSION") as f:
version = f.read().strip()

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.extlinks",
"sphinx.ext.intersphinx",
"sphinx.ext.mathjax",
"sphinx.ext.todo",
"sphinx.ext.viewcode",
"myst_parser",
"sphinx_copybutton",
"sphinx_design",
"sphinx_inline_tabs",
"sphinxcontrib.googleanalytics",
"sphinx_sitemap",
"sphinx.ext.autodoc",
"sphinx.ext.extlinks",
"sphinx.ext.intersphinx",
"sphinx.ext.mathjax",
"sphinx.ext.todo",
"sphinx.ext.viewcode",
"myst_parser",
"sphinx_copybutton",
"sphinx_design",
"sphinx_inline_tabs",
"sphinxcontrib.googleanalytics",
"sphinx_sitemap",
]
source_suffix = {
".rst": "restructuredtext",
".md": "markdown",
".rst": "restructuredtext",
".md": "markdown",
}

templates_path = ["_templates"]
5 changes: 5 additions & 0 deletions postgresql/.env.example
@@ -0,0 +1,5 @@
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=postgres
109 changes: 109 additions & 0 deletions postgresql/db/init/001_schema.sql
@@ -0,0 +1,109 @@
-- Auto-generated from your design (PostgreSQL 16+)

-- 1) File table
CREATE TABLE IF NOT EXISTS file (
id BIGSERIAL PRIMARY KEY,
type VARCHAR(255) NOT NULL, -- raw, image, audio, video
path VARCHAR(255) NOT NULL,
fsspec_type VARCHAR(255) NOT NULL,
fsspec_kwargs JSONB,
fsspec_nickname VARCHAR(255)
);

-- 2) Document table
CREATE TABLE IF NOT EXISTS document (
id BIGSERIAL PRIMARY KEY,
filepath BIGINT NOT NULL REFERENCES file(id),
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
last_modified_at TIMESTAMPTZ NOT NULL DEFAULT now(),
filename TEXT,
author TEXT,
title TEXT
);

-- 3) Page table
CREATE TABLE IF NOT EXISTS page (
id BIGSERIAL PRIMARY KEY,
page_num INT NOT NULL,
document_id BIGINT NOT NULL REFERENCES document(id) ON DELETE CASCADE,
image_path BIGINT REFERENCES file(id),
metadata JSONB,
CONSTRAINT uq_page_per_doc UNIQUE (document_id, page_num)
);

-- 4) Caption table
CREATE TABLE IF NOT EXISTS caption (
id BIGSERIAL PRIMARY KEY,
page_id BIGINT NOT NULL REFERENCES page(id) ON DELETE CASCADE,
contents TEXT NOT NULL,
module_name VARCHAR(255),
module_kwargs JSONB
);

-- 5) Chunk table
CREATE TABLE IF NOT EXISTS chunk (
id BIGSERIAL PRIMARY KEY,
parent_caption BIGINT NOT NULL REFERENCES caption(id) ON DELETE CASCADE,
contents TEXT NOT NULL,
chunk_method VARCHAR(255),
chunk_kwargs JSONB
);

-- 6) ImageChunk table
CREATE TABLE IF NOT EXISTS image_chunk (
id BIGSERIAL PRIMARY KEY,
parent_page BIGINT NOT NULL REFERENCES page(id) ON DELETE CASCADE,
image_path BIGINT NOT NULL REFERENCES file(id),
chunk_method VARCHAR(255),
chunk_kwargs JSONB
);

-- 7) CaptionChunkRelation (M2M)
CREATE TABLE IF NOT EXISTS caption_chunk_relation (
caption_id BIGINT NOT NULL REFERENCES caption(id) ON DELETE CASCADE,
chunk_id BIGINT NOT NULL REFERENCES chunk(id) ON DELETE CASCADE,
PRIMARY KEY (caption_id, chunk_id)
);

-- 8) Query table
CREATE TABLE IF NOT EXISTS query (
id BIGSERIAL PRIMARY KEY,
query TEXT NOT NULL,
generation_gt TEXT[] NOT NULL
);

-- 9) RetrievalRelation table (chunk_id XOR image_chunk_id)
CREATE TABLE IF NOT EXISTS retrieval_relation (
query_id BIGINT NOT NULL REFERENCES query(id) ON DELETE CASCADE,
group_index INT NOT NULL,
group_order INT NOT NULL,
chunk_id BIGINT REFERENCES chunk(id) ON DELETE CASCADE,
image_chunk_id BIGINT REFERENCES image_chunk(id) ON DELETE CASCADE,
PRIMARY KEY (query_id, group_index, group_order),
CONSTRAINT ck_rr_one_only CHECK (
(chunk_id IS NULL) <> (image_chunk_id IS NULL)
)
);

-- 10) last_modified_at trigger for document
CREATE OR REPLACE FUNCTION set_last_modified_at()
RETURNS TRIGGER LANGUAGE plpgsql AS $$
BEGIN
NEW.last_modified_at := now();
RETURN NEW;
END $$;

DO $$
BEGIN
IF NOT EXISTS(
SELECT 1
FROM information_schema.triggers
WHERE event_object_table = 'document'
AND trigger_name = 'tr_document_set_last_modified'
)
THEN
CREATE TRIGGER tr_document_set_last_modified
BEFORE UPDATE ON document
FOR EACH ROW EXECUTE FUNCTION set_last_modified_at();
END IF;
END $$;
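A quick sanity check against a local container (for example the postgres:16 service configured in test.yml) can confirm the last_modified_at trigger. The sketch below is illustrative only, not part of the migration; it assumes psycopg2 is installed and uses the connection settings from postgresql/.env.example.

# Illustrative sanity check for the schema above (assumes psycopg2 and the
# env vars from postgresql/.env.example). Not part of the migration.
import os
import psycopg2

conn = psycopg2.connect(
    host=os.getenv("POSTGRES_HOST", "localhost"),
    port=os.getenv("POSTGRES_PORT", "5432"),
    user=os.getenv("POSTGRES_USER", "postgres"),
    password=os.getenv("POSTGRES_PASSWORD", "postgres"),
    dbname=os.getenv("POSTGRES_DB", "postgres"),
)
conn.autocommit = True  # each statement commits, so now() differs between statements

with conn.cursor() as cur:
    # Minimal parent rows: a file, then a document that references it.
    cur.execute(
        "INSERT INTO file (type, path, fsspec_type) VALUES (%s, %s, %s) RETURNING id",
        ("raw", "/tmp/example.pdf", "local"),
    )
    file_id = cur.fetchone()[0]

    cur.execute(
        "INSERT INTO document (filepath, filename) VALUES (%s, %s) "
        "RETURNING id, last_modified_at",
        (file_id, "example.pdf"),
    )
    doc_id, created = cur.fetchone()

    # The tr_document_set_last_modified trigger should bump last_modified_at on UPDATE.
    cur.execute(
        "UPDATE document SET title = %s WHERE id = %s RETURNING last_modified_at",
        ("Example document", doc_id),
    )
    updated = cur.fetchone()[0]
    assert updated > created, "expected the BEFORE UPDATE trigger to refresh last_modified_at"

conn.close()

A similar probe against retrieval_relation has to set exactly one of chunk_id or image_chunk_id; setting both, or neither, trips the ck_rr_one_only check.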