Skip to content
Open
Show file tree
Hide file tree
Changes from 75 commits
Commits
Show all changes
110 commits
Select commit Hold shift + click to select a range
a520953
reapplied commits
Jun 27, 2025
9f28efd
fixed path
Jun 30, 2025
9b3cd34
upgraded dependencies
Jul 1, 2025
0d9d598
fixed pathes
Jul 1, 2025
563fd45
added dependencies
Jul 1, 2025
4cfd258
added test for colpali retriever
Jul 7, 2025
e4dd82c
added code for caching results
Jul 8, 2025
85506b4
fixed the test cahce and added cache itsef
Jul 8, 2025
a0064a4
added e2e test code
Jul 8, 2025
a0a8f55
updated code of test to match the query
Jul 8, 2025
8e4c225
updated test and cache
Jul 8, 2025
628c6a3
updated test files
Jul 8, 2025
5d92ca6
updated caches for the tests
Jul 9, 2025
cbbc41d
updated cache
Jul 9, 2025
accf675
added config yaml for colpali and updated tests
Jul 9, 2025
ed060e1
added image size to colpali config
Jul 9, 2025
07c4886
added creation of colpaliresource on creation,
Jul 10, 2025
2162e91
moved colpali resource to colpali folder
Jul 10, 2025
bc4881b
partlt changed the calculation of scores
Jul 10, 2025
592ea07
updated retriever code to use page embeddings and score them and then…
Jul 11, 2025
a688b84
changed enum+str to strenum, added consistency check for the model na…
Jul 14, 2025
66fcdeb
added lock for gpu related operations
Jul 14, 2025
d6bc224
added cpu pools usage
Jul 15, 2025
3039f13
changed bloat16 to float16
Jul 15, 2025
dbac766
import fixes
Jul 15, 2025
9201739
fixed lint issues
Jul 15, 2025
b15ca56
fixed cache and refactored cache code
Jul 15, 2025
3acd205
removed ignores
Jul 15, 2025
bb52615
removed unnecessary processor caching
Jul 15, 2025
474eda4
switched device choice to existing function
Jul 16, 2025
07fab25
Merge branch 'development' into f/colpali_retriever
Jul 16, 2025
8e6ce6d
removed unnecessary setting of lib version
Jul 16, 2025
9ac5474
added comment for the colpali-engine dependency
Jul 16, 2025
e1f3002
put colpali config outside of the request config
Jul 21, 2025
5d93a2c
added batch that is being collected from different tasks and then bei…
Jul 23, 2025
1241dd7
added hasg based cache
Jul 24, 2025
9302672
added test for parallel queries and images
Jul 25, 2025
d3896d8
fixed moving to cpu embeddings
Jul 25, 2025
297166a
added additional pools not to block other pools
Jul 25, 2025
d5d1202
replaced progress bar
Jul 25, 2025
05a656c
caching models in docker on creating image
Jul 28, 2025
6b96ee4
fixed donwload model script
Jul 28, 2025
2728522
fixed docker file
Jul 28, 2025
f2bd680
added file with common models info and pathes, changed docker file to…
Jul 28, 2025
5c4a8a3
changed config according to fields in app config
Jul 30, 2025
137d828
added cache in model while saving it
Aug 1, 2025
f915106
fixed device
Aug 1, 2025
cea1cc1
fixed model cache save
Aug 4, 2025
e3aa186
added copying additional files for docker to download models
Jul 29, 2025
377ed97
fixed format
Aug 5, 2025
3dd64a9
removed from resource config model type, now it is being calculated b…
Aug 5, 2025
5394186
fixed tests
Aug 5, 2025
1453542
fixed format
Aug 6, 2025
6628b2c
Merge branch 'development' into f/colpali_retriever
Aug 6, 2025
6386c8a
changed doc_id to original index that was in from_doc_records
Aug 7, 2025
22e3544
poetry lock
Aug 7, 2025
0af4073
updated poetry lock with minimal changes comapring to original one
Aug 7, 2025
1b12f88
fixed test
Aug 7, 2025
6475b59
renamed file to match cache for test
Aug 8, 2025
c65faff
updated cahce method, and replaced cahce files
Aug 8, 2025
e4063e4
changed yaml name
Aug 8, 2025
ccfe1e3
added fair queue to process tasks
Aug 8, 2025
040cbd6
renamed function
Aug 8, 2025
e787ade
changed variables order
Aug 8, 2025
5d29e5f
Merge branch 'development' into f/colpali_retriever
Aug 8, 2025
2b367dc
changed colpali version to realeased version
Aug 8, 2025
6116d29
made separate docker file+changed script to download model
Aug 21, 2025
60b5b3c
added arg for base image
Aug 21, 2025
d0b45c3
added stage to donwload model and some logs to verify env variables
Aug 21, 2025
719f8f8
fixed model env variable and added perint to log when loading the model
Aug 21, 2025
d937b86
fixed env
Aug 21, 2025
e714b53
removed unseccessary variable from docker
Aug 22, 2025
e11eec9
Merge branch 'development' into f/colpali_retriever
paknikolai Aug 22, 2025
5ccabd3
updated readme
Aug 22, 2025
becad27
Merge branch 'development' into f/colpali_retriever
Sep 8, 2025
f478ffc
removed image size field because it was unecessary
Sep 10, 2025
9864a8a
removed unused import
Sep 10, 2025
8139132
changed paramerers order to be consistent with embeddings
Sep 10, 2025
bb6db25
removed unnecessary print
Sep 10, 2025
5cac3be
removed unused dependency
Sep 10, 2025
8c682c5
moved load model to different function and removed set config from re…
Sep 11, 2025
ed020fa
replaced with map with progress query and image processing
Sep 11, 2025
f10a631
made a str enum and renamed map fro known models
Sep 11, 2025
ef9c231
removed batch prcessor
Sep 11, 2025
f1000da
fixed queries
Sep 11, 2025
90cd989
fixed format
Sep 11, 2025
111dc8e
fixed lint
Sep 11, 2025
1c73c81
fixed queries
Sep 11, 2025
a39bee5
added batch size to parameters
Sep 11, 2025
6c106a8
moved model call inside resource
Sep 12, 2025
fe92a1e
removed unusd config
Sep 12, 2025
90fda28
fixed docker file since folder with dial rag is already copied
Sep 12, 2025
f9b22b7
fixed downloading script
Sep 12, 2025
e381ae5
merged develop branch
Sep 16, 2025
af19431
fixed format
Sep 16, 2025
c796ffd
added some comments
Sep 16, 2025
06def61
Merge branch 'development' into f/colpali_retriever
paknikolai Sep 17, 2025
e2717ac
fixed type for images in calculate_images_embeddings
Sep 18, 2025
dc8d368
simplified replacing cached resource in tests
Sep 18, 2025
dc2beb5
created constabt for the test folder
Sep 18, 2025
b3ef125
removed colpali index from colpali resource
Sep 19, 2025
056722f
moved common embeddings to separate folder
Sep 19, 2025
ead9b7e
Merge branch 'development' into f/colpali_retriever
paknikolai Sep 19, 2025
0532587
separated download scripts
Sep 19, 2025
95bf413
corrected argument line
Sep 19, 2025
5d3c6cd
replaced env variable for model path with config
Sep 19, 2025
b829ff7
removed padding of embeddings
Sep 19, 2025
73a05ed
changed hf_home to cache models there
Sep 25, 2025
53e09a8
updated donwload
Sep 25, 2025
73e51bb
Merge branch 'development' into f/colpali_retriever
Sep 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ FROM builder AS builder_download_model
COPY download_model.py .

# Model: https://huggingface.co/epam/bge-small-en
RUN python download_model.py "epam/bge-small-en" "$BGE_EMBEDDINGS_MODEL_PATH" "openvino" "torch"
RUN python download_model.py embeddings "epam/bge-small-en" "$BGE_EMBEDDINGS_MODEL_PATH" "openvino" "torch"


FROM builder AS builder_repo_digest
Expand Down
39 changes: 39 additions & 0 deletions Dockerfile.colpali
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Set base image with default value
ARG BASE_IMAGE_NAME=epam/ai-dial-rag:latest

# Stage 1: Download ColPali model
FROM ${BASE_IMAGE_NAME} AS colpali_downloader

# Set environment variables for ColPali models
ENV COLPALI_MODELS_BASE_PATH=/colpali_models

# Set specific model to download with default value
ARG COLPALI_MODEL_NAME=vidore/colSmol-256M
ENV COLPALI_MODEL_NAME=${COLPALI_MODEL_NAME}

# Switch to root user for model downloads
USER root

# Copy necessary files for ColPali model download
COPY aidial_rag/__init__.py aidial_rag/
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to copy aidial_rag files here? epam/ai-dial-rag base image should already have these files.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed here it has already been copied in base image, left only copying downloading script

COPY aidial_rag/retrievers/__init__.py aidial_rag/retrievers/
COPY aidial_rag/retrievers/colpali_retriever/__init__.py aidial_rag/retrievers/colpali_retriever/
COPY aidial_rag/retrievers/colpali_retriever/colpali_models.py aidial_rag/retrievers/colpali_retriever/
COPY download_model.py ./

# Download the specified ColPali model
RUN python download_model.py colpali "$COLPALI_MODELS_BASE_PATH" "$COLPALI_MODEL_NAME"

# Stage 2: Final image with downloaded model
FROM ${BASE_IMAGE_NAME}

# Set environment variables for ColPali models
ENV COLPALI_MODELS_BASE_PATH=/colpali_models

# Copy the downloaded ColPali model from the downloader stage
COPY --from=colpali_downloader --chown=appuser "$COLPALI_MODELS_BASE_PATH" "$COLPALI_MODELS_BASE_PATH"

# Switch back to appuser
USER appuser

# The base image already has EXPOSE 5000 and CMD, so we inherit those
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,19 @@ The `docker_compose_local` folder contains the Docker Compose file and auxiliary
```


## Building a Docker image with a pre-downloaded ColPali model
ColPali model weights are large, so they are shipped in a separate Docker image; this keeps the weights out of the base image when they are not needed.

`Dockerfile.colpali` is an additional Dockerfile that builds on top of the base ai-dial-rag image and stores one ColPali model inside the resulting image.

There are a few arguments for building the image:

- `BASE_IMAGE_NAME` - argument that allows you to set the base image name for ai-dial-rag, default is `epam/ai-dial-rag:latest`
- `COLPALI_MODEL_NAME` - name of the ColPali model to download, default is `vidore/colSmol-256M`

And one environment variable, set inside the image:
- `COLPALI_MODELS_BASE_PATH` - path where the model is stored inside the image; `Dockerfile.colpali` sets it to `/colpali_models`


## Lint

Expand Down
9 changes: 9 additions & 0 deletions aidial_rag/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@
from aidial_rag.request_context import RequestContext, create_request_context
from aidial_rag.resources.cpu_pools import init_cpu_pools
from aidial_rag.retrieval_chain import create_retrieval_chain
from aidial_rag.retrievers.colpali_retriever.colpali_model_resource import (
ColpaliModelResource,
)
from aidial_rag.stages import RetrieverStage
from aidial_rag.transform_history import transform_history
from aidial_rag.utils import profiler_if_enabled, timed_stage
Expand Down Expand Up @@ -196,6 +199,10 @@ class DialRAGApplication(ChatCompletion):

def __init__(self, app_config: AppConfig):
self.app_config = app_config
self.colpali_model_resource = ColpaliModelResource(
app_config.colpali_model_resource_config,
app_config.request.indexing.colpali_index,
)
self.enable_debug_commands = app_config.enable_debug_commands
self.repository_digest = read_repository_digest(REPOSITORY_DIGEST_PATH)
logger.info(
Expand Down Expand Up @@ -289,6 +296,7 @@ async def chat_completion(
indexing_tasks,
index_storage,
dial_api_client,
self.colpali_model_resource,
config=request_config,
)

Expand Down Expand Up @@ -343,6 +351,7 @@ def _make_retrieval_stage(retriever: BaseRetriever, stage_name):
indexing_config=request_config.indexing,
document_records=document_records,
query_chain=query_chain,
colpali_model_resource=self.colpali_model_resource,
make_retrieval_stage=_make_retrieval_stage,
)

Expand Down
6 changes: 6 additions & 0 deletions aidial_rag/app_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from aidial_rag.configuration_endpoint import RequestConfig
from aidial_rag.index_storage import IndexStorageConfig
from aidial_rag.resources.cpu_pools import CpuPoolsConfig
from aidial_rag.retrievers.colpali_retriever.colpali_model_resource import (
ColpaliModelResourceConfig,
)


class AppConfig(BaseSettings):
Expand Down Expand Up @@ -38,6 +41,9 @@ class AppConfig(BaseSettings):

cpu_pools: CpuPoolsConfig = Field(default=CpuPoolsConfig())
index_storage: IndexStorageConfig = Field(default=IndexStorageConfig())
colpali_model_resource_config: ColpaliModelResourceConfig | None = Field(
default=None
)
request: RequestConfig = Field(default=RequestConfig())

model_config = SettingsConfigDict(
Expand Down
1 change: 1 addition & 0 deletions aidial_rag/document_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class DocumentRecord(BaseDoc):
embeddings_index: MultiEmbeddings | None
multimodal_embeddings_index: MultiEmbeddings | None
description_embeddings_index: MultiEmbeddings | None
colpali_embeddings_index: MultiEmbeddings | None
mime_type: str
document_bytes: bytes # Could be attached document or converted document

Expand Down
41 changes: 39 additions & 2 deletions aidial_rag/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@
from aidial_rag.request_context import RequestContext
from aidial_rag.resources.dial_limited_resources import DialLimitedResources
from aidial_rag.retrievers.bm25_retriever import BM25Retriever
from aidial_rag.retrievers.colpali_retriever.colpali_model_resource import (
ColpaliModelResource,
)
from aidial_rag.retrievers.colpali_retriever.colpali_retriever import (
ColpaliRetriever,
)
from aidial_rag.retrievers.description_retriever.description_retriever import (
DescriptionRetriever,
)
Expand Down Expand Up @@ -107,6 +113,7 @@ async def load_document_impl(
attachment_link: AttachmentLink,
stage_stream: SupportsWriteStr,
index_settings: IndexSettings,
colpali_model_resource: ColpaliModelResource,
config: RequestConfig,
) -> DocumentRecord:
logger_stream = LoggerStream()
Expand Down Expand Up @@ -171,6 +178,18 @@ async def load_document_impl(
)
)

colpali_index_task = None
if index_config.colpali_index is not None:
colpali_index_task = tg.create_task(
ColpaliRetriever.build_index(
model_resource=colpali_model_resource,
colpali_index_config=index_config.colpali_index,
stageio=StreamWithPrefix(io_stream, "ColpaliRetriever: "),
mime_type=mime_type,
original_document=doc_bytes,
)
)

# TODO: try to move is_image check to the parse_document since another loader is not exposed here from the document_loaders.py
if is_image(content_type):
chunks_list = [get_default_image_chunk(attachment_link)]
Expand Down Expand Up @@ -203,6 +222,9 @@ async def load_document_impl(
description_indexes = (
description_index_task.result() if description_index_task else None
)
colpali_indexes = (
colpali_index_task.result() if colpali_index_task else None
)

return DocumentRecord(
format_version=FORMAT_VERSION,
Expand All @@ -212,6 +234,7 @@ async def load_document_impl(
embeddings_index=embeddings_index_task.result(),
multimodal_embeddings_index=multimodal_index,
description_embeddings_index=description_indexes,
colpali_embeddings_index=colpali_indexes,
document_bytes=doc_bytes,
mime_type=mime_type,
)
Expand All @@ -236,6 +259,7 @@ async def load_document(
task: IndexingTask,
index_storage: IndexStorage,
dial_api_client: DialApiClient,
colpali_model_resource: ColpaliModelResource,
config: RequestConfig,
) -> DocumentRecord:
attachment_link = task.attachment_link
Expand Down Expand Up @@ -275,6 +299,7 @@ async def load_document(
attachment_link,
io_stream,
index_settings,
colpali_model_resource,
config,
)
except InvalidDocumentError as e:
Expand All @@ -297,10 +322,16 @@ async def load_document_task(
index_storage: IndexStorage,
dial_api_client: DialApiClient,
config: RequestConfig,
colpali_model_resource: ColpaliModelResource,
) -> DocumentIndexingResult:
try:
doc_record = await load_document(
request_context, task, index_storage, dial_api_client, config
request_context,
task,
index_storage,
dial_api_client,
colpali_model_resource,
config,
)
return DocumentIndexingSuccess(
task=task,
Expand All @@ -319,14 +350,20 @@ async def load_documents(
tasks: Iterable[IndexingTask],
index_storage: IndexStorage,
dial_api_client: DialApiClient,
colpali_model_resource: ColpaliModelResource,
config: RequestConfig,
) -> List[DocumentIndexingResult]:
# TODO: Rewrite this function using TaskGroup to cancel all tasks if one of them fails
# if ignore_document_loading_errors is not set in the config
return await asyncio.gather(
*[
load_document_task(
request_context, task, index_storage, dial_api_client, config
request_context,
task,
index_storage,
dial_api_client,
config,
colpali_model_resource,
)
for task in tasks
],
Expand Down
6 changes: 6 additions & 0 deletions aidial_rag/indexing_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
)
from aidial_rag.document_loaders import ParserConfig
from aidial_rag.document_record import IndexSettings
from aidial_rag.retrievers.colpali_retriever.colpali_index_config import (
ColpaliIndexConfig,
)
from aidial_rag.retrievers.description_retriever.description_retriever import (
DescriptionIndexConfig,
)
Expand All @@ -29,6 +32,9 @@ class IndexingConfig(BaseConfig):
description="Enables DescriptionRetriever which uses vision model to generate page images "
"descriptions and perform search on them.",
)
colpali_index: ColpaliIndexConfig | None = Field(
default=None, description="Enables ColpaliRetriever"
)

def collect_fields_that_rebuild_index(self) -> IndexSettings:
"""Return the IndexingConfig fields that determine when the index needs to be rebuilt."""
Expand Down
49 changes: 49 additions & 0 deletions aidial_rag/resources/cpu_pools.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,26 @@ class CpuPoolsConfig(BaseConfig):
description="Embedding process for the query. Should be `1`, unless you have a lot of cores.",
)

# Worker count for the pool that runs heavy indexing embedding tasks.
# The description parts are implicitly concatenated, so every part except
# the last must end with a space to keep the sentences separated.
heavy_indexing_embeddings_pool: int = Field(
    default=1,
    description="Embedding process for gpu heavy tasks. "
    "Needed not to block lighter tasks on indexing_embeddings_pool. "
    "Should be `1`",
)
# Worker count for the pool that runs heavy query embedding tasks.
heavy_query_embeddings_pool: int = Field(
    default=1,
    description="Embedding process for gpu heavy tasks for queries. "
    "Needed not to block lighter tasks on query_embeddings_pool. "
    "Should be `1`",
)


class CpuPools:
# Executor attributes of CpuPools. Each name here must match the attribute
# assigned in __init__ so the declared and the real attributes agree.
indexing_cpu_pool: ThreadPoolExecutor
indexing_embeddings_pool: ThreadPoolExecutor
query_embeddings_pool: ThreadPoolExecutor
# Separate pools for heavy embedding work, so it does not block the
# lighter indexing/query embedding pools above.
heavy_indexing_embeddings_pool: ThreadPoolExecutor
heavy_query_embeddings_pool: ThreadPoolExecutor

def __init__(self, config: CpuPoolsConfig) -> None:
# Using ThreadPoolExecutor instead of ProcessPoolExecutor, because
Expand All @@ -58,6 +73,16 @@ def __init__(self, config: CpuPoolsConfig) -> None:
thread_name_prefix="query_embeddings",
)

self.heavy_indexing_embeddings_pool = ThreadPoolExecutor(
max_workers=config.heavy_indexing_embeddings_pool,
thread_name_prefix="heavy_indexing_embeddings",
)

self.heavy_query_embeddings_pool = ThreadPoolExecutor(
max_workers=config.heavy_query_embeddings_pool,
thread_name_prefix="heavy_query_embeddings",
)

def _run_in_pool(self, pool, func, *args, **kwargs):
return asyncio.get_running_loop().run_in_executor(
pool, func, *args, **kwargs
Expand All @@ -76,6 +101,16 @@ def run_in_query_embeddings_pool(self, func, *args, **kwargs):
self.query_embeddings_pool, func, *args, **kwargs
)

def run_in_heavy_indexing_embeddings_pool(self, func, *args, **kwargs):
return self._run_in_pool(
self.heavy_indexing_embeddings_pool, func, *args, **kwargs
)

def run_in_heavy_query_embeddings_pool(self, func, *args, **kwargs):
return self._run_in_pool(
self.heavy_query_embeddings_pool, func, *args, **kwargs
)

_instance = None

@classmethod
Expand Down Expand Up @@ -103,6 +138,8 @@ async def init_cpu_pools(config: CpuPoolsConfig):
await cpu_pools.run_in_indexing_cpu_pool(sum, range(10))
await cpu_pools.run_in_indexing_embeddings_pool(sum, range(10))
await cpu_pools.run_in_query_embeddings_pool(sum, range(10))
await cpu_pools.run_in_heavy_indexing_embeddings_pool(sum, range(10))
await cpu_pools.run_in_heavy_query_embeddings_pool(sum, range(10))


def run_in_indexing_cpu_pool(func, *args, **kwargs):
Expand All @@ -119,3 +156,15 @@ def run_in_query_embeddings_pool(func, *args, **kwargs):
return CpuPools.instance().run_in_query_embeddings_pool(
func, *args, **kwargs
)


def run_in_heavy_indexing_embeddings_pool(func, *args, **kwargs):
    """Module-level shortcut for the heavy indexing embeddings pool.

    Looks up the object returned by ``CpuPools.instance()`` and forwards
    ``func`` with its arguments to that object's
    ``run_in_heavy_indexing_embeddings_pool`` method, returning its result.
    """
    pools = CpuPools.instance()
    return pools.run_in_heavy_indexing_embeddings_pool(func, *args, **kwargs)


def run_in_heavy_query_embeddings_pool(func, *args, **kwargs):
    """Module-level shortcut for the heavy query embeddings pool.

    Looks up the object returned by ``CpuPools.instance()`` and forwards
    ``func`` with its arguments to that object's
    ``run_in_heavy_query_embeddings_pool`` method, returning its result.
    """
    pools = CpuPools.instance()
    return pools.run_in_heavy_query_embeddings_pool(func, *args, **kwargs)
Loading
Loading