Skip to content

Commit 9c703bd

Browse files
rag (#7)
* feat: update proto files * refactor(rag): combine services to a rag workflow * doc(config): update execute instructions * feat(client): add example client implementation for searching with gRPC
1 parent 90b8887 commit 9c703bd

24 files changed

+377
-289
lines changed

.dockerignore

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
configs/
llm_backend/protos/
**/.env

.venv/
.github/
.pre-commit-config.yaml
.ruff_cache/

Dockerfile
.dockerignore
# Cover both spellings; the compose file in this repo is docker-compose.yaml.
docker-compose.yml
docker-compose.yaml

**/__pycache__/
*.py[cod]
*$py.class

.DS_Store

.git/
.gitignore
# .gitmodules is a file, not a directory — no trailing slash.
.gitmodules

.gitignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
.DS_Store
2-
configs/*
3-
!configs/example.toml
42
llm_backend/protos/
53

64
# Byte-compiled / optimized / DLL files

Dockerfile

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Build stage: install a managed Python and the project's locked dependencies.
FROM ghcr.io/astral-sh/uv:bookworm-slim AS builder

ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    UV_PYTHON_INSTALL_DIR=/python \
    UV_PYTHON_PREFERENCE=only-managed

RUN uv python install 3.12

WORKDIR /app

# Install third-party dependencies first (cached layer, project not installed yet).
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --no-dev --no-install-project

COPY . /app

# Now install the project itself on top of the cached dependency layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev

# Runtime stage: copy only the interpreter and the synced app environment.
FROM debian:bookworm-slim

# NOTE(review): no `python` or `app` user is created in this stage; --chown
# with unknown user names may fail or leave unexpected ownership — confirm
# whether a `useradd`/`groupadd` step is missing here.
COPY --from=builder --chown=python:python /python /python
COPY --from=builder --chown=app:app /app /app

ENV PATH="/app/.venv/bin:$PATH"

WORKDIR /app

# Generate the protos
RUN ["python3", "scripts/gen_protos.py"]

# Run the application
ENTRYPOINT ["python3", "scripts/serve.py", "--config", "configs/config.toml"]

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ uv run gen-protos
1111

1212
## Usage
1313

14-
Please configure the `configs/config.toml` file (refer to `configs/example.toml` for the options).
14+
Please configure the `configs/config.toml` file.
1515
The following environment variables are required (`export` them or place them in a `.env` file):
1616

1717
- `OPENAI_API_KEY`: Your ChatGPT API key.
@@ -23,6 +23,12 @@ The following environment variables are required (`export` them or place them in
2323
python3 scripts/serve.py --config configs/config.toml
2424
```
2525

26+
You can refer to `scripts/client.py` for an example implementation of a client:
27+
28+
```shell
29+
python3 scripts/client.py
30+
```
31+
2632
## Features
2733

2834
Refer to the protobuf files in the `protos/` directory for the features provided by the server.

configs/config.toml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
[server]
2+
host = 'localhost'
3+
port = 50051
4+
max_workers = 10
5+
6+
[service.retrieve]
7+
# Name of the embedding model. All available models can be found [here](https://huggingface.co/models?library=sentence-transformers&language=zh).
8+
embedding_model = 'intfloat/multilingual-e5-large'
9+
10+
# The template must contain the `{keywords}` placeholder.
11+
prompt_template = 'Please search for the content related to the following keywords: {keywords}.'
12+
similarity_top_k = 5
13+
14+
[service.summarize]
15+
system_template = """
16+
# Project Mission: My project mission is to extract 5 articles of the same type from the internet each time and provide them to Chat GPT in the same format to generate summaries and digests, making it convenient for the general public to read.
17+
# Input Format: The format during input is as follows: 1.xxx 2.xxx 3.xxx 4.xxx 5.xxx Each news article is numbered with a digit title. There is a blank line between different news articles, but within the same article, there are no line breaks.
18+
# Detailed Project Execution: The detailed execution of the project involves refraining from adding personal opinions. I only generate summaries based on the provided news and refrain from providing responses beyond the scope of the news.
19+
# Audience for My Content: The audience comprises professionals from various fields, as well as students and homemakers. They span a wide range of age groups and have a strong desire for knowledge. However, due to limited time, they cannot personally read a large amount of news information. Therefore, my content typically needs to be transformed into something understandable by the general public, with logical analysis involving specific questions and solutions.
20+
21+
# Assuming you are now a reader, think step by step about what you think the key points of the news would be, and provide the first version of the summary. Then, based on this summary, pose sub-questions and further modify to provide the final summary.
22+
# Answer in Traditional Chinese, and refrain from providing thoughts and content beyond what you've provided. Endeavor to comprehensively describe the key points of the news.
23+
# Responses should strive to be rigorous and formal, with real evidence when answering questions.
24+
# Answers can be as complete and comprehensive as possible, expanding on details and actual content.
25+
# The "Output Format" is: provide an overarching title that summarizes the news content above, then summarizes the content.
26+
"""
27+
28+
# The template must contain the `{context_str}` and `{query_str}` placeholders.
29+
user_template = """
30+
{query_str}
31+
---------------------
32+
{context_str}"""
33+
34+
# The content of the `{query_str}` placeholder in the user template.
35+
query_str = '假設你是一個摘要抓取者,請將以下---內的文字做一篇文章摘要,用文章敘述的方式呈現,不要用列點的,至少要有500字,要有標題。'
36+
37+
# The transformation applied to the request strings to build the query strings.
38+
# Must be one of:
39+
# - 'plain': The query string is the same as the request string.
40+
# - 'numbered': Add a number (1., 2., ...) to the beginning of each request string.
41+
content_format = 'plain'
42+
43+
[service.summarize.llm]
44+
model = 'gpt-4o-mini'

configs/example.toml

Lines changed: 0 additions & 43 deletions
This file was deleted.

docker-compose.yaml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
services:
2+
llm-backend:
3+
build: .
4+
restart: always
5+
container_name: sync-llm-backend
6+
ports:
7+
- "50051:50051"
8+
environment:
9+
- LLAMA_INDEX_CACHE_DIR=./llama-index-cache
10+
env_file:
11+
- .env
12+
volumes:
13+
- ./configs:/app/configs
14+
qdrant:
15+
image: qdrant/qdrant:latest
16+
restart: always
17+
container_name: qdrant
18+
ports:
19+
- 6333:6333
20+
- 6334:6334
21+
expose:
22+
- 6333
23+
- 6334
24+
- 6335
25+
configs:
26+
- source: qdrant_config
27+
target: /qdrant/config/production.yaml
28+
volumes:
29+
- ./qdrant_data:/qdrant/storage
30+
31+
configs:
32+
qdrant_config:
33+
content: |
34+
log_level: INFO
35+

llm_backend/__init__.py

Lines changed: 6 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,9 @@
11
import os
22

3-
from grpc._server import _Server
3+
import grpc
44
from pydantic import BaseModel
55

6-
from .search import (
7-
SearchService,
8-
add_SearchServiceServicer_to_server,
9-
)
10-
from .search.config import SearchConfig
11-
from .summarize import (
12-
SummarizeService,
13-
add_SummarizeServiceServicer_to_server,
14-
)
15-
from .summarize.config import SummarizeConfig
6+
from .rag import RagConfig, RagService, add_RagServiceServicer_to_server
167

178

189
class ServerConfig(BaseModel):
@@ -21,21 +12,11 @@ class ServerConfig(BaseModel):
2112
max_workers: int = (os.cpu_count() or 1) * 5
2213

2314

24-
class ServiceConfig(BaseModel):
25-
search: SearchConfig
26-
summarize: SummarizeConfig
27-
28-
2915
class Config(BaseModel):
3016
server: ServerConfig
31-
service: ServiceConfig
32-
33-
34-
def setup_search_service(config: Config, server: _Server):
35-
search_service = SearchService(config.service.search)
36-
add_SearchServiceServicer_to_server(search_service, server)
17+
service: RagConfig
3718

3819

39-
def setup_summarize_service(config: Config, server: _Server):
40-
summarize_service = SummarizeService(config.service.summarize)
41-
add_SummarizeServiceServicer_to_server(summarize_service, server)
20+
def setup_rag_service(config: Config, server: grpc.aio.Server):
    """Register the RAG servicer (built from ``config.service``) on *server*."""
    add_RagServiceServicer_to_server(RagService(config.service), server)

llm_backend/rag/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from ..protos.rag_pb2_grpc import (
2+
add_RagServiceServicer_to_server as add_RagServiceServicer_to_server,
3+
)
4+
from .config import RagConfig as RagConfig
5+
from .service import RagService as RagService
Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1-
from enum import StrEnum
21
from typing import Annotated
32

43
from llama_index.llms.openai.utils import ALL_AVAILABLE_MODELS
54
from pydantic import AfterValidator, BaseModel, Field
6-
from pydantic_settings import BaseSettings, SettingsConfigDict
5+
from pydantic_settings import BaseSettings
76

8-
from ..utils import contains_placeholder
7+
from .content_formatters import ContentFormat
98

9+
DEFAULT_EMBEDDING_MODEL = "intfloat/multilingual-e5-large"
10+
DEFAULT_OPENAI_MODEL = "gpt-4o-mini"
11+
DEFAULT_SIMILARITY_TOP_K = 10
12+
DEFAULT_QUERY_PROMPT_TEMPLATE = (
13+
"Please search for the content related to the following keywords: {keywords}."
14+
)
1015
DEFAULT_SYSTEM_TEMPLATE = (
1116
"You are an expert Q&A system that is trusted around the world.\n"
1217
"Always answer the query using the provided context information,"
@@ -30,9 +35,33 @@
3035
DEFAULT_QUERY_STR = "請用繁體中文總結這幾篇新聞。"
3136

3237

33-
class ContentFormat(StrEnum):
34-
PLAIN = "plain"
35-
NUMBERED = "numbered"
38+
def contains_placeholder(*placeholders: str):
    """Build a template validator for the given placeholder names.

    The returned callable (suitable for ``pydantic.AfterValidator``) raises
    ``ValueError`` naming the first placeholder whose ``{name}`` form is
    absent from the template; otherwise it returns the template unchanged.
    """

    def validate_template(template: str):
        missing = next(
            (name for name in placeholders if f"{{{name}}}" not in template),
            None,
        )
        if missing is not None:
            raise ValueError(f"Template must contain '{{{missing}}}'")
        return template

    return validate_template
46+
47+
48+
class QDrantConfig(BaseSettings):
    """Connection settings for the Qdrant vector database.

    Each field is read from the environment variable named by its
    ``validation_alias`` when no explicit value is supplied.
    """

    # NOTE(review): the "test" default looks like a leftover debug value —
    # confirm whether it should be "localhost" (or the compose service name).
    host: str = Field("test", validation_alias="QDRANT_HOST")
    port: int = Field(6333, gt=0, validation_alias="QDRANT_PORT")
    collection: str = Field("news", validation_alias="QDRANT_COLLECTION")
52+
53+
54+
class RetrieveConfig(BaseModel):
    """Settings for the retrieval stage of the RAG workflow."""

    # default_factory defers reading the QDRANT_* environment variables to
    # model-instantiation time instead of import time (the old class-level
    # `QDrantConfig()` default was evaluated once when this module loaded).
    vector_database: QDrantConfig = Field(default_factory=QDrantConfig)  # type: ignore
    embedding_model: str = Field(
        DEFAULT_EMBEDDING_MODEL,
        # Fixed missing space between the two concatenated sentence fragments.
        description="Name of embedding model. "
        "All available models can be found [here](https://huggingface.co/models?library=sentence-transformers&language=zh).",
    )
    # Validator guarantees the "{keywords}" placeholder is present.
    prompt_template: Annotated[
        str, AfterValidator(contains_placeholder("keywords"))
    ] = DEFAULT_QUERY_PROMPT_TEMPLATE
    # ge=1 (was gt=1): retrieving a single top match is a valid setting.
    similarity_top_k: int = Field(DEFAULT_SIMILARITY_TOP_K, ge=1)
3665

3766

3867
def is_available_model(model_name: str):
@@ -43,23 +72,17 @@ def is_available_model(model_name: str):
4372
return model_name
4473

4574

46-
class ChatgptConfig(BaseSettings):
47-
model_config = SettingsConfigDict(
48-
env_file=(".env", ".env.prod"),
49-
env_file_encoding="utf-8",
50-
case_sensitive=True,
51-
extra="ignore",
52-
)
53-
75+
class ChatGptConfig(BaseSettings):
    """OpenAI credentials and model selection for the summarize LLM.

    ``api_key`` comes from the ``OPENAI_API_KEY`` environment variable;
    ``model`` is validated by ``is_available_model`` before use.
    """

    api_key: str = Field(validation_alias="OPENAI_API_KEY")
    model: Annotated[
        str,
        Field(DEFAULT_OPENAI_MODEL),
        AfterValidator(is_available_model),
    ]
6082

6183

62-
class SummarizeQueryConfig(BaseModel):
84+
class SummarizeConfig(BaseModel):
85+
llm: ChatGptConfig = ChatGptConfig() # type: ignore
6386
system_template: str = DEFAULT_SYSTEM_TEMPLATE
6487
user_template: Annotated[
6588
str, AfterValidator(contains_placeholder("context_str", "query_str"))
@@ -71,6 +94,6 @@ class SummarizeQueryConfig(BaseModel):
7194
content_format: ContentFormat = ContentFormat.PLAIN
7295

7396

74-
class SummarizeConfig(BaseModel):
75-
chatgpt: ChatgptConfig
76-
query: SummarizeQueryConfig
97+
class RagConfig(BaseModel):
    """Top-level configuration for the RAG service.

    Groups the retrieval-stage and summarization-stage settings, mirroring
    the ``[service.retrieve]`` / ``[service.summarize]`` TOML sections.
    """

    retrieve: RetrieveConfig
    summarize: SummarizeConfig

0 commit comments

Comments
 (0)