Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added examples/.DS_Store
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoid committing .DS_Store to the git repo

Binary file not shown.
67 changes: 67 additions & 0 deletions examples/chatbot_with_rag/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Chatbot with RAG

* A RAG-driven AI chatbot that lets users manage a private knowledge base by uploading their own files, so it can give more accurate, reliable, and secure answers about private domains that a plain LLM cannot answer
* Use `pytidb` to connect to TiDB
* Use `openai` for the embedding model and response generation
* Use Streamlit as the web UI

## Prerequisites
* Python 3.10+
* A TiDB Cloud Serverless cluster: Create a free cluster here: tidbcloud.com
* OpenAI API key: Go to OpenAI to get your own API key
* Google Auth: Create a web application in Google Cloud Console (see https://docs.streamlit.io/develop/tutorials/authentication/google)

## How to run

**Step1**: Clone the repo

```bash
git clone https://github.com/pingcap/pytidb.git
cd pytidb/examples/chatbot_with_rag
```

**Step2**: Create a virtual environment and install the required packages

```bash
python -m venv .venv
source .venv/bin/activate
pip install -r requirements.txt
```

**Step3**: Set up environment to connect to storage

If you are using a local TiDB server, you can set up the environment variables like this
(you can also use `env.example` as a reference):

```bash
cat > .env <<EOF
OPENAI_API_KEY=
TIDB_HOST=localhost
TIDB_PORT=4000
TIDB_USERNAME=root
TIDB_PASSWORD=
TIDB_DATABASE=test
EOF
```

**Step4**: Set up Google Auth Platform info

```bash
mkdir -p .streamlit
cat > .streamlit/secrets.toml <<EOF
[auth]
redirect_uri = "http://localhost:8501/oauth2callback"
cookie_secret = "<a-random-secret-string>"
client_id = "<your-google-client-id>"
client_secret = "<your-google-client-secret>"
server_metadata_url = "https://accounts.google.com/.well-known/openid-configuration"
EOF
```

**Step5**: Run the Streamlit app

```bash
streamlit run src/app.py
```

**Step6**: Open your browser and visit `http://localhost:8501`

7 changes: 7 additions & 0 deletions examples/chatbot_with_rag/env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
OPENAI_API_KEY=sk-xxxxxx

TIDB_HOST=xxxxxx
TIDB_PORT=xxxxxx
TIDB_USERNAME=xxxxxx
TIDB_PASSWORD=xxxxxx
TIDB_DATABASE=test
11 changes: 11 additions & 0 deletions examples/chatbot_with_rag/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
streamlit[auth]
openai
pytidb

python-dotenv
streamlit-authenticator

sqlalchemy
litellm
PyPDF2
langchain_text_splitters
17 changes: 17 additions & 0 deletions examples/chatbot_with_rag/src/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import streamlit as st

# Page objects for the multipage app; each one points at a script under page_files/.
doc_page = st.Page("page_files/doc_page.py", title="Manage Uploaded Files")
main_page = st.Page("page_files/main_page.py", title="Chats")
login_page = st.Page("page_files/login_page.py")


def main():
    """Run the login page for anonymous visitors, otherwise the chat and file pages."""
    # Only the login page is registered until the user is logged in.
    pages = [main_page, doc_page] if st.user.is_logged_in else [login_page]
    st.navigation(pages).run()


if __name__ == "__main__":
    main()

73 changes: 73 additions & 0 deletions examples/chatbot_with_rag/src/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from typing import Optional, Any
from pytidb.schema import TableModel, Field
from pytidb.embeddings import EmbeddingFunction


from datetime import datetime
from sqlalchemy import Text, Column, DateTime, text

# Models used by the app.
# Embedding function created from the "openai/text-embedding-3-small" model id; the
# Chunk table below uses it to declare its vector field.
text_embed = EmbeddingFunction("openai/text-embedding-3-small")
# Name of the chat model; not used in this module — presumably read by the
# answer-generation code (confirm against the caller).
llm_model = "gpt-4o-mini"

class Chunk(TableModel, table=True):
    """Row of the ``chunks`` table: a piece of text plus its vector field."""

    __tablename__ = "chunks"
    __table_args__ = {"extend_existing": True}

    id: int = Field(primary_key=True)
    text: str = Field(sa_type=Text)
    # Indexed reference to ``documents.id``, declared with ondelete="CASCADE".
    document_id: Optional[int] = Field(
        foreign_key="documents.id", ondelete="CASCADE", index=True
    )
    # Vector field built by the module-level embedding function with ``text`` as
    # its source field.
    text_vec: Optional[Any] = text_embed.VectorField(source_field="text")

class Document(TableModel, table=True):
    """Row of the ``documents`` table: a named document with an optional user id."""

    __tablename__ = "documents"
    __table_args__ = {"extend_existing": True}

    id: int = Field(primary_key=True)
    # Plain nullable integer; no foreign key to ``users`` is declared on it.
    user_id: Optional[int] = Field(nullable=True)
    document_name: str = Field(sa_type=Text)

# table chat_history that stores the references to chat sessions of all users. Each row stores info of a session that chat_message represents
class Chat(TableModel, table=True):
    """Row of the ``chat_history`` table: one chat session owned by a user.

    ``updated_at`` is maintained by the database: it defaults to the current
    timestamp on insert and is refreshed on every update of the row.
    """

    __tablename__ = "chat_history"
    # Same setting as Chunk/Document: lets the module be executed again without
    # SQLAlchemy raising "Table ... is already defined for this MetaData instance".
    __table_args__ = {"extend_existing": True}

    id: int = Field(primary_key=True)
    # Indexed reference to ``users.id``, declared with ondelete="CASCADE".
    user_id: int = Field(
        foreign_key="users.id",
        ondelete="CASCADE",
        index=True,
    )
    updated_at: datetime = Field(
        sa_column=Column(
            DateTime,
            nullable=False,
            # SQLAlchemy does not render ``server_onupdate`` into the CREATE TABLE
            # statement, so the ON UPDATE clause has to be part of the server
            # default for the MySQL-compatible backend to refresh the column.
            server_default=text("CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"),
            # Kept so the ORM knows the value is generated server-side on update.
            server_onupdate=text("CURRENT_TIMESTAMP"),
        )
    )

# table chat_message is the sub-table of chat_history related by foreign key. Each row stores the message text, either a question asked by the user or the answer generated by the AI assistant
class ChatMessage(TableModel, table=True):
    """Row of the ``chat_message`` table: one message belonging to a chat session."""

    __tablename__ = "chat_message"
    # Same setting as Chunk/Document: lets the module be executed again without
    # SQLAlchemy raising "Table ... is already defined for this MetaData instance".
    __table_args__ = {"extend_existing": True}

    id: int = Field(primary_key=True)
    # Indexed reference to ``chat_history.id``, declared with ondelete="CASCADE".
    chat_history_id: int = Field(
        foreign_key="chat_history.id",
        ondelete="CASCADE",
        index=True,
    )
    # Plain integer with no foreign key; presumably distinguishes the user from the
    # assistant — confirm against the code that writes messages.
    speaker_id: int
    text: str = Field(sa_type=Text)

# table users stores info of users
class User(TableModel, table=True):
    """Row of the ``users`` table: one account identified by a unique e-mail."""

    __tablename__ = "users"
    # Same setting as Chunk/Document: lets the module be executed again without
    # SQLAlchemy raising "Table ... is already defined for this MetaData instance".
    __table_args__ = {"extend_existing": True}

    id: int = Field(primary_key=True)
    email: str = Field(unique=True, index=True)
    # NOTE(review): 225 looks like a typo for 255 — confirm before changing, since
    # it alters the column definition.
    username: str | None = Field(default=None, max_length=225)
29 changes: 29 additions & 0 deletions examples/chatbot_with_rag/src/page_files/doc_page.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import streamlit as st
import json

from utils import list_file_names, delete_file, create_user

# Obtain the id for the logged-in user's e-mail from the project helper
# (presumably get-or-create — confirm in utils.create_user).
user_id = create_user(st.user.email)

st.title("Manage Uploaded Files")

def show_file_list(user_id: int):
    """Render the user's uploaded files, each with a button that deletes it.

    Args:
        user_id: Id passed through to ``list_file_names`` and ``delete_file``.
    """
    # A message written right before ``st.rerun()`` is discarded by the rerun, so
    # the confirmation is parked in session state and shown on the next run.
    notice_key = "doc_page_delete_notice"
    notice = st.session_state.pop(notice_key, None)
    if notice:
        st.success(notice)

    # ``list_file_names`` returns a JSON string; the names are read from "files".
    files = json.loads(list_file_names(user_id)).get("files", [])
    if not files:
        st.info("No files uploaded yet.")
        return

    for position, file_name in enumerate(files):
        name_col, action_col = st.columns([4, 1])
        with name_col:
            st.write(file_name)
        with action_col:
            # Position-qualified key: two entries with the same name would
            # otherwise produce duplicate widget keys, which Streamlit rejects.
            if st.button("Delete", key=f"delete_file_{position}_{file_name}"):
                delete_file(user_id, file_name)
                st.session_state[notice_key] = f"Deleted '{file_name}'"
                st.rerun()


show_file_list(user_id)
8 changes: 8 additions & 0 deletions examples/chatbot_with_rag/src/page_files/login_page.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import streamlit as st

def login_screen():
    """Show the private-app notice and a button wired to ``st.login``."""
    st.header("This app is private.")
    st.subheader("Please log in.")
    st.button(label="Log in with Google", on_click=st.login)


login_screen()
10 changes: 10 additions & 0 deletions examples/chatbot_with_rag/src/page_files/main_page.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import streamlit as st
from ui import initialize_chat_state
from utils import create_user

st.set_page_config(page_title="Intramind", page_icon="💬")

# Look up the id for the logged-in e-mail, then keep the identity details in
# session state under the same keys as before.
user_id = create_user(st.user.email)
st.session_state["user_id"] = user_id
st.session_state["user_name"] = st.user.name
st.session_state["user_email"] = st.user.email

initialize_chat_state(user_id)
Loading