Skip to content

Commit 30551a0

Browse files
committed
Add usage instructions and example code to README
1 parent aec65a2 commit 30551a0

File tree

2 files changed

+263
-0
lines changed

2 files changed

+263
-0
lines changed

README.md

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ Connect to `database/sqlite/music.sqlite` database and use `openai` API to answe
4444
source .venv/bin/activate
4545
export OPENAI_API_KEY=sk-xxx
4646
export OPENAI_API_BASE_URL=https://api.openai.com/v1/
47+
export MODEL_NAME=gpt-4o-mini
4748
python camel_database_agent/cli.py \
4849
--database-url sqlite:///database/sqlite/music.sqlite
4950
```
@@ -126,6 +127,172 @@ python camel_database_agent/cli.py \
126127
--database-url postgresql://camel:123456@localhost:5432/school_scheduling
127128
```
128129

130+
## How to Use
131+
132+
```python
133+
import logging
134+
import os
135+
import sys
136+
import uuid
137+
138+
import pandas as pd
139+
from camel.embeddings import OpenAIEmbedding
140+
from camel.models import ModelFactory
141+
from camel.types import ModelPlatformType, ModelType
142+
from colorama import Fore
143+
from tabulate import tabulate
144+
145+
from camel_database_agent import DatabaseAgent
146+
from camel_database_agent.database.database_manager import DatabaseManager
147+
from camel_database_agent.database_base import TrainLevel
148+
149+
# Configure logging settings to show errors on stdout
150+
logging.basicConfig(
151+
level=logging.ERROR,
152+
format="%(message)s",
153+
handlers=[logging.StreamHandler(sys.stdout)],
154+
force=True,
155+
)
156+
# Set specific logging level for the application module
157+
logging.getLogger("camel_database_agent").setLevel(logging.INFO)
158+
logger = logging.getLogger(__name__)
159+
160+
# Configure pandas display options to show complete data
161+
pd.set_option("display.max_rows", None) # Show all rows
162+
pd.set_option("display.max_columns", None) # Show all columns
163+
pd.set_option("display.width", None) # Auto-detect display width
164+
pd.set_option("display.max_colwidth", None) # Show full content of each cell
165+
166+
# Define database connection string
167+
database_url = "sqlite:///database/sqlite/music.sqlite"
168+
169+
# Initialize the database agent with required components
170+
database_agent = DatabaseAgent(
171+
interactive_mode=True,
172+
database_manager=DatabaseManager(db_url=database_url),
173+
# Configure LLM model
174+
model=ModelFactory.create(
175+
model_platform=ModelPlatformType.OPENAI,
176+
model_type=ModelType.GPT_4O_MINI,
177+
api_key=os.getenv("OPENAI_API_KEY"),
178+
url=os.getenv("OPENAI_API_BASE_URL"),
179+
),
180+
# Configure embedding model
181+
embedding_model=OpenAIEmbedding(
182+
api_key=os.getenv("OPENAI_API_KEY"),
183+
url=os.getenv("OPENAI_API_BASE_URL"),
184+
)
185+
)
186+
187+
# Train agent's knowledge about the database schema
188+
database_agent.train_knowledge(
189+
# Training level for database knowledge extraction
190+
# MEDIUM level: Balances training time and knowledge depth by:
191+
# - Analyzing schema relationships
192+
# - Extracting representative sample data
193+
# - Generating a moderate number of query examples
194+
level=TrainLevel.MEDIUM,
195+
# Whether to retrain the knowledge base from scratch
196+
# If True: Forces regeneration of all database insights and examples
197+
# If False: Uses existing cached knowledge if available
198+
reset_train=False,
199+
)
200+
201+
# Display database overview information
202+
print(f"{Fore.GREEN}Database Overview")
203+
print("=" * 50)
204+
print(f"{database_agent.get_summary()}\n\n{Fore.RESET}")
205+
206+
# Display recommended example questions
207+
print(f"{Fore.GREEN}Recommendation Question")
208+
print("=" * 50)
209+
print(f"{database_agent.get_recommendation_question()}\n\n{Fore.RESET}")
210+
211+
# Execute a sample query using natural language
212+
response = database_agent.ask(session_id=str(uuid.uuid4()),
213+
question="List all playlists with more than 5 tracks")
214+
215+
# Handle and display the query results
216+
if response.success:
217+
if response.dataset is not None:
218+
# Format successful results as a table
219+
data = tabulate(
220+
tabular_data=response.dataset, headers='keys', tablefmt='psql'
221+
)
222+
print(f"{Fore.GREEN}{data}{Fore.RESET}")
223+
else:
224+
print(f"{Fore.GREEN}No results found.{Fore.RESET}")
225+
# Display the SQL that was generated
226+
print(f"{Fore.YELLOW}{response.sql}{Fore.RESET}")
227+
else:
228+
# Display error message if query failed
229+
print(f"{Fore.RED}+ {response.error}{Fore.RESET}")
230+
```
231+
232+
Output
233+
234+
```shell
235+
$ python example.py
236+
Successfully connected to database: sqlite:///database/sqlite/music.sqlite
237+
Workspace: /Users/zhanglei/camel_database_agent_data
238+
Train knowledge Took 0.1063 seconds
239+
Database Overview
240+
==================================================
241+
This database is designed to support a digital music platform, encompassing key features for artist management, employee administration, customer relations, and sales transactions.
242+
243+
### Key Features:
244+
245+
1. **Artist and Album Management**:
246+
The `Artist` and `Album` tables form the foundation for managing musical artists and their respective albums. Each artist is uniquely identified and can have multiple albums linked to them, allowing for comprehensive tracking of discographies.
247+
248+
2. **Employee and Customer Management**:
249+
The `Employee` table captures detailed information about staff, including their roles, contact details, and reporting structure, which is essential for organizational management. The `Customer` table holds customer profiles, enabling personalized service and facilitating communication through provided contact information.
250+
251+
3. **Media Type and Genre Classification**:
252+
The `MediaType` and `Genre` tables classify music tracks, enabling easy filtering and searching for users based on their preferences for specific genres or media types (e.g. digital downloads, CDs). This classification enhances user experience by making music discovery intuitive and engaging.
253+
254+
4. **Track and Playlist Management**:
255+
The `Track` table contains detailed attributes for individual music tracks, including duration and pricing. The `Playlist` table allows users to create and manage custom playlists, which can enrich user engagement and retention by providing a personalized listening experience.
256+
257+
5. **Sales Tracking and Invoicing**:
258+
The `Invoice` and `InvoiceLine` tables keep track of sales transactions, linking customers with the purchases they make. This structure not only supports effective billing through clear associations between invoices and the tracks purchased but also facilitates revenue tracking and financial reporting. The ability to view total sales and detailed line items allows for comprehensive sales analysis.
259+
260+
6. **Flexible Design for Data Relationships**:
261+
Through the use of foreign keys and relationships, such as the linkage between customers and their respective invoices, the database provides a robust structure for maintaining data integrity. The design ensures that all relevant information is easily accessible, promoting efficient database utilization.
262+
263+
Overall, this database structure provides a complete solution for managing a music platform, supporting critical business functions like customer engagement, sales tracking, and music cataloging. It enables organizations to operate efficiently, ensuring a seamless experience for both customers and internal staff.
264+
265+
266+
Recommendation Question
267+
==================================================
268+
List all playlists with more than 5 tracks.
269+
What are the sales figures for each month in 2009?
270+
Show each artist and the number of albums they've released.
271+
What is the total revenue generated from invoices for each customer?
272+
Which tracks belong to the album 'Ball to the Wall'?
273+
274+
275+
Question to SQL Took 2.8951 seconds
276+
Execute Query SQL Took 0.1036 seconds
277+
+----+--------------+--------------+
278+
| | PlaylistId | TrackCount |
279+
|----+--------------+--------------|
280+
| 0 | 1 | 3290 |
281+
| 1 | 3 | 213 |
282+
| 2 | 5 | 1477 |
283+
| 3 | 8 | 3290 |
284+
| 4 | 10 | 213 |
285+
| 5 | 11 | 39 |
286+
| 6 | 12 | 75 |
287+
| 7 | 13 | 25 |
288+
| 8 | 14 | 25 |
289+
| 9 | 15 | 25 |
290+
| 10 | 16 | 15 |
291+
| 11 | 17 | 26 |
292+
+----+--------------+--------------+
293+
SELECT PlaylistId, COUNT(TrackId) as TrackCount FROM PlaylistTrack GROUP BY PlaylistId HAVING TrackCount > 5;
294+
```
295+
129296
## Spider 2.0-Lite (Planned)
130297
131298
[Spider 2.0-Lite](https://github.com/xlang-ai/Spider2/tree/main/spider2-lite) is a text-to-SQL evaluation framework comprising 547 real enterprise-level database use cases across database systems such as BigQuery, Snowflake, and SQLite. It assesses the ability of language models to convert text to SQL in complex enterprise environments.

example.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import logging
2+
import os
3+
import sys
4+
import uuid
5+
6+
import pandas as pd
7+
from camel.embeddings import OpenAIEmbedding
8+
from camel.models import ModelFactory
9+
from camel.types import ModelPlatformType, ModelType
10+
from colorama import Fore
11+
from tabulate import tabulate
12+
13+
from camel_database_agent import DatabaseAgent
14+
from camel_database_agent.database.database_manager import DatabaseManager
15+
from camel_database_agent.database_base import TrainLevel
16+
17+
# Configure logging settings to show errors on stdout
18+
logging.basicConfig(
19+
level=logging.ERROR,
20+
format="%(message)s",
21+
handlers=[logging.StreamHandler(sys.stdout)],
22+
force=True,
23+
)
24+
# Set specific logging level for the application module
25+
logging.getLogger("camel_database_agent").setLevel(logging.INFO)
26+
logger = logging.getLogger(__name__)
27+
28+
# Configure pandas display options to show complete data
29+
pd.set_option("display.max_rows", None) # Show all rows
30+
pd.set_option("display.max_columns", None) # Show all columns
31+
pd.set_option("display.width", None) # Auto-detect display width
32+
pd.set_option("display.max_colwidth", None) # Show full content of each cell
33+
34+
# Define database connection string
35+
database_url = "sqlite:///database/sqlite/music.sqlite"
36+
37+
# Initialize the database agent with required components
38+
database_agent = DatabaseAgent(
39+
interactive_mode=True,
40+
database_manager=DatabaseManager(db_url=database_url),
41+
# Configure LLM model
42+
model=ModelFactory.create(
43+
model_platform=ModelPlatformType.OPENAI,
44+
model_type=ModelType.GPT_4O_MINI,
45+
api_key=os.getenv("OPENAI_API_KEY"),
46+
url=os.getenv("OPENAI_API_BASE_URL"),
47+
),
48+
# Configure embedding model
49+
embedding_model=OpenAIEmbedding(
50+
api_key=os.getenv("OPENAI_API_KEY"),
51+
url=os.getenv("OPENAI_API_BASE_URL"),
52+
),
53+
)
54+
55+
# Train agent's knowledge about the database schema
56+
database_agent.train_knowledge(
57+
# Training level for database knowledge extraction
58+
# MEDIUM level: Balances training time and knowledge depth by:
59+
# - Analyzing schema relationships
60+
# - Extracting representative sample data
61+
# - Generating a moderate number of query examples
62+
level=TrainLevel.MEDIUM,
63+
# Whether to retrain the knowledge base from scratch
64+
# If True: Forces regeneration of all database insights and examples
65+
# If False: Uses existing cached knowledge if available
66+
reset_train=False,
67+
)
68+
69+
# Display database overview information
70+
print(f"{Fore.GREEN}Database Overview")
71+
print("=" * 50)
72+
print(f"{database_agent.get_summary()}\n\n{Fore.RESET}")
73+
74+
# Display recommended example questions
75+
print(f"{Fore.GREEN}Recommendation Question")
76+
print("=" * 50)
77+
print(f"{database_agent.get_recommendation_question()}\n\n{Fore.RESET}")
78+
79+
# Execute a sample query using natural language
80+
response = database_agent.ask(
81+
session_id=str(uuid.uuid4()), question="List all playlists with more than 5 tracks"
82+
)
83+
84+
# Handle and display the query results
85+
if response.success:
86+
if response.dataset is not None:
87+
# Format successful results as a table
88+
data = tabulate(tabular_data=response.dataset, headers='keys', tablefmt='psql')
89+
print(f"{Fore.GREEN}{data}{Fore.RESET}")
90+
else:
91+
print(f"{Fore.GREEN}No results found.{Fore.RESET}")
92+
# Display the SQL that was generated
93+
print(f"{Fore.YELLOW}{response.sql}{Fore.RESET}")
94+
else:
95+
# Display error message if query failed
96+
print(f"{Fore.RED}+ {response.error}{Fore.RESET}")

0 commit comments

Comments
 (0)