|
9 | 9 | from langchain.chat_models import init_chat_model |
10 | 10 |
|
11 | 11 | from any_chatbot.indexing import embed_and_index_all_docs |
12 | | -from any_chatbot.tools import initialize_retrieve_tool |
| 12 | +from any_chatbot.tools import initialize_retrieve_tool, initialize_sql_toolkit |
| 13 | +from any_chatbot.prompts import system_message |
13 | 14 |
|
14 | 15 | load_dotenv() |
15 | 16 |
|
16 | 17 | BASE = Path(__file__).parent.parent.parent |
17 | 18 | DATA = BASE / "data" |
18 | 19 | OUTPUTS = BASE / "outputs" |
| 20 | +DATABASE = DATA / "csv_excel_to_db" / "my_data.duckdb" |
19 | 21 |
|
20 | 22 | # INDEXING |
21 | | -embeddings, vector_store = embed_and_index_all_docs(DATA) |
| 23 | +embeddings, vector_store = embed_and_index_all_docs(DATA, DATABASE) |
22 | 24 |
|
23 | 25 | # BUILD LLM |
24 | 26 | if not os.environ.get("GOOGLE_API_KEY"): |
|
27 | 29 |
|
28 | 30 | # LOAD TOOLS |
29 | 31 | retrieve_tool = initialize_retrieve_tool(vector_store) |
| 32 | +sql_tools = initialize_sql_toolkit(llm, DATABASE) |
30 | 33 |
|
31 | 34 | # BUILD AGENT |
32 | 35 | # build checkpointer |
33 | 36 | memory = MemorySaver() |
34 | 37 | # build agent |
35 | | -agent_executor = create_react_agent(llm, [retrieve_tool], checkpointer=memory) |
| 38 | +agent_executor = create_react_agent( |
| 39 | + llm, [retrieve_tool, *sql_tools], prompt=system_message, checkpointer=memory |
| 40 | +) |
36 | 41 | # save architecture graph image |
37 | 42 | png_bytes = agent_executor.get_graph().draw_mermaid_png() |
38 | 43 | # save to file |
|
52 | 57 | # ) |
53 | 58 |
|
54 | 59 | input_message = ( |
55 | | - "What colums does the excel have? once you found the answer, tell me there types too.\n\n" |
| 60 | + "How many employees were working for Nike? The informaton is in the pdf.\n\n" |
| 61 | + # "What columns does the excel have? Once you have found the answer, tell me their types too.\n\n"
| 62 | + # "Once you have that answer, I want you to calculate the median for each column.\n\n" |
56 | 63 | "When you don't know while files the user is talking about, use the functional call to retrieve what data is available with a general prompt.\n\n" |
| 64 | + "You can refine your semantic search queries and try multiple times with different queries until you resonably determine the data is not available on the given documents.\n\n" |
57 | 65 | "Base your answers only on the retrieved information thorugh the functional call you have. You can retreive MULTIPLE TIMES" |
58 | 66 | ) |
59 | 67 |
|
|
0 commit comments