Post Configuration
- -Generated Post
- -Your generated post will appear here
-Fill in the form and click Generate
-First, install the required packages:
-
-pip install openai langchain llama-index
-
- Load your documents using LlamaIndex:
-
-from llama_index.core import SimpleDirectoryReader
-documents = SimpleDirectoryReader("./data").load_data()
-
- Create a searchable index from your documents:
-
-from llama_index.core import VectorStoreIndex
-index = VectorStoreIndex.from_documents(documents)
-
- Set up the query interface:
-
-query_engine = index.as_query_engine()
-response = query_engine.query("What are the main types of AI agents?")
-
- Once you have a basic agent working, consider:
-- Fill the fields, click Generate to draft a - LinkedIn post, edit if needed, then Publish. -
-
- {JSON.stringify(publishResponse, null, 2)}
-
- /api/generate and
- /api/publish with server-to-server calls to n8n.
- {result}
-(Last {history_limit} operations)
- """, unsafe_allow_html=True) - - for i, entry in enumerate(st.session_state.processing_history, 1): - with st.expander( - f"Operation {i}: {entry['type']} - {entry['timestamp']}", - expanded=(i == 1) # Auto-expand most recent operation - ): - col1, col2 = st.columns([2, 1]) - with col1: - st.write("**Input:**", entry['input']) - st.write("**Result:**", entry['result']) - - with col2: - st.write("**Configurations:**") - st.write("- Character limit:", entry['configs']['character_limit']) - st.write("- Show steps:", "Yes" if entry['configs']['show_steps'] else "No") \ No newline at end of file diff --git a/Parag_Doshi/SummarizerInGradio/MedicalSummarizer_pdoshi.ipynb b/Parag_Doshi/SummarizerInGradio/MedicalSummarizer_pdoshi.ipynb deleted file mode 100644 index 2998e0e..0000000 --- a/Parag_Doshi/SummarizerInGradio/MedicalSummarizer_pdoshi.ipynb +++ /dev/null @@ -1,96 +0,0 @@ -import gradio as gr -from transformers import pipeline -import os - -# Step 1: Load model from Hugging Face (replace with your own) -# Example: "facebook/bart-large-cnn" or "t5-small" -MODEL_NAME = "Falconsai/medical_summarization" - -# Step 2: Use your Hugging Face token (you can set as environment variable) -# HF_TOKEN = os.getenv("HF_TOKEN") # or paste your token directly here (not recommended for public repos) - -from google.colab import userdata -# userdata.get('HF_TOKEN') - - -# Step 3: Define summarization function -# Load summarization pipeline -summarizer = pipeline("summarization", model=MODEL_NAME) - -def summarize_text(text): - if not text.strip(): - return "โ ๏ธ Please provide a medical document to summarize." 
- summary = summarizer(text, max_length=130, min_length=30, do_sample=False) - return summary[0]["summary_text"] - -# Step 4: Define export functionality -def export_summary(summary): - with open("medical_summary.txt", "w", encoding="utf-8") as f: - f.write(summary) - return "medical_summary.txt" - -# Step 4.1: Generate an image from text -#TTS = pipeline("text-to-image", model="SDXL LoRA Fine-tuning - ZB-Tech/Text-To-Image") # Corrected model name -#def generate_image(text): -# if not text.strip(): -# return "โ ๏ธ Please provide a medical document to generate an image from." - -# image = TTS(text)[0] # Access the image from the output list -# return image - -from huggingface_hub import InferenceClient - -client = InferenceClient( - provider="auto", - token=userdata.get('HF_TOKEN'),) - -# output is a PIL.Image object -#image = client.text_to_image( -# "Astronaut riding a horse", -# model="Mridulmadhok/Mridul_flux", -#) - - - -# Step 5: Build Gradio interface -with gr.Blocks(theme=gr.themes.Soft()) as demo: - gr.Markdown("## ๐ Medical Report Summarizer ") - gr.Markdown("Enter your medical report below and click **Summarize**.") - - with gr.Row(): - text_input = gr.Textbox( - label="Input Text", - placeholder="Paste a medical report here...", - lines=10 - ) - with gr.Row(): - summarize_button = gr.Button("๐ Summarize") - clear_button = gr.Button("๐งน Clear") - with gr.Row(): - output_box = gr.Textbox(label="Summary Output", lines=10) - - with gr.Row(): - gr.Markdown("## ๐ผ๏ธ Generate Image from Summary") # Added new section title - genimage_button = gr.Button("๐จ Generate Image - Not working") - # image_output = gr.Image(label="Generated Image") # Added image output component - - with gr.Row(): - export_button = gr.Button("๐พ Export Summary") - gr.Markdown("---") # Added separator - - # Actions - summarize_button.click(fn=summarize_text, inputs=text_input, outputs=output_box) - clear_button.click(fn=lambda: "", inputs=None, outputs=[text_input, output_box]) - 
export_button.click(fn=export_summary, inputs=output_box, outputs=gr.File()) - # genimage_button.click(fn=generate_image, inputs=output_box, outputs=image_output) # Changed input to output_box - - - - - # Optional: Toggle light/dark theme - gr.Markdown("---") - gr.Markdown("๐ Switch between light and dark themes using the toggle button in the top-right corner.") - -# Launch app -if __name__ == "__main__": - demo.launch(share=False) diff --git a/Pradeep_Kumar_Myakala/README.md b/Pradeep_Kumar_Myakala/README.md deleted file mode 100644 index c861dc0..0000000 --- a/Pradeep_Kumar_Myakala/README.md +++ /dev/null @@ -1 +0,0 @@ -# Pradeep_Kumar_Myakala diff --git a/Pramod_bhagat/Day 11 Agentic RAG/agentic_rag_app.py b/Pramod_bhagat/Day 11 Agentic RAG/agentic_rag_app.py deleted file mode 100644 index 7d28d6c..0000000 --- a/Pramod_bhagat/Day 11 Agentic RAG/agentic_rag_app.py +++ /dev/null @@ -1,309 +0,0 @@ -""" -Agentic RAG System with PDF, Wikipedia, and Web Search -Uses OpenRouter for LLM access -Run with: streamlit run app.py -""" - -import streamlit as st -from langchain.agents import AgentExecutor, create_react_agent -from langchain.tools import Tool -from langchain_openai import ChatOpenAI, OpenAIEmbeddings -from langchain import hub -from langchain_community.document_loaders import PyPDFLoader -from langchain_community.vectorstores import FAISS -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.utilities import WikipediaAPIWrapper -from langchain_community.tools import DuckDuckGoSearchRun -import os -from dotenv import load_dotenv -import tempfile - -# Load environment variables -load_dotenv() - -# ============================================ -# PDF RETRIEVER CLASS -# ============================================ -class PDFRetriever: - def __init__(self, pdf_path): - self.pdf_path = pdf_path - self.vectorstore = None - - def setup_vectorstore(self): - """Load PDF and create searchable vector database""" - try: - # Load PDF - 
loader = PyPDFLoader(self.pdf_path) - documents = loader.load() - - if not documents: - return False, "PDF is empty or could not be read" - - # Split into chunks - text_splitter = RecursiveCharacterTextSplitter( - chunk_size=1000, - chunk_overlap=200 - ) - splits = text_splitter.split_documents(documents) - - # Create searchable vector store - # Use OpenRouter for embeddings - embeddings = OpenAIEmbeddings( - openai_api_key=os.getenv("OPENROUTER_API_KEY"), - openai_api_base="https://openrouter.ai/api/v1" - ) - self.vectorstore = FAISS.from_documents(splits, embeddings) - - return True, f"Successfully loaded {len(documents)} pages from PDF" - except Exception as e: - return False, f"Error loading PDF: {str(e)}" - - def search(self, query: str, k: int = 3) -> str: - """Search PDF for relevant information""" - if not self.vectorstore: - return "NO_ANSWER: PDF not loaded properly" - - try: - # Find most relevant chunks - docs = self.vectorstore.similarity_search(query, k=k) - - if not docs: - return "NO_ANSWER: Could not find relevant information in PDF" - - # Combine results - context = "\n\n".join([doc.page_content for doc in docs]) - return f"FOUND IN PDF:\n{context}" - except Exception as e: - return f"NO_ANSWER: Search error - {str(e)}" - -# ============================================ -# TOOL FUNCTIONS (Simple and Clear) -# ============================================ - -def search_pdf(query: str) -> str: - """ - Tool 1: Search the uploaded PDF document - This is the PRIMARY source - always check here first - """ - if 'pdf_retriever' not in st.session_state or st.session_state.pdf_retriever is None: - return "NO_ANSWER: No PDF loaded. Please upload a PDF first." 
- - result = st.session_state.pdf_retriever.search(query) - return result - -def search_wikipedia(query: str) -> str: - """ - Tool 2: Search Wikipedia - Use this when PDF doesn't have the answer - """ - try: - wikipedia = WikipediaAPIWrapper() - result = wikipedia.run(query) - return f"FOUND ON WIKIPEDIA:\n{result}" - except Exception as e: - return f"NO_ANSWER: Wikipedia search failed - {str(e)}" - -def search_web(query: str) -> str: - """ - Tool 3: Search the web using DuckDuckGo - Use this as a last resort when PDF and Wikipedia don't help - """ - try: - search = DuckDuckGoSearchRun() - result = search.run(query) - return f"FOUND ON WEB:\n{result}" - except Exception as e: - return f"NO_ANSWER: Web search failed - {str(e)}" - -# ============================================ -# AGENT SETUP (The Brain of the System) -# ============================================ - -def create_agentic_rag(): - """ - Create an agent that intelligently decides which tool to use - - Decision Logic: - 1. First try PDF (primary source) - 2. If PDF has no answer, try Wikipedia - 3. If Wikipedia has no answer, try web search - """ - - # Setup LLM with OpenRouter - llm = ChatOpenAI( - model="openai/gpt-4o", - openai_api_key=os.getenv("OPENROUTER_API_KEY"), - openai_api_base="https://openrouter.ai/api/v1", - temperature=0 - ) - - # Define tools for the agent - tools = [ - Tool( - name="SearchPDF", - func=search_pdf, - description="""ALWAYS USE THIS FIRST! Search the PDF document for information. - If the result contains 'NO_ANSWER', then try other tools.""" - ), - Tool( - name="SearchWikipedia", - func=search_wikipedia, - description="""Use this when SearchPDF returns 'NO_ANSWER'. - Search Wikipedia for general knowledge and factual information.""" - ), - Tool( - name="SearchWeb", - func=search_web, - description="""Use this as a LAST RESORT when both SearchPDF and SearchWikipedia return 'NO_ANSWER'. 
- Search the web for current information.""" - ) - ] - - # Get the ReAct prompt template - prompt = hub.pull("hwchase17/react") - - # Create the agent - agent = create_react_agent(llm, tools, prompt) - - # Create agent executor - agent_executor = AgentExecutor( - agent=agent, - tools=tools, - verbose=True, - handle_parsing_errors=True, - max_iterations=5 - ) - - return agent_executor - -# ============================================ -# STREAMLIT UI -# ============================================ - -def main(): - st.set_page_config( - page_title="Agentic RAG System", - page_icon="๐ค", - layout="wide" - ) - - st.title("๐ค Agentic RAG System") - st.markdown(""" - This intelligent system searches for answers in this order: - 1. **Your PDF** (primary source) - 2. **Wikipedia** (if PDF doesn't have the answer) - 3. **Web Search** (if Wikipedia doesn't have the answer) - """) - - # Initialize session state - if 'pdf_retriever' not in st.session_state: - st.session_state.pdf_retriever = None - if 'agent' not in st.session_state: - st.session_state.agent = None - if 'messages' not in st.session_state: - st.session_state.messages = [] - - # Sidebar for PDF upload and sample PDFs - with st.sidebar: - st.header("๐ Document Upload") - - # Sample PDFs section - st.subheader("Sample PDFs") - st.markdown(""" - Download sample PDFs to test: - - [AI Research Paper](https://arxiv.org/pdf/1706.03762.pdf) - "Attention is All You Need" - - [Python Tutorial](https://www.python.org/ftp/python/doc/3.11.0/python-3.11.0-docs-pdf-a4.zip) - - [Climate Report](https://www.ipcc.ch/report/ar6/wg1/downloads/report/IPCC_AR6_WGI_SPM.pdf) - """) - - st.divider() - - # File upload - uploaded_file = st.file_uploader( - "Upload your PDF", - type=['pdf'], - help="Upload a PDF document to search" - ) - - if uploaded_file is not None: - # Save uploaded file temporarily - with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file: - tmp_file.write(uploaded_file.getvalue()) - tmp_path = 
tmp_file.name - - # Load PDF - with st.spinner("Loading PDF..."): - pdf_retriever = PDFRetriever(tmp_path) - success, message = pdf_retriever.setup_vectorstore() - - if success: - st.session_state.pdf_retriever = pdf_retriever - st.success(message) - - # Create agent - st.session_state.agent = create_agentic_rag() - st.success("โ Agent ready!") - else: - st.error(message) - - # Clean up temp file - os.unlink(tmp_path) - - st.divider() - - # API Key status - if os.getenv("OPENROUTER_API_KEY"): - st.success("โ OpenRouter API Key loaded") - else: - st.error("โ OpenRouter API Key not found in .env") - - - # Main chat interface - st.divider() - - # Display chat messages - for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.markdown(message["content"]) - - # Chat input - if prompt := st.chat_input("Ask a question..."): - # Check if agent is ready - if st.session_state.agent is None: - st.error("โ ๏ธ Please upload a PDF first!") - return - - # Add user message to chat - st.session_state.messages.append({"role": "user", "content": prompt}) - with st.chat_message("user"): - st.markdown(prompt) - - # Get agent response - with st.chat_message("assistant"): - with st.spinner("๐ค Thinking..."): - try: - # Show agent's thinking process - with st.expander("๐ Agent's Decision Process", expanded=False): - response = st.session_state.agent.invoke({"input": prompt}) - st.text(response.get("intermediate_steps", "")) - - # Get final answer - answer = response["output"] - st.markdown(answer) - - # Add to messages - st.session_state.messages.append({"role": "assistant", "content": answer}) - - except Exception as e: - error_msg = f"โ Error: {str(e)}" - st.error(error_msg) - st.session_state.messages.append({"role": "assistant", "content": error_msg}) - - # Clear chat button - if st.session_state.messages: - if st.button("๐๏ธ Clear Chat History"): - st.session_state.messages = [] - st.rerun() - -if __name__ == "__main__": - main() \ No newline at end of 
file diff --git a/Pramod_bhagat/Day 11 Agentic RAG/readme.md b/Pramod_bhagat/Day 11 Agentic RAG/readme.md deleted file mode 100644 index e33c194..0000000 --- a/Pramod_bhagat/Day 11 Agentic RAG/readme.md +++ /dev/null @@ -1,199 +0,0 @@ -# ๐ค Agentic RAG System - -An intelligent Retrieval-Augmented Generation (RAG) system that automatically searches multiple sources to answer your questions. The agent intelligently decides which source to query based on availability and relevance. - -## ๐ Features - -- **Intelligent Multi-Source Search**: Automatically searches PDF documents, Wikipedia, and the web in order of priority -- **Agentic Decision Making**: Uses a ReAct agent that thinks through which tools to use -- **PDF Processing**: Upload and search through your own PDF documents -- **Vector Search**: Uses FAISS for semantic similarity search within PDFs -- **Interactive Chat Interface**: Beautiful Streamlit UI with chat history -- **Transparent Reasoning**: See the agent's decision-making process in real-time - -## ๐ How It Works - -The system follows an intelligent search hierarchy: - -1. **Primary Source - Your PDF**: Always checks uploaded PDF first -2. **Secondary Source - Wikipedia**: Falls back to Wikipedia for general knowledge -3. **Tertiary Source - Web Search**: Uses DuckDuckGo as last resort for current information - -The agent automatically decides when to move to the next source based on whether the current source has relevant information. - -## ๐ Prerequisites - -- Python 3.8 or higher -- OpenRouter API key (for LLM access) -- OpenAI API key (for embeddings) - -## ๐ Installation - -1. **Clone the repository** -```bash -git clone{st.session_state.current_persona}
-First, install the required packages:
-
-pip install openai langchain llama-index
-
- Load your documents using LlamaIndex:
-
-from llama_index.core import SimpleDirectoryReader
-documents = SimpleDirectoryReader("./data").load_data()
-
- Create a searchable index from your documents:
-
-from llama_index.core import VectorStoreIndex
-index = VectorStoreIndex.from_documents(documents)
-
- Set up the query interface:
-
-query_engine = index.as_query_engine()
-response = query_engine.query("What are the main types of AI agents?")
-
- Once you have a basic agent working, consider:
-- {current_p.get('description', 'Custom personality')} | Model: {st.session_state.model_choice} -
-- {current_p.get('description', 'Custom personality')} | Model: {st.session_state.model_choice} -
-Create engaging LinkedIn posts that resonate with your audience. Choose your style, tone, and let AI craft the perfect post.
-Your generated post will appear here
-Fill in the form and click Generate
-- Professional-grade Retrieval-Augmented Generation with full configurability -
-`) โ *Confidence:* <0โ1 or Low/Med/High>
-
- ๐ฏ **Translation (โ ):**
-
-
- ๐ **Alternative translations (if relevant):**
- -
- -
- -
-
- ๐ก **Cultural / Regional note (if relevant):**
-
- ```
-
-3. **Language Selection UI**
- - Sidebar selectbox for **Target language** (e.g., English, Spanish, French, German, Hindi, Japanese, etc.). Map to ISO codes.
- - Toggle for **Bidirectional (Auto)** vs **Source โ Target**. When fixed, show a source selector.
-
-4. **Confidence Display**
- - Show numeric confidence or a mapped label (โฅ0.85=High, 0.6โ0.84=Medium, <0.6=Low) next to the detected language.
-
-5. **Edge Cases**
- - Low confidence โ ask user to confirm source language.
- - Nonโtranslatable input (code, numbers, URLs) โ respond briefly and skip translation.
- - Preserve punctuation, emojis, and line breaks. Keep code blocks unchanged.
-
----
-
-### **Instruction 3 โ Challenge 2: Personality Selector (Presets + Custom)**
-1. **Personas (at least 4 presets + Custom)**
- - **professional** โ Formal, structured, businessโfocused; expertise: strategy, comms; tone: polite, efficient, resultsโoriented.
- - **creative** โ Imaginative, expressive, inspiring; expertise: storytelling, content; tone: enthusiastic, artistic, encouraging.
- - **technical** โ Precise, detailed, codeโfocused; expertise: programming, systems; tone: analytical, educational, stepโwise.
- - **friend** โ Casual, supportive, conversational; expertise: general chat; tone: warm, empathetic, upbeat.
- - **custom** โ User-specified `style`, `expertise`, `tone` (text inputs).
-
-2. **Dynamic System Prompts**
- - Each persona defines its own style directives, formatting preferences, and guardrails.
- - Allow **midโconversation switching**; when persona changes, refresh the system prompt for subsequent turns while preserving prior history.
-
-3. **UI & Persistence**
- - Sidebar dropdown for persona selection + short descriptions (tooltip or help text).
- - Top of main chat shows current persona badge (icon + name).
- - Store `personality_key` and `custom_persona` in `meta` and persist per chat.
-
-4. **Example Behavior**
- - *User:* โHow do I handle a difficult client meeting?โ
- - **Professional:** numbered strategy steps, clear action items.
- - **Creative:** metaphorical, vivid, motivational.
- - **Technical:** root-cause analysis, checklists, structured plan.
- - **Friend:** empathetic, conversational tips.
-
----
-
-### **Instruction 4 โ Challenge 3: Export Functionality (TXT / JSON / CSV)**
-1. **Data Processing**
- - Convert `st.session_state.messages` into:
- - **TXT** (humanโreadable transcript + metadata + stats).
- - **JSON** (structured `export_metadata`, `conversation[]`, `statistics`).
- - **CSV** (`Message_ID, Timestamp, Role, Content, Character_Count, Word_Count`).
-
-2. **Metadata & Statistics**
- - Include: `export_timestamp`, `format_version`, `session_id`, `total_messages`, `session_duration_minutes`, `model`, `mode`, `target_language`, `personality_key`, `bidirectional`, `last_detected_source`, `last_confidence`.
- - Stats: `user_messages`, `assistant_messages`, `total_characters`, `average_message_length`.
-
-3. **Download Interface**
- - `st.download_button` per chosen format; filenames like `chat__.txt|json|csv`.
- - Optional filters: date range and role (user/assistant/all).
-
-4. **Formatting Rules**
- - **TXT**: human timestamps, separators, preserve markdown & line breaks.
- - **JSON**: valid JSON; machineโreadable timestamps (ISO 8601).
- - **CSV**: proper quoting for commas/quotes/newlines.
-
-5. **Edge Cases**
- - Empty conversation โ disable export with hint.
- - Large files โ build content in memory safely; avoid excessive RAM.
- - Always handle exceptions and surface message in **Output / Errors** textbox.
-
----
-
-## **Getting Started (Implementation Order)**
-1. Copy your existing workshop chatbot to this structure.
-2. Implement **Base App (Instruction 1)**: page config, client, session state, persistence helpers, sidebar history (aligned list), chat loop with streaming + errors.
-3. Add **Mode switch** + **Target language** + **Direction toggle**.
-4. Implement **Translation Mode (Instruction 2)**: twoโstage detect โ translate, cultural notes, alternatives, confidence & history.
-5. Add **Personality Selector (Instruction 3)** with dynamic system prompts and midโchat switching.
-6. Implement **Export (Instruction 4)** with TXT โ JSON โ CSV progression.
-7. Final polish: stable rename UX, active persona badge, tidy status/error box, model dropdown.
-
----
-
-## **Example Interactions (for QA)**
-
-**A) Translation Mode**
-- **Input:** `Bonjour, comment allez-vous ?`
- **Output:**
- - ๐ Detected Language: *French (fr)* โ Confidence: High (โ0.95)
- - ๐ฏ Translation (English): โHello, how are you?โ
- - ๐ Alternative: โHi, how are you doing?โ
- - ๐ก Cultural Note: Formal register; casual is โSalut, รงa va ?โ.
-
-- **Input:** `I love this weather` (Target: Spanish, Auto/Bidirectional)
- **Output:**
- - ๐ Detected Language: *English (en)* โ Confidence: High
- - ๐ฏ Translation (Spanish): โMe encanta este clima.โ
- - ๐ Alternative: โAdoro este tiempo.โ (more emphatic)
- - ๐ก Regional Note: In Mexico you might hear โEstรก padrรญsimo el clima.โ
-
-**B) Personality Selector**
-- **Professional:** Provide 4โstep structured plan, crisp action items.
-- **Creative:** Vivid metaphors, inspiring tone, playful emojis allowed.
-- **Technical:** Stepโbyโstep, precise, code blocks where relevant.
-- **Friend:** Warm, encouraging, simple language.
-
-**C) Export**
-- TXT example header:
- ```text
- Chat Export โ 2024โ01โ15 14:55
- ========================================
- Session Information:
- - Total Messages: 12
- - Duration: 25 minutes
- - Model: openrouter/anthropic/claude-3.5-sonnet
- - Mode: translation (bidirectional)
- - Target Language: English
- - Last Detected Source: French (0.92)
- ----------------------------------------
- [14:30:15] You: Hello! How can I help you today?
- [14:30:22] Assistant: Hello! I'm here to help youโฆ
- ```
-
----
-
-## **Success Criteria**
-- โ
Automatically detects input language and shows confidence.
-- โ
Translates accurately to the selected target language.
-- โ
Supports **bidirectional** flow and remembers the previous other language.
-- โ
Provides **cultural/regional notes** and **alternative translations** when relevant.
-- โ
Maintains multiโchat persistent history with stable, aligned sidebar list and inline rename.
-- โ
Exports clean **TXT/JSON/CSV** with metadata and statistics; supports optional filters.
-- โ
Handles all errors gracefully and displays them in the **Output / Errors** area.
-- โ
Clear, modern UI/UX using Streamlit chat primitives.
-
----
-
-## **Notes:**
-- No background/asynchronous claims โ all work is performed within the current request/response cycle.
-- Keep outputs concise in normal chat; expand detail when explicitly asked.
-- Use safe content practices; refuse disallowed content with a brief explanation and safer alternatives where relevant.
-- Preserve code blocks and formatting in translations; donโt hallucinate facts; mark uncertainties.
-- Sidebar **chat history** must be a simple, clean list (no misaligned icons; no shifting on rename).
-- Provide the complete `app.py`, `requirements.txt`, and example `.streamlit/secrets.toml` in your reply.
diff --git a/Vivek_Trivedi/ChatBot/app.py b/Vivek_Trivedi/ChatBot/app.py
deleted file mode 100644
index 207b654..0000000
--- a/Vivek_Trivedi/ChatBot/app.py
+++ /dev/null
@@ -1,1167 +0,0 @@
-import os
-import json
-import uuid
-from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional, Tuple
-
-import streamlit as st
-import re
-import csv
-import html
-try:
- from slugify import slugify # from python-slugify
-except Exception:
- def slugify(text: str) -> str:
- s = re.sub(r"\s+", "-", text.strip().lower())
- s = re.sub(r"[^a-z0-9\-]+", "", s)
- return s or "chat"
-try:
- from dateutil import parser as dtparser
-except Exception:
- class _DTParser:
- @staticmethod
- def isoparse(s: str):
- return datetime.fromisoformat(s.replace("Z", "+00:00"))
- dtparser = _DTParser()
-try:
- import pandas as pd
-except Exception:
- pd = None
-# Note: we import the OpenAI client below as _Client for compatibility
-
-apptitle = "Manthan"
-# === PART 1: Bootstrap ===
-st.set_page_config(page_title=apptitle, page_icon="๐ค", layout="wide")
-
-APP_TITLE = apptitle
-HISTORY_DIR = "chat-history"
-DEFAULT_MODEL = "anthropic/claude-3.5-sonnet"
-MODEL_OPTIONS = [
- "anthropic/claude-3.5-sonnet",
- "openai/gpt-4o-mini",
- "meta-llama/llama-3.1-405b-instruct",
-]
-
-# Languages
-LANG_MAP = {
- "English": "en",
- "Spanish": "es",
- "French": "fr",
- "German": "de",
- "Hindi": "hi",
- "Japanese": "ja",
- "Chinese (Simplified)": "zh",
- "Portuguese": "pt",
- "Italian": "it",
- "Korean": "ko",
- "Russian": "ru",
-}
-CODE_TO_NAME = {v: k for k, v in LANG_MAP.items()}
-
-def utcnow_iso() -> str:
- return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
-
-def ensure_dirs():
- os.makedirs(HISTORY_DIR, exist_ok=True)
-
-def file_slug(title: str) -> str:
- s = slugify(title or "New chat")
- return s or "chat"
-
-def chat_filepath(chat_id: str, title: str) -> str:
- return os.path.join(HISTORY_DIR, f"{chat_id}__{file_slug(title)}.json")
-
-def list_chats() -> List[Dict[str, Any]]:
- ensure_dirs()
- rows: List[Dict[str, Any]] = []
- for name in os.listdir(HISTORY_DIR):
- if not name.endswith(".json"):
- continue
- path = os.path.join(HISTORY_DIR, name)
- try:
- with open(path, "r", encoding="utf-8") as f:
- data = json.load(f)
- rows.append({
- "id": data.get("id"),
- "title": data.get("title", "New chat"),
- "created": data.get("created", ""),
- "path": path,
- })
- except Exception:
- continue
- # Stable ordering bound to creation time only (newest first).
- # Do NOT re-order on rename or message updates.
- rows.sort(key=lambda r: (r.get("created", ""), r.get("id") or ""), reverse=True)
- return rows
-
-def load_chat_file(path: str) -> Dict[str, Any]:
- with open(path, "r", encoding="utf-8") as f:
- return json.load(f)
-
-def find_chat_path_by_id(chat_id: str) -> Optional[str]:
- for it in list_chats():
- if it.get("id") == chat_id:
- return it.get("path")
- return None
-
-def load_chat(chat_id: str) -> Optional[Dict[str, Any]]:
- p = find_chat_path_by_id(chat_id)
- if not p:
- return None
- try:
- return load_chat_file(p)
- except Exception as e:
- set_status_text(e)
- return None
-
-def save_chat(state: Dict[str, Any]) -> None:
- ensure_dirs()
- chat_id = state.get("id") or str(uuid.uuid4())
- title = state.get("title") or "New chat"
- path = chat_filepath(chat_id, title)
- try:
- with open(path, "w", encoding="utf-8") as f:
- json.dump(state, f, ensure_ascii=False, indent=2)
- except Exception as e:
- set_status_text(e)
-
-def rename_chat_file(old_id: str, new_title: str) -> Optional[str]:
- old_path = find_chat_path_by_id(old_id)
- if not old_path:
- return None
- new_path = chat_filepath(old_id, new_title)
- try:
- with open(old_path, "r", encoding="utf-8") as f:
- data = json.load(f)
- data["title"] = new_title
- with open(new_path, "w", encoding="utf-8") as f:
- json.dump(data, f, ensure_ascii=False, indent=2)
- if os.path.abspath(new_path) != os.path.abspath(old_path):
- try:
- os.remove(old_path)
- except Exception:
- pass
- return new_path
- except Exception as e:
- set_status_text(e)
- return None
-
-def delete_chat(chat_id: str) -> None:
- p = find_chat_path_by_id(chat_id)
- if p and os.path.exists(p):
- try:
- os.remove(p)
- except Exception as e:
- set_status_text(e)
-
-def set_status_text(exc: Exception | str):
- msg = f"{type(exc).__name__}: {exc}" if isinstance(exc, Exception) else str(exc)
- st.session_state["status_text"] = (st.session_state.get("status_text", "").strip() + "\n" + msg).strip()
-
-def init_state():
- ensure_dirs()
- st.session_state.setdefault("chat_id", None)
- st.session_state.setdefault("messages", [])
- st.session_state.setdefault("meta", {
- "model": DEFAULT_MODEL,
- "mode": "standard",
- "target_language": "English",
- "last_detected_source": None,
- "last_confidence": None,
- "bidirectional": True,
- "previous_other_lang": None,
- "personality_key": "professional",
- "custom_persona": {"style": "", "expertise": "", "tone": ""},
- })
- st.session_state.setdefault("all_chats", list_chats())
- st.session_state.setdefault("rename_mode", False)
- st.session_state.setdefault("rename_value", "")
- st.session_state.setdefault("status_text", "")
- st.session_state.setdefault("source_language_fixed", None)
- st.session_state.setdefault("mode_radio", st.session_state["meta"].get("mode", "standard"))
- # Inline title edit state for main header
- st.session_state.setdefault("edit_title_mode", False)
- st.session_state.setdefault("title_edit_input", "")
- # Inline editing states
- st.session_state.setdefault("edit_title_mode", False)
- st.session_state.setdefault("edit_message_index", None)
- st.session_state.setdefault("edit_message_buffer", "")
-
-# === PART 2: Personas and prompts ===
-PERSONAS = {
- "professional": {"label": "Professional", "icon": "๐ผ", "desc": "Formal, structured, business-focused.", "style": "You are formal, concise, and business-oriented. Provide numbered steps and clear action items."},
- "creative": {"label": "Creative", "icon": "๐จ", "desc": "Imaginative, expressive, inspiring.", "style": "You are imaginative and expressive. Use vivid examples and an encouraging tone. Emojis allowed sparingly."},
- "technical": {"label": "Technical", "icon": "๐ง ", "desc": "Precise, detailed, code-focused.", "style": "You are precise and analytical. Provide step-by-step explanations and fenced code blocks when needed."},
- "friend": {"label": "Friend", "icon": "๐ค", "desc": "Casual, supportive, conversational.", "style": "You are warm, empathetic, and practical, with simple language."},
- "custom": {"label": "Custom", "icon": "โจ", "desc": "User-defined style/tone/expertise.", "style": ""},
-}
-
-def persona_block(meta: Dict[str, Any]) -> str:
- key = meta.get("personality_key", "professional")
- if key == "custom":
- c = meta.get("custom_persona", {}) or {}
- return "\n".join([
- "Persona: Custom.",
- f"Style: {c.get('style') or 'unspecified'}.",
- f"Expertise: {c.get('expertise') or 'unspecified'}.",
- f"Tone: {c.get('tone') or 'neutral'}.",
- ])
- p = PERSONAS.get(key, PERSONAS["professional"])
- return f"Persona: {p['label']}. {p['style']}"
-
-def core_guardrails_block() -> str:
- return (
- "You are a careful, concise assistant. Be accurate and do not hallucinate."
- " Show errors verbatim when asked. Never claim background/asynchronous work."
- " Refuse unsafe content briefly with safer alternatives. Preserve formatting and code blocks."
- )
-
-def translator_block(meta: Dict[str, Any]) -> str:
- target_name = meta.get("target_language", "English")
- return (
- "You are a professional translator. Detect source language and translate to the target.\n"
- "Goals: detect language, estimate confidence, translate, propose 1-3 alternatives, add a short cultural/usage note."
- " Preserve meaning, tone, register, line breaks, punctuation; keep code blocks unchanged.\n"
- "Output template (markdown):\n"
- "๐ **Detected Language:** (``) โ *Confidence:* <0โ1 or Low/Med/High>\n\n"
- f"๐ฏ **Translation (โ {target_name}):**\n\n\n"
- "๐ **Alternative translations (if relevant):**\n- \n- \n- \n\n"
- "๐ก **Cultural / Regional note (if relevant):**\n\n"
- )
-
-def build_system_prompt(meta: Dict[str, Any]) -> str:
- parts = [core_guardrails_block(), persona_block(meta)]
- if meta.get("mode", "standard") == "translation":
- parts.append(translator_block(meta))
- else:
- parts.append("You are a helpful assistant. Be brief by default. Use triple backticks for code. Avoid purple prose.")
- return "\n\n".join(parts)
-
-# === PART 3: Client and API helpers ===
-try:
- from openai import OpenAI as _Client
-except Exception:
- _Client = None
-
-def get_client() -> Optional[_Client]:
- try:
- api_key = st.secrets.get("OPENROUTER_API_KEY")
- except Exception:
- api_key = None
- if not api_key:
- set_status_text("Missing OPENROUTER_API_KEY in st.secrets.")
- return None
- if _Client is None:
- set_status_text("openai package not available.")
- return None
- try:
- return _Client(
- base_url="https://www.openrouter.ai/api/v1",
- api_key=api_key,
- default_headers={"HTTP-Referer": "http://localhost:8501", "X-Title": "My ChatBot"},
- )
- except Exception as e:
- set_status_text(e)
- return None
-
-def to_api_messages(system_prompt: str, history: List[Dict[str, Any]], user_content: Optional[str] = None) -> List[Dict[str, str]]:
- msgs: List[Dict[str, str]] = []
- if system_prompt:
- msgs.append({"role": "system", "content": system_prompt})
- # Avoid duplicating prior system prompts; we already injected the fresh system_prompt above.
- for m in history:
- r = m.get("role")
- if r in ("user", "assistant"):
- msgs.append({"role": r, "content": m.get("content", "")})
- if user_content is not None:
- msgs.append({"role": "user", "content": user_content})
- return msgs
-
-def _extract_delta_text(chunk: Any) -> Optional[str]:
- try:
- choice = chunk.choices[0]
- delta = getattr(choice, "delta", None)
- if isinstance(delta, dict):
- return delta.get("content")
- return getattr(delta, "content", None)
- except Exception:
- return None
-
-
-def stream_completion(client: _Client, model: str, messages: List[Dict[str, str]]) -> str:
- full = ""
- try:
- stream = client.chat.completions.create(model=model, messages=messages, stream=True)
- chat_ctx = None
- area = None
- for chunk in stream:
- delta_text = _extract_delta_text(chunk)
- if delta_text:
- if chat_ctx is None:
- # Create the assistant bubble only on first token to avoid empty bubbles
- chat_ctx = st.chat_message("assistant")
- area = chat_ctx.empty()
- full += delta_text
- if area is not None:
- area.markdown(full)
- except Exception as e:
- set_status_text(e)
- # Fallback: if nothing streamed (SDK/compat quirk), do a non-stream call
- if not full:
- try:
- resp = client.chat.completions.create(model=model, messages=messages, stream=False)
- full = getattr(resp.choices[0].message, "content", "") or ""
- if full:
- with st.chat_message("assistant"):
- st.markdown(full)
- except Exception as e:
- set_status_text(e)
- return ""
- return full
-
-
-# -----------------------------
-# Summarizer helpers
-# -----------------------------
-def _truncate_by_chars(msgs: List[Dict[str, str]], max_chars: int = 16000) -> List[Dict[str, str]]:
- total = sum(len(m.get("content", "")) for m in msgs)
- if total <= max_chars:
- return msgs
- out: List[Dict[str, str]] = []
- running = 0
- # Keep the last messages, ensure we keep the initial system
- sys = [m for m in msgs if m.get("role") == "system"]
- rest = [m for m in msgs if m.get("role") != "system"]
- # Start with the latest ones
- rest_rev = list(reversed(rest))
- chosen_rev: List[Dict[str, str]] = []
- for m in rest_rev:
- c = len(m.get("content", ""))
- if running + c > max_chars:
- break
- chosen_rev.append(m)
- running += c
- # Rebuild ordered: system first, then chronologically for chosen
- chosen = list(reversed(chosen_rev))
- out.extend(sys[:1])
- out.extend(chosen)
- return out
-
-
-def build_summary_messages(client_meta: Dict[str, Any], user_prompt: str) -> List[Dict[str, str]]:
- # Use current persona/mode by default
- system_prompt = build_system_prompt(client_meta)
- # Include full conversation (system + user + assistant)
- msgs: List[Dict[str, str]] = []
- if system_prompt:
- msgs.append({"role": "system", "content": system_prompt})
- for m in st.session_state.get("messages", []):
- role = m.get("role")
- if role in ("system", "user", "assistant"):
- msgs.append({"role": role, "content": m.get("content", "")})
- # Append summarizer instruction + user prompt
- final_user = (
- "Summarize the entire conversation so far. Follow the current persona and mode unless this prompt says otherwise.\n"
- f"User request: {user_prompt}"
- )
- msgs.append({"role": "user", "content": final_user})
- return _truncate_by_chars(msgs)
-
-def non_stream_completion(client: _Client, model: str, messages: List[Dict[str, str]]) -> str:
- try:
- resp = client.chat.completions.create(model=model, messages=messages, stream=False)
- return resp.choices[0].message.content or ""
- except Exception as e:
- set_status_text(e)
- return ""
-
-def main():
- init_state()
- ensure_current_chat()
- client = get_client()
-
- # Sidebar
- # Compact sidebar list look: smaller buttons and icons
- st.markdown(
- """
-
- """,
- unsafe_allow_html=True,
- )
- sidebar_history_latest()
- sidebar_controls()
- if st.session_state.get("show_delete_modal") and st.session_state.get("delete_chat_id"):
- open_delete_dialog()
-
- st.title(APP_TITLE)
- render_chat_area(client)
-
- errors = (st.session_state.get("status_text", "") or "").strip()
- if errors:
- with st.expander("Output / Errors", expanded=True):
- st.text_area("", value=errors, height=150)
-
- # Perform deferred rerun only after rendering to avoid interrupting first turns
- if st.session_state.get("do_rerun"):
- st.session_state["do_rerun"] = False
- st.rerun()
-
-# MAIN CALL MOVED TO END
-
-# === PART 4: Translation + UI wiring ===
-
-def confidence_label(conf: Optional[float]) -> str:
- if conf is None:
- return "Unknown"
- if conf >= 0.85:
- return f"High ({conf:.2f})"
- if conf >= 0.60:
- return f"Medium ({conf:.2f})"
- return f"Low ({conf:.2f})"
-
-def looks_untranslatable(text: str) -> bool:
- t = text.strip()
- if not t:
- return True
- if t.startswith("http://") or t.startswith("https://"):
- return True
- if t.isnumeric():
- return True
- if "```" in t:
- return True
- letters = sum(ch.isalpha() for ch in t)
- return (letters / max(len(t), 1)) < 0.2
-
-def detect_language(client: Any, model: str, text: str) -> Tuple[Optional[str], Optional[str], Optional[float]]:
- sys = (
- "You detect the language of the user's message. Respond with strict JSON only:"
- " {\"language_name\":,\"language_code\":,\"confidence\":<0..1>}"
- )
- msgs = [{"role": "system", "content": sys}, {"role": "user", "content": text}]
- out = non_stream_completion(client, st.session_state.meta.get("model", DEFAULT_MODEL), msgs).strip()
- if out.startswith("```") or out.lower().startswith("```json"):
- out = out.strip("`\n ")
- if out.lower().startswith("json"):
- out = out[4:].strip()
- try:
- data = json.loads(out)
- return data.get("language_name"), data.get("language_code"), float(data.get("confidence")) if data.get("confidence") is not None else None
- except Exception:
- return None, None, None
-
-def translate_message(client: Any, meta: Dict[str, Any], user_text: str,
- detected_name: Optional[str], detected_code: Optional[str]) -> Tuple[str, Dict[str, Any]]:
- target_name = meta.get("target_language", "English")
- target_code = LANG_MAP.get(target_name, "en")
- bidirectional = meta.get("bidirectional", True)
- previous_other = meta.get("previous_other_lang")
- source_fixed = st.session_state.get("source_language_fixed")
-
- src_code = detected_code or None
- src_name = detected_name or CODE_TO_NAME.get(src_code or "", "Unknown")
- if not bidirectional and source_fixed:
- src_code = LANG_MAP.get(source_fixed, "en")
- src_name = source_fixed
-
- direction = {"from": src_code or "", "to": target_code}
-
- if bidirectional and src_code == target_code:
- if previous_other:
- prev_code = LANG_MAP.get(previous_other, None)
- direction = {"from": target_code, "to": prev_code or target_code}
- else:
- prompt = (
- f"๐ **Detected Language:** {src_name} (`{src_code}`) โ *Confidence:* {confidence_label(meta.get('last_confidence'))}\n\n"
- "Detected language matches target. Choose the other language in the sidebar to enable bidirectional translation."
- )
- return prompt, {"pair": {"from": src_code or "", "to": target_code}, "src": user_text, "tgt": "", "alts": []}
-
- if looks_untranslatable(user_text):
- md = (
- f"๐ **Detected Language:** {src_name} (`{src_code}`) โ *Confidence:* {confidence_label(meta.get('last_confidence'))}\n\n"
- f"๐ฏ **Translation (โ {CODE_TO_NAME.get(direction['to'], target_name)}):**\n"
- "Content appears non-translatable (code, URL, or numbers). Skipping translation.\n\n"
- "๐ก **Cultural / Regional note (if relevant):**\nNone"
- )
- return md, {"pair": direction, "src": user_text, "tgt": "", "alts": []}
-
- system_prompt = build_system_prompt({**meta, "mode": "translation", "target_language": CODE_TO_NAME.get(direction['to'], target_name)})
- user_instructions = f"Translate from `{direction['from']}` to `{direction['to']}`.\nText:\n{user_text}"
- msgs = to_api_messages(system_prompt, [], user_instructions)
- content = stream_completion(client, meta.get("model", DEFAULT_MODEL), msgs)
- alts: List[str] = []
- for line in content.splitlines():
- s = line.strip()
- if s.startswith("- "):
- alts.append(s[2:].strip())
- return content, {"pair": direction, "src": user_text, "tgt": content, "alts": alts}
-
-def new_chat(meta_override: Optional[Dict[str, Any]] = None) -> str:
- chat_id = str(uuid.uuid4())
- created = utcnow_iso()
- meta = st.session_state.get("meta", {}).copy()
- if meta_override:
- meta.update(meta_override)
- system_prompt = build_system_prompt(meta)
- # Generate a unique default title to avoid multiple identical 'New chat' labels
- def _generate_title() -> str:
- base = "New chat"
- titles = {c.get("title") for c in list_chats()}
- if base not in titles:
- return base
- i = 2
- while f"{base} {i}" in titles:
- i += 1
- return f"{base} {i}"
- default_title = _generate_title()
- state = {
- "id": chat_id,
- "title": default_title,
- "created": created,
- "updated": created,
- "messages": [{"role": "system", "content": system_prompt, "ts": created, "tokens": 0}],
- "meta": meta,
- }
- save_chat(state)
- st.session_state.chat_id = chat_id
- st.session_state.messages = state["messages"]
- st.session_state.meta = meta
- st.session_state.all_chats = list_chats()
- return chat_id
-
-def open_chat(chat_id: str):
- data = load_chat(chat_id)
- if not data:
- return
- st.session_state.chat_id = chat_id
- # Drop any historical empty assistant messages
- def _clean(msgs: List[Dict[str, Any]]):
- return [m for m in msgs if not (m.get("role") == "assistant" and not (m.get("content") or "").strip())]
- st.session_state.messages = _clean(data.get("messages", []))
- st.session_state.meta = data.get("meta", st.session_state.get("meta", {}))
- st.session_state.all_chats = list_chats()
- # Keep the mode radio in sync with persisted meta so refresh reflects the saved mode
- try:
- st.session_state["mode_radio"] = st.session_state.meta.get("mode", "standard")
- except Exception:
- pass
- # Reset transient header title override and inline edit flags when switching chats
- try:
- if "current_title_override" in st.session_state:
- st.session_state.pop("current_title_override", None)
- st.session_state["edit_title_mode"] = False
- st.session_state["_title_edit_initialized"] = False
- st.session_state["title_edit_input"] = ""
- except Exception:
- pass
-
-def persist_current():
- cid = st.session_state.get("chat_id")
- if not cid:
- return
- p = find_chat_path_by_id(cid)
- if not p:
- return
- try:
- with open(p, "r", encoding="utf-8") as f:
- data = json.load(f)
- # Clean out any empty assistant messages before persisting
- msgs = st.session_state.get("messages", [])
- msgs = [m for m in msgs if not (m.get("role") == "assistant" and not (m.get("content") or "").strip())]
- data["messages"] = msgs
- data["meta"] = st.session_state.get("meta", {})
- data["updated"] = utcnow_iso()
- with open(p, "w", encoding="utf-8") as f:
- json.dump(data, f, ensure_ascii=False, indent=2)
- st.session_state.all_chats = list_chats()
- except Exception as e:
- set_status_text(e)
-
-def persona_badge(meta: Dict[str, Any]):
- key = meta.get("personality_key", "professional")
- p = PERSONAS.get(key, PERSONAS["professional"])
- st.markdown(f"{p['icon']} **Persona:** {p['label']}")
-
-def handle_rename_confirm(cid: str):
- new_title = st.session_state.get(f"rename_input_{cid}", "").strip() or "New chat"
- rename_chat_file(cid, new_title)
- st.session_state.rename_mode = False
- st.session_state.rename_value = ""
- st.session_state.all_chats = list_chats()
- # Clear transient title override when switching chats
- if "current_title_override" in st.session_state:
- st.session_state.pop("current_title_override", None)
- st.session_state["do_rerun"] = True
-
-def handle_title_save():
- """Save chat title from the header inline editor on Enter.
- Keeps display text as typed (capped at 80 chars), sanitizes only for filename.
- """
- import streamlit as st
- try:
- cid = st.session_state.get("chat_id")
- if not cid:
- return
- new_title = (st.session_state.get("title_edit_input", "") or "").strip() or "New chat"
- if len(new_title) > 80:
- new_title = new_title[:80]
- rename_chat_file(cid, new_title)
- st.session_state["current_title_override"] = new_title
- st.session_state.all_chats = list_chats()
- except Exception as e:
- set_status_text(e)
- finally:
- st.session_state["edit_title_mode"] = False
- st.session_state["do_rerun"] = True
-
-def sidebar_controls():
- st.sidebar.subheader("Mode")
- # Mode + inline Translation settings
- mode = st.sidebar.radio("", ["standard", "translation"], horizontal=True, key="mode_radio")
- st.session_state.meta["mode"] = mode
- # Persist immediately so refresh keeps the selected mode
- try:
- persist_current()
- except Exception:
- pass
-
- if mode == "translation":
- # Show language selection directly under the mode toggle
- tgt = st.sidebar.selectbox(
- "Target language",
- list(LANG_MAP.keys()),
- index=list(LANG_MAP.keys()).index(st.session_state.meta.get("target_language", "English"))
- if st.session_state.meta.get("target_language", "English") in LANG_MAP else 0,
- key="tgt_lang",
- )
- st.session_state.meta["target_language"] = tgt
- direction = st.sidebar.radio(
- "Direction",
- ["Auto/Bidirectional", "Source โ Target"],
- index=0 if st.session_state.meta.get("bidirectional", True) else 1,
- key="direction_radio_inline",
- )
- st.session_state.meta["bidirectional"] = (direction == "Auto/Bidirectional")
- if direction == "Source โ Target":
- st.session_state.source_language_fixed = st.sidebar.selectbox("Source language", list(LANG_MAP.keys()), key="src_lang_fixed")
- else:
- st.session_state.source_language_fixed = None
- try:
- persist_current()
- except Exception:
- pass
-
- # Divider after Mode section
- st.sidebar.divider()
-
- st.sidebar.subheader("Model")
- model_val = st.sidebar.text_input(
- "Model ID",
- value=st.session_state.meta.get("model", DEFAULT_MODEL),
- placeholder="e.g., anthropic/claude-3.5-sonnet",
- help="Paste any valid OpenRouter model ID (see openrouter.ai/models)",
- )
- def _valid_model(s: str) -> bool:
- if not s or any(ch.isspace() for ch in s):
- return False
- # require vendor/name pattern
- return "/" in s and all(len(part) > 0 for part in s.split("/"))
-
- if model_val:
- model_val = model_val.strip()
- st.session_state.meta["model"] = model_val
- if not _valid_model(model_val):
- st.sidebar.warning("Model looks invalid. Use vendor/name, no spaces.")
- try:
- persist_current()
- except Exception:
- pass
-
- # Divider after Model section
- st.sidebar.divider()
-
- # (Removed duplicate Translation Settings block below Mode)
-
- st.sidebar.subheader("Personality")
- persona_keys = list(PERSONAS.keys())
- pkey = st.sidebar.selectbox("Persona", persona_keys, index=persona_keys.index(st.session_state.meta.get("personality_key", "professional")))
- st.session_state.meta["personality_key"] = pkey
- if pkey == "custom":
- cp = st.session_state.meta.get("custom_persona", {})
- cp["style"] = st.sidebar.text_input("Custom style", cp.get("style", ""))
- cp["expertise"] = st.sidebar.text_input("Custom expertise", cp.get("expertise", ""))
- cp["tone"] = st.sidebar.text_input("Custom tone", cp.get("tone", ""))
- st.session_state.meta["custom_persona"] = cp
- try:
- persist_current()
- except Exception:
- pass
-
- # Divider after Personality section
- st.sidebar.divider()
-
- # Export controls
- st.sidebar.subheader("Export")
- role_filter = st.sidebar.selectbox("Role filter", ["all", "user", "assistant"])
- date_range = st.sidebar.date_input("Date range (optional)", [])
- start_dt: Optional[datetime] = None
- end_dt: Optional[datetime] = None
- if isinstance(date_range, list) and len(date_range) == 2:
- start_dt = datetime.combine(date_range[0], datetime.min.time(), tzinfo=timezone.utc)
- end_dt = datetime.combine(date_range[1], datetime.max.time(), tzinfo=timezone.utc)
- if st.session_state.get("chat_id"):
- path = find_chat_path_by_id(st.session_state.chat_id)
- if path and os.path.exists(path):
- try:
- chat = load_chat_file(path)
- msgs = filter_messages_by_date_role(chat.get("messages", []), start_dt, end_dt, role_filter)
- if len([m for m in msgs if m.get("role") in ("user", "assistant")]) == 0:
- st.sidebar.info("No messages to export with current filters.")
- else:
- slug = slugify(chat.get("title", "chat")) or "chat"
- ts = datetime.now().strftime("%Y%m%d_%H%M")
- try:
- st.sidebar.download_button("Download TXT", to_txt_export(chat, msgs).encode("utf-8"), file_name=f"chat_{slug}_{ts}.txt", mime="text/plain")
- except Exception as e:
- set_status_text(e)
- try:
- st.sidebar.download_button("Download JSON", to_json_export(chat, msgs).encode("utf-8"), file_name=f"chat_{slug}_{ts}.json", mime="application/json")
- except Exception as e:
- set_status_text(e)
- try:
- st.sidebar.download_button("Download CSV", to_csv_export(msgs).encode("utf-8"), file_name=f"chat_{slug}_{ts}.csv", mime="text/csv")
- except Exception as e:
- set_status_text(e)
- except Exception as e:
- set_status_text(e)
- # Divider after Export section
- st.sidebar.divider()
-
-def filter_messages_by_date_role(messages: List[Dict[str, Any]], start: Optional[datetime], end: Optional[datetime], role: str) -> List[Dict[str, Any]]:
- def in_range(ts: str) -> bool:
- try:
- t = dtparser.isoparse(ts)
- except Exception:
- return True
- if start and t < start:
- return False
- if end and t > end:
- return False
- return True
- out: List[Dict[str, Any]] = []
- for m in messages:
- r = m.get("role")
- if role != "all" and r != role:
- continue
- if in_range(m.get("ts", utcnow_iso())):
- out.append(m)
- return out
-
-def export_stats(messages: List[Dict[str, Any]]) -> Dict[str, Any]:
- user_msgs = [m for m in messages if m.get("role") == "user"]
- asst_msgs = [m for m in messages if m.get("role") == "assistant"]
- total_chars = sum(len((m.get("content") or "")) for m in messages)
- avg_len = total_chars / max(len(messages), 1)
- return {"user_messages": len(user_msgs), "assistant_messages": len(asst_msgs), "total_characters": total_chars, "average_message_length": round(avg_len, 2)}
-
-def build_export_metadata(chat: Dict[str, Any]) -> Dict[str, Any]:
- meta = chat.get("meta", {})
- created = chat.get("created")
- updated = chat.get("updated")
- dur_min = None
- try:
- if created and updated:
- dur = dtparser.isoparse(updated) - dtparser.isoparse(created)
- dur_min = round(dur.total_seconds() / 60.0, 2)
- except Exception:
- dur_min = None
- return {
- "export_timestamp": utcnow_iso(),
- "format_version": "1.0",
- "session_id": chat.get("id"),
- "total_messages": len(chat.get("messages", [])),
- "session_duration_minutes": dur_min,
- "model": meta.get("model"),
- "mode": meta.get("mode"),
- "target_language": meta.get("target_language"),
- "personality_key": meta.get("personality_key"),
- "bidirectional": meta.get("bidirectional"),
- "last_detected_source": meta.get("last_detected_source"),
- "last_confidence": meta.get("last_confidence"),
- }
-
-def to_txt_export(chat: Dict[str, Any], messages: List[Dict[str, Any]]) -> str:
- meta = build_export_metadata(chat)
- header = [
- f"Chat Export โ {datetime.now().strftime('%Y-%m-%d %H:%M')}",
- "=" * 39,
- "Session Information:",
- f"- Total Messages: {meta['total_messages']}",
- f"- Duration: {meta['session_duration_minutes']} minutes",
- f"- Model: {meta['model']}",
- f"- Mode: {meta['mode']}",
- f"- Target Language: {meta['target_language']}",
- f"- Last Detected Source: {meta['last_detected_source']} ({meta['last_confidence']})",
- "-" * 40,
- ]
- lines = ["\n".join(header)]
- for m in messages:
- ts = m.get("ts", "")
- try:
- hts = datetime.fromisoformat(ts.replace("Z", "+00:00")).strftime("%H:%M:%S")
- except Exception:
- hts = ts
- who = "You" if m.get("role") == "user" else ("Assistant" if m.get("role") == "assistant" else "System")
- content = m.get("content") or ""
- lines.append(f"[{hts}] {who}: {content}")
- return "\n".join(lines)
-
-def to_json_export(chat: Dict[str, Any], messages: List[Dict[str, Any]]) -> str:
- data = {"export_metadata": build_export_metadata(chat), "conversation": messages, "statistics": export_stats(messages)}
- return json.dumps(data, ensure_ascii=False, indent=2)
-
-def to_csv_export(messages: List[Dict[str, Any]]) -> str:
- headers = ["Message_ID", "Timestamp", "Role", "Content", "Character_Count", "Word_Count"]
- if pd is not None:
- rows = []
- for i, m in enumerate(messages, start=1):
- content = m.get("content") or ""
- rows.append({
- "Message_ID": i,
- "Timestamp": m.get("ts", ""),
- "Role": m.get("role", ""),
- "Content": content,
- "Character_Count": len(content),
- "Word_Count": len(content.split()),
- })
- df = pd.DataFrame(rows, columns=headers)
- return df.to_csv(index=False)
- # Fallback without pandas
- from io import StringIO
- buf = StringIO()
- writer = csv.writer(buf, quoting=csv.QUOTE_MINIMAL)
- writer.writerow(headers)
- for i, m in enumerate(messages, start=1):
- content = m.get("content") or ""
- writer.writerow([i, m.get("ts", ""), m.get("role", ""), content, len(content), len(content.split())])
- return buf.getvalue()
-
-def ensure_current_chat():
- if st.session_state.get("chat_id") is None:
- lst = list_chats()
- if lst:
- open_chat(lst[0]["id"])
- else:
- new_chat()
-
-# Global delete confirmation dialog
-try:
- from streamlit.runtime.scriptrunner import add_script_run_ctx # noqa: F401
- DIALOG_AVAILABLE = True
-except Exception:
- DIALOG_AVAILABLE = True # Streamlit >=1.25 has st.dialog
-
-@st.dialog("Confirm deletion")
-def open_delete_dialog():
- cid = st.session_state.get("delete_chat_id")
- if not cid:
- st.write("No chat selected.")
- return
- # Find title for display
- title = None
- for it in st.session_state.get("all_chats", []):
- if it.get("id") == cid:
- title = it.get("title")
- break
- st.warning(f"This will permanently delete: {title or cid}")
- c1, c2 = st.columns(2)
- if c1.button("Delete permanently", type="primary"):
- delete_chat(cid)
- # Clear selection and refresh chat list
- st.session_state["delete_chat_id"] = None
- st.session_state["show_delete_modal"] = False
- newlist = list_chats()
- st.session_state.all_chats = newlist
- # If the current chat was deleted, open most recent or create new
- if st.session_state.get("chat_id") == cid:
- if newlist:
- open_chat(newlist[0]["id"])
- else:
- new_chat()
- # Close dialog and refresh UI immediately
- st.rerun()
- if c2.button("Cancel"):
- st.session_state["delete_chat_id"] = None
- st.session_state["show_delete_modal"] = False
- st.rerun()
-
-def render_chat_area(client: Any):
- meta = st.session_state.get("meta", {})
- # Show current chat title prominently
- try:
- chat_title = None
- cid = st.session_state.get("chat_id")
- if cid:
- p = find_chat_path_by_id(cid)
- if p and os.path.exists(p):
- with open(p, "r", encoding="utf-8") as f:
- chat_title = (json.load(f) or {}).get("title")
- # Prefer transient override if present (immediate UI update after rename)
- chat_title = st.session_state.get("current_title_override", chat_title)
- # Inline title editor with pencil toggle and live counter
- col_title, col_btn = st.columns([0.88, 0.12])
- with col_title:
- if st.session_state.get("edit_title_mode"):
- # Preload current title into input if first time entering edit mode
- if not st.session_state.get("_title_edit_initialized"):
- st.session_state["title_edit_input"] = chat_title or "New chat"
- st.session_state["_title_edit_initialized"] = True
- val = st.text_input(
- label="Rename chat",
- key="title_edit_input",
- max_chars=80,
- label_visibility="collapsed",
- placeholder="Enter chat title (max 80 chars)",
- on_change=handle_title_save,
- )
- # Live character counter
- try:
- count = len(val or "")
- except Exception:
- count = 0
- counter_style = "color:#f87171;" if count > 80 else "opacity:0.75;"
- st.markdown(f"{count}/80", unsafe_allow_html=True)
- else:
- if chat_title:
- st.subheader(chat_title)
- else:
- st.subheader("New chat")
- with col_btn:
- if st.button("โ", key="toggle_edit_title"):
- # Toggle edit mode; second click cancels without saving
- editing = st.session_state.get("edit_title_mode", False)
- st.session_state["edit_title_mode"] = not editing
- # Reset initializer so input value is refreshed when opening
- if not editing:
- st.session_state["_title_edit_initialized"] = False
- except Exception as e:
- set_status_text(e)
- persona_badge(meta)
- # Summarizer panel (available throughout; collapsed by default)
- with st.expander("Summarize your chat", expanded=False):
- sprompt = st.text_area(
- "",
- key="summarizer_prompt",
- height=60,
- placeholder="e.g., Summarize key decisions and action items",
- label_visibility="collapsed",
- )
- # Place the Summarize button directly under the input, with a right-side spinner area
- _btn_col, _spin_col = st.columns([0.2, 0.8])
- with _btn_col:
- gen = st.button("Summarize", use_container_width=True, key="summarize_btn")
-
- # Output area (fixed height, scrollable) using a placeholder so we can replace content
- output_box = st.empty()
-
- # Show the latest saved summary (if any) below the button
- last_summary = None
- for m in reversed(st.session_state.get("messages", [])):
- if m.get("is_summary"):
- last_summary = m.get("content")
- break
- if last_summary:
- safe = html.escape(last_summary)
- output_box.markdown(
- f''
- f'{safe}',
- unsafe_allow_html=True,
- )
-
- if gen:
- if not client:
- st.warning("No API client configured. Check API key in secrets.")
- else:
- try:
- model = meta.get("model", DEFAULT_MODEL)
- if not model or (" " in model) or ("/" not in model):
- st.warning("Please provide a valid model ID in the sidebar (vendor/name).")
- else:
- msgs = build_summary_messages(meta, sprompt or "")
- # Show spinner on the right of the button row
- with _spin_col:
- with st.spinner("Summarizingโฆ"):
- summary_text = non_stream_completion(client, model, msgs)
- if summary_text:
- # Replace any previous summary in UI and state; keep only the latest
- safe = html.escape(summary_text)
- output_box.empty()
- output_box.markdown(
- f''
- f'{safe}',
- unsafe_allow_html=True,
- )
- # Save as assistant message (flagged), but do not render in main chat loop
- st.session_state.messages = [m for m in st.session_state.messages if not m.get("is_summary")]
- st.session_state.messages.append({
- "role": "assistant",
- "content": summary_text,
- "ts": utcnow_iso(),
- "tokens": 0,
- "is_summary": True,
- })
- persist_current()
- else:
- st.info("No summary text returned. Try a different prompt or check errors.")
- except Exception as e:
- set_status_text(e)
-
- st.divider()
- for m in st.session_state.get("messages", []):
- if m.get("role") in ("user", "assistant"):
- if m.get("is_summary"):
- # Do not clutter main chat with summary entries; they are visible in the panel and exports
- continue
- content_to_show = m.get("content", "")
- # Skip rendering blank assistant bubbles
- if m.get("role") == "assistant" and not (content_to_show or "").strip():
- continue
- with st.chat_message(m.get("role")):
- st.markdown(content_to_show)
- prompt = st.chat_input("Ask or paste textโฆ")
- if not prompt:
- return
- now = utcnow_iso()
- st.session_state.messages.append({"role": "user", "content": prompt, "ts": now, "tokens": 0})
- persist_current()
- # Render the just-submitted user message immediately
- with st.chat_message("user"):
- st.markdown(prompt)
- # Auto-rename chat to first user message preview if still default title
- try:
- cid = st.session_state.get("chat_id")
- if cid:
- path = find_chat_path_by_id(cid)
- if path and os.path.exists(path):
- with open(path, "r", encoding="utf-8") as f:
- data = json.load(f)
- current_title = (data or {}).get("title", "")
- if current_title and current_title.lower().startswith("new chat"):
- first_line = prompt.strip().splitlines()[0]
- preview = first_line.strip()
- if len(preview) > 40:
- preview = preview[:40].rstrip() + "โฆ"
- if preview:
- rename_chat_file(cid, preview)
- st.session_state["current_title_override"] = preview
- st.session_state.all_chats = list_chats()
- # Schedule a rerun at end of this cycle so sidebar reflects new title now
- st.session_state["do_rerun"] = True
- except Exception as e:
- set_status_text(e)
- if not client:
- st.warning("No API client configured. Check API key in secrets.")
- return
- try:
- model = meta.get("model", DEFAULT_MODEL)
- # Validate model id before API call
- if not model or (" " in model) or ("/" not in model):
- st.warning("Please provide a valid model ID in the sidebar (vendor/name).")
- return
- if meta.get("mode") == "translation":
- with st.spinner("Detecting languageโฆ"):
- name, code, conf = detect_language(client, model, prompt)
- st.session_state.meta["last_detected_source"] = name
- st.session_state.meta["last_confidence"] = conf
- persist_current()
- with st.spinner("Translatingโฆ"):
- content, tmeta = translate_message(client, st.session_state.meta, prompt, name, code)
- st.session_state.messages.append({"role": "assistant", "content": content, "ts": utcnow_iso(), "tokens": 0, "translation_meta": tmeta})
- pair = tmeta.get("pair", {})
- src = pair.get("from")
- tgt = pair.get("to")
- if src and tgt and src != tgt:
- target_code = LANG_MAP.get(meta.get("target_language", "English"), "en")
- other = src if src != target_code else tgt
- st.session_state.meta["previous_other_lang"] = CODE_TO_NAME.get(other, other)
- persist_current()
- else:
- system_prompt = build_system_prompt(meta)
- msgs = to_api_messages(system_prompt, st.session_state.messages[:-1], prompt)
- with st.spinner("Thinkingโฆ"):
- content = stream_completion(client, model, msgs)
- if content and content.strip():
- st.session_state.messages.append({"role": "assistant", "content": content, "ts": utcnow_iso(), "tokens": 0})
- persist_current()
- else:
- st.warning("The model returned no text. Check model ID and errors panel.")
- except Exception as e:
- set_status_text(e)
-
-# Move main call to the end so all functions are defined first
-def sidebar_history_latest():
- st.sidebar.subheader("Chats")
- st.sidebar.markdown(
- """
-
- """,
- unsafe_allow_html=True,
- )
- if st.sidebar.button("+ New chat", key="new_chat_btn_clean", use_container_width=True):
- new_chat()
- st.session_state["do_rerun"] = True
- st.sidebar.caption("Chat History")
- chats = list_chats()
- st.session_state.all_chats = chats
- current_id = st.session_state.get("chat_id")
- chat_list = st.sidebar.container()
- def select_chat(cid: str):
- for it in chats:
- key = f"sel_{it.get('id')}"
- if key in st.session_state:
- st.session_state[key] = (it.get('id') == cid)
- open_chat(cid)
- st.session_state["do_rerun"] = True
- with chat_list:
- for item in chats:
- cid = item.get("id")
- title = item.get("title") or "Untitled"
- row = st.container()
- col_sel, col_title, col_del = row.columns([0.12, 0.73, 0.15])
- col_sel.checkbox(
- label="",
- value=(current_id == cid),
- key=f"sel_{cid}",
- label_visibility="collapsed",
- on_change=lambda cid=cid: select_chat(cid),
- )
- new_title = col_title.text_input(
- label="",
- value=title,
- key=f"title_inline_{cid}",
- label_visibility="collapsed",
- placeholder="Title",
- help="Click to rename; selection stays unchanged",
- )
- if new_title.strip() and new_title != title:
- st.session_state[f"rename_input_{cid}"] = new_title
- handle_rename_confirm(cid)
- if col_del.button("๐๏ธ", key=f"del_inline_{cid}"):
- st.session_state["delete_chat_id"] = cid
- st.session_state["show_delete_modal"] = True
- # dialog handled centrally via show_delete_modal
-
-
-
-if __name__ == "__main__":
- main()
diff --git a/Vivek_Trivedi/ChatBot/chat-history/06f9961e-0e21-4d67-946f-c4f6878b52be__forth.json b/Vivek_Trivedi/ChatBot/chat-history/06f9961e-0e21-4d67-946f-c4f6878b52be__forth.json
deleted file mode 100644
index e3a4657..0000000
--- a/Vivek_Trivedi/ChatBot/chat-history/06f9961e-0e21-4d67-946f-c4f6878b52be__forth.json
+++ /dev/null
@@ -1,119 +0,0 @@
-{
- "id": "06f9961e-0e21-4d67-946f-c4f6878b52be",
- "title": "forth",
- "created": "2025-10-31T11:19:07Z",
- "updated": "2025-10-31T11:38:44Z",
- "messages": [
- {
- "role": "system",
- "content": "You are a careful, concise assistant. Be accurate and do not hallucinate. Show errors verbatim when asked. Never claim background/asynchronous work. Refuse unsafe content briefly with safer alternatives. Preserve formatting and code blocks.\n\nPersona: Professional. You are formal, concise, and business-oriented. Provide numbered steps and clear action items.\n\nYou are a helpful assistant. Be brief by default. Use triple backticks for code. Avoid purple prose.",
- "ts": "2025-10-31T11:19:07Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "forth",
- "ts": "2025-10-31T11:19:16Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "**Forthโฏat a glance**\n\nForth is a stackโbased, extensible language. You write *words* (functions) that manipulate the data stack, and you can define new words on the fly. A typical Forth line looks like:\n\n```\n \n```\n\nThe operation consumes the needed items from the stack and pushes the result back.\n\n---\n\n### โHello, World!โ in Forth\n\n```forth\n: hello ( -- ) \\ define a new word called HELLO\n .\" Hello, World!\" cr ; \\ .\\\" prints a string, cr = newline\n\nhello \\ run it\n```\n\n**Explanation**\n\n| Symbol | Meaning |\n|--------|---------|\n| `:` โฆ `;` | Definition of a new word. |\n| `.\" โฆ \"` | Compileโtime string literal that is printed when the word runs. |\n| `cr` | Print a carriageโreturn/lineโfeed. |\n| `hello` | Calls the word we just defined. |\n\n---\n\n### A tiny calculator example\n\n```forth\n: add ( n1 n2 -- sum ) + ;\n: sub ( n1 n2 -- diff ) - ;\n: mul ( n1 n2 -- prod ) * ;\n: div ( n1 n2 -- quot ) / ;\n\n5 3 add . \\ โ 8\n10 4 sub . \\ โ 6\n7 6 mul . \\ โ 42\n20 5 div . \\ โ 4\n```\n\n- Numbers are pushed onto the stack.\n- Each word (`add`, `sub`, โฆ) consumes the appropriate number of arguments, performs the operation, and leaves the result on the stack.\n- `.` prints the topโofโstack value.\n\n---\n\n### Quick cheatโsheet for common Forth words\n\n| Word | Stack effect | Description |\n|------|--------------|-------------|\n| `+` | `n1 n2 -- sum` | addition |\n| `-` | `n1 n2 -- diff`| subtraction |\n| `*` | `n1 n2 -- prod`| multiplication |\n| `/` | `n1 n2 -- quot`| integer division |\n| `dup`| `x -- x x` | duplicate top |\n| `swap`| `x y -- y x` | swap top two |\n| `drop`| `x --` | discard top |\n| `.` | `x --` | print top (numeric) |\n| `cr` | `--` | newline |\n| `:` โฆ `;`| `--` | define a new word |\n| `.`\"`| `--` | compileโtime string literal (prints when executed) |\n\n---\n\nFeel free to ask for more examplesโe.g., loops, conditionals, or custom words! ๐",
- "ts": "2025-10-31T11:19:29Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "hi",
- "ts": "2025-10-31T11:33:57Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "hi",
- "ts": "2025-10-31T11:34:44Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Hi there! How can I help you today? ๐",
- "ts": "2025-10-31T11:34:48Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "who are u? who is ur creater?",
- "ts": "2025-10-31T11:35:09Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Iโm ChatGPT, an AI language model designed to assist with information, writing, coding, and more. I was created byโฏOpenAI, which trained me on a broad range of publicly available data up throughโฏ2024. How can I help you today?",
- "ts": "2025-10-31T11:35:12Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "i m vivek",
- "ts": "2025-10-31T11:37:42Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Nice to meet you, Vivek! How can I help you today? ๐ค",
- "ts": "2025-10-31T11:37:44Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "what is ur model name?",
- "ts": "2025-10-31T11:37:58Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Iโm ChatGPT, powered by OpenAIโs GPTโ4 architecture.",
- "ts": "2025-10-31T11:38:00Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "Can you give the exact model that you are working on? ",
- "ts": "2025-10-31T11:38:22Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Iโm based on OpenAIโs GPTโ4 model.",
- "ts": "2025-10-31T11:38:23Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "And on how many parameters this model works on. ",
- "ts": "2025-10-31T11:38:39Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Iโm built on OpenAIโs GPTโ4 architecture. \nOpenAI has not released a precise parameter count for GPTโ4. The company has only indicated that itโs larger than GPTโ3 (which has 175โฏbillion parameters) and is available in a few size variations, but the exact number of weights in the โfullโsizeโ GPTโ4 model isnโt publicly disclosed.",
- "ts": "2025-10-31T11:38:44Z",
- "tokens": 0
- }
- ],
- "meta": {
- "model": "openai/gpt-oss-20b:free",
- "mode": "standard",
- "target_language": "English",
- "last_detected_source": null,
- "last_confidence": null,
- "bidirectional": true,
- "previous_other_lang": null,
- "personality_key": "creative",
- "custom_persona": {
- "style": "",
- "expertise": "",
- "tone": ""
- }
- }
-}
\ No newline at end of file
diff --git a/Vivek_Trivedi/ChatBot/chat-history/0cf588cf-c50c-4e59-8cfb-59d3f71d4b77__vivek-here.json b/Vivek_Trivedi/ChatBot/chat-history/0cf588cf-c50c-4e59-8cfb-59d3f71d4b77__vivek-here.json
deleted file mode 100644
index 4bc44b9..0000000
--- a/Vivek_Trivedi/ChatBot/chat-history/0cf588cf-c50c-4e59-8cfb-59d3f71d4b77__vivek-here.json
+++ /dev/null
@@ -1,97 +0,0 @@
-{
- "id": "0cf588cf-c50c-4e59-8cfb-59d3f71d4b77",
- "title": "Vivek Here",
- "created": "2025-10-31T09:44:39Z",
- "updated": "2025-10-31T11:19:49Z",
- "messages": [
- {
- "role": "system",
- "content": "You are a careful, concise assistant. Be accurate and do not hallucinate. Show errors verbatim when asked. Never claim background/asynchronous work. Refuse unsafe content briefly with safer alternatives. Preserve formatting and code blocks.\n\nPersona: Professional. You are formal, concise, and business-oriented. Provide numbered steps and clear action items.\n\nYou are a helpful assistant. Be brief by default. Use triple backticks for code. Avoid purple prose.",
- "ts": "2025-10-31T09:44:39Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "Vivek Here",
- "ts": "2025-10-31T09:44:45Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Hello Vivek,\n\nHow can I assist you today?",
- "ts": "2025-10-31T09:44:46Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "First",
- "ts": "2025-10-31T09:48:19Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Sure, could you please specify what youโd like help with?",
- "ts": "2025-10-31T09:48:20Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "I m from Mumbai",
- "ts": "2025-10-31T10:28:59Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "๐ **Detected Language:** English (`en`) โ *Confidence:* High \n\n๐ฏ **Translation (โ French):** \nJe viens de Mumbai \n\n๐ **Alternative translations (if relevant):** \n- Je suis de Mumbai. \n- Je viens de la ville de Mumbai. \n- J'habite ร Mumbai. \n\n๐ก **Cultural / Regional note (if relevant):** \nIn French, the city name โMumbaiโ is used unchanged; the former name โBombayโ may still appear in historical contexts. ",
- "ts": "2025-10-31T10:29:04Z",
- "tokens": 0,
- "translation_meta": {
- "pair": {
- "from": "en",
- "to": "fr"
- },
- "src": "I m from Mumbai",
- "tgt": "๐ **Detected Language:** English (`en`) โ *Confidence:* High \n\n๐ฏ **Translation (โ French):** \nJe viens de Mumbai \n\n๐ **Alternative translations (if relevant):** \n- Je suis de Mumbai. \n- Je viens de la ville de Mumbai. \n- J'habite ร Mumbai. \n\n๐ก **Cultural / Regional note (if relevant):** \nIn French, the city name โMumbaiโ is used unchanged; the former name โBombayโ may still appear in historical contexts. ",
- "alts": [
- "Je suis de Mumbai.",
- "Je viens de la ville de Mumbai.",
- "J'habite ร Mumbai."
- ]
- }
- },
- {
- "role": "user",
- "content": "What is Capital of france?",
- "ts": "2025-10-31T10:30:25Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "The capital of France is **Paris**.",
- "ts": "2025-10-31T10:30:27Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "1. เคเคชเคฏเฅเคเคเคฐเฅเคคเคพ เคจเฅ เค
เคชเคจเคพ เคชเคฐเคฟเคเคฏ เคฆเคฟเคฏเคพ โ เคตเคน เคฎเฅเคเคฌเค เคธเฅ เคนเฅเคเฅค \n2. เคเคชเคฏเฅเคเคเคฐเฅเคคเคพ เคจเฅ เคซเคผเฅเคฐเคพเคเคธ เคเฅ เคฐเคพเคเคงเคพเคจเฅ เคเฅ เคฌเคพเคฐเฅ เคฎเฅเค เคชเฅเคเคพเฅค \n3. เคธเคนเคพเคฏเค เคจเฅ เคเคคเฅเคคเคฐ เคฆเคฟเคฏเคพ เคเคฟ เคซเคผเฅเคฐเคพเคเคธ เคเฅ เคฐเคพเคเคงเคพเคจเฅ **เคชเฅเคฐเคฟเคธ** เคนเฅเฅค \n\n(เคธเคเคเฅเคทเคฟเคชเฅเคค เคธเคพเคฐเคพเคเคถ)",
- "ts": "2025-10-31T10:31:20Z",
- "tokens": 0,
- "is_summary": true
- }
- ],
- "meta": {
- "model": "openai/gpt-oss-120b",
- "mode": "standard",
- "target_language": "French",
- "last_detected_source": "English",
- "last_confidence": 0.99,
- "bidirectional": true,
- "previous_other_lang": "English",
- "personality_key": "professional",
- "custom_persona": {
- "style": "",
- "expertise": "",
- "tone": ""
- }
- }
-}
\ No newline at end of file
diff --git a/Vivek_Trivedi/ChatBot/chat-history/275d600f-7d74-4a37-b14b-11d2d5fb670c__hello.json b/Vivek_Trivedi/ChatBot/chat-history/275d600f-7d74-4a37-b14b-11d2d5fb670c__hello.json
deleted file mode 100644
index e162556..0000000
--- a/Vivek_Trivedi/ChatBot/chat-history/275d600f-7d74-4a37-b14b-11d2d5fb670c__hello.json
+++ /dev/null
@@ -1,48 +0,0 @@
-{
- "id": "275d600f-7d74-4a37-b14b-11d2d5fb670c",
- "title": "hello",
- "created": "2025-10-31T10:11:34Z",
- "updated": "2025-10-31T11:21:41Z",
- "messages": [
- {
- "role": "system",
- "content": "You are a careful, concise assistant. Be accurate and do not hallucinate. Show errors verbatim when asked. Never claim background/asynchronous work. Refuse unsafe content briefly with safer alternatives. Preserve formatting and code blocks.\n\nPersona: Professional. You are formal, concise, and business-oriented. Provide numbered steps and clear action items.\n\nYou are a helpful assistant. Be brief by default. Use triple backticks for code. Avoid purple prose.",
- "ts": "2025-10-31T10:11:34Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "hello",
- "ts": "2025-10-31T10:51:23Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Hello! How can I assist you today?",
- "ts": "2025-10-31T10:51:24Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Mumbai, India's financial hub, blends bustling markets, iconic landmarks like Gateway, Bollywood, diverse cultures, coastal charm, rapid growth, vibrant life.",
- "ts": "2025-10-31T11:21:17Z",
- "tokens": 0,
- "is_summary": true
- }
- ],
- "meta": {
- "model": "openai/gpt-oss-120b",
- "mode": "standard",
- "target_language": "English",
- "last_detected_source": null,
- "last_confidence": null,
- "bidirectional": true,
- "previous_other_lang": null,
- "personality_key": "professional",
- "custom_persona": {
- "style": "",
- "expertise": "",
- "tone": ""
- }
- }
-}
\ No newline at end of file
diff --git a/Vivek_Trivedi/ChatBot/chat-history/be84880e-27f1-40f1-9468-f8b467b1f8f4__first-chats.json b/Vivek_Trivedi/ChatBot/chat-history/be84880e-27f1-40f1-9468-f8b467b1f8f4__first-chats.json
deleted file mode 100644
index 545b3f2..0000000
--- a/Vivek_Trivedi/ChatBot/chat-history/be84880e-27f1-40f1-9468-f8b467b1f8f4__first-chats.json
+++ /dev/null
@@ -1,48 +0,0 @@
-{
- "id": "be84880e-27f1-40f1-9468-f8b467b1f8f4",
- "title": "First chats",
- "created": "2025-10-31T10:51:33Z",
- "updated": "2025-10-31T11:21:38Z",
- "messages": [
- {
- "role": "system",
- "content": "You are a careful, concise assistant. Be accurate and do not hallucinate. Show errors verbatim when asked. Never claim background/asynchronous work. Refuse unsafe content briefly with safer alternatives. Preserve formatting and code blocks.\n\nPersona: Professional. You are formal, concise, and business-oriented. Provide numbered steps and clear action items.\n\nYou are a helpful assistant. Be brief by default. Use triple backticks for code. Avoid purple prose.",
- "ts": "2025-10-31T10:51:33Z",
- "tokens": 0
- },
- {
- "role": "user",
- "content": "First chat",
- "ts": "2025-10-31T10:51:40Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "Hello. How can I assist you today?",
- "ts": "2025-10-31T10:51:42Z",
- "tokens": 0
- },
- {
- "role": "assistant",
- "content": "**Conversation Summary**\n\n1. **User Initiation** โ The user said โFirst chat.โ\n2. **Assistant Response** โ The assistant greeted the user and asked how it could help. \n3. **User Request** โ The user asked for a summary of the entire conversation. \n\nThat is the complete dialogue to date.",
- "ts": "2025-10-31T11:20:47Z",
- "tokens": 0,
- "is_summary": true
- }
- ],
- "meta": {
- "model": "openai/gpt-oss-120b",
- "mode": "standard",
- "target_language": "English",
- "last_detected_source": null,
- "last_confidence": null,
- "bidirectional": true,
- "previous_other_lang": null,
- "personality_key": "professional",
- "custom_persona": {
- "style": "",
- "expertise": "",
- "tone": ""
- }
- }
-}
\ No newline at end of file
diff --git a/Vivek_Trivedi/ChatBot/requirements.txt b/Vivek_Trivedi/ChatBot/requirements.txt
deleted file mode 100644
index 8e5f442..0000000
--- a/Vivek_Trivedi/ChatBot/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-streamlit==1.38.0
-openai==1.50.2
-python-slugify==8.0.4
-python-dateutil==2.9.0.post0
-pandas==2.2.3
diff --git a/Vivek_Trivedi/README.md b/Vivek_Trivedi/README.md
deleted file mode 100644
index 46e90a2..0000000
--- a/Vivek_Trivedi/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# Vivek_Trivedi
diff --git a/Seshagiri_Rao_Vaidya/TestFile.txt b/__init__.py
similarity index 100%
rename from Seshagiri_Rao_Vaidya/TestFile.txt
rename to __init__.py
diff --git a/chaitanyaI/Assign2_challenge1_translation/chat_app_personality.py b/chaitanyaI/Assign2_challenge1_translation/chat_app_personality.py
deleted file mode 100644
index b310c5e..0000000
--- a/chaitanyaI/Assign2_challenge1_translation/chat_app_personality.py
+++ /dev/null
@@ -1,195 +0,0 @@
-import os
-import streamlit as st
-
-st.set_page_config(page_title="Personality Chatbot", page_icon="๐ง ")
-
-# -------------------------------------------------
-# 1. API KEY LOADER (from secrets or env)
-# -------------------------------------------------
-def get_openrouter_key() -> str:
- if "OPENROUTER_API_KEY" in st.secrets:
- return st.secrets["OPENROUTER_API_KEY"]
- env_key = os.getenv("OPENROUTER_API_KEY")
- if env_key:
- return env_key
- st.error(
- "โ ๏ธ `OPENROUTER_API_KEY` not found.\n\n"
- "Add it to `.streamlit/secrets.toml`:\n"
- "```toml\nOPENROUTER_API_KEY = \"sk-or-...\"\n```"
- "or set env var `OPENROUTER_API_KEY`."
- )
- st.stop()
-
-api_key = get_openrouter_key()
-
-# -------------------------------------------------
-# 2. PERSONALITY DEFINITIONS
-# -------------------------------------------------
-PERSONALITIES = {
- "pro": {
- "label": "๐งโ๐ผ Professional Business Assistant",
- "desc": "Formal, structured, business-focused. Great for emails, meetings, strategy.",
- "system": (
- "You are a professional business assistant. "
- "You write in a formal, concise, business tone. "
- "You focus on clarity, action items, and professional etiquette. "
- "Always structure answers with headings or bullets when helpful."
- ),
- },
- "creative": {
- "label": "๐จ Creative Writing Helper",
- "desc": "Imaginative, expressive, inspiring. Great for stories, hooks, prompts.",
- "system": (
- "You are a creative writing assistant. "
- "You write in an imaginative, vivid, playful tone. "
- "You encourage the user, offer alternative phrasings, and can add light emojis. "
- "You help with storytelling, character, and style."
- ),
- },
- "tech": {
- "label": "๐งช Technical Expert",
- "desc": "Precise, detailed, code-focused. Great for debugging, APIs, Streamlit.",
- "system": (
- "You are a senior technical expert. "
- "You explain concepts step-by-step, include code blocks where useful, "
- "and avoid unnecessary fluff. "
- "Prefer clarity, correctness, and best practices."
- ),
- },
- "friendly": {
- "label": "๐ Friendly Companion",
- "desc": "Casual, warm, supportive. Great for general chat.",
- "system": (
- "You are a friendly, warm, encouraging companion. "
- "You speak casually, like a supportive friend. "
- "You validate the user's feelings and keep things light."
- ),
- },
- "custom": {
- "label": "๐งฉ Custom Personality",
- "desc": "User-defined style and tone.",
- "system": "", # will be filled from user input
- },
-}
-
-# -------------------------------------------------
-# 3. SESSION STATE INIT
-# -------------------------------------------------
-if "messages" not in st.session_state:
- st.session_state.messages = []
-if "personality" not in st.session_state:
- st.session_state.personality = "pro" # default
-if "custom_personality_text" not in st.session_state:
- st.session_state.custom_personality_text = ""
-
-
-# -------------------------------------------------
-# 4. SIDEBAR: PERSONALITY SELECTOR
-# -------------------------------------------------
-st.sidebar.title("๐ง Personality Selector")
-
-# Build options list
-personality_options = {p_id: data["label"] for p_id, data in PERSONALITIES.items()}
-
-selected_personality = st.sidebar.selectbox(
- "Choose a mode:",
- options=list(personality_options.keys()),
- format_func=lambda x: personality_options[x],
- index=list(personality_options.keys()).index(st.session_state.personality),
-)
-
-# If user picked custom, show text box
-custom_prompt = ""
-if selected_personality == "custom":
- custom_prompt = st.sidebar.text_area(
- "Define your custom personality / system prompt:",
- value=st.session_state.custom_personality_text,
- height=140,
- help="Example: 'You are a concise AI product manager who always returns bullet points and asks one follow-up question.'",
- )
- # store it so we don't lose it on rerun
- st.session_state.custom_personality_text = custom_prompt
-
-# Show description
-st.sidebar.markdown("**Personality description:**")
-st.sidebar.info(PERSONALITIES[selected_personality]["desc"])
-
-# Show preview style (nice-to-have)
-st.sidebar.markdown("**Response style preview:**")
-if selected_personality == "pro":
- st.sidebar.markdown("> โHereโs a structured approach to solve thatโฆโ")
-elif selected_personality == "creative":
- st.sidebar.markdown("> โOoh, letโs make this fun โจโฆโ")
-elif selected_personality == "tech":
- st.sidebar.markdown("> โLetโs debug this step-by-stepโฆโ")
-elif selected_personality == "friendly":
- st.sidebar.markdown("> โHey! Totally get what youโre saying ๐โฆโ")
-else:
- st.sidebar.markdown("> โYour custom assistant will respond like thisโฆโ")
-
-# detect personality change mid-conversation
-personality_changed = selected_personality != st.session_state.personality
-st.session_state.personality = selected_personality
-
-# -------------------------------------------------
-# 5. PAGE HEADER
-# -------------------------------------------------
-current_label = PERSONALITIES[st.session_state.personality]["label"]
-st.markdown(f"### ๐ฌ Current mode: **{current_label}**")
-
-# -------------------------------------------------
-# 6. RENDER CHAT HISTORY
-# -------------------------------------------------
-for msg in st.session_state.messages:
- with st.chat_message(msg["role"]):
- st.write(msg["content"])
-
-# If personality changed in the middle, tell the model (and the user)
-if personality_changed and st.session_state.messages:
- note = f"๐ Personality changed to **{current_label}**. Future responses should follow this style."
- with st.chat_message("assistant"):
- st.write(note)
- st.session_state.messages.append({"role": "assistant", "content": note})
-
-# -------------------------------------------------
-# 7. GET USER INPUT
-# -------------------------------------------------
-user_prompt = st.chat_input("Ask me something...")
-if user_prompt:
- # show user msg
- st.session_state.messages.append({"role": "user", "content": user_prompt})
- with st.chat_message("user"):
- st.write(user_prompt)
-
- # -------------------------------------------------
- # 8. BUILD SYSTEM PROMPT FROM PERSONALITY
- # -------------------------------------------------
- if st.session_state.personality == "custom" and st.session_state.custom_personality_text.strip():
- system_prompt = st.session_state.custom_personality_text.strip()
- else:
- system_prompt = PERSONALITIES[st.session_state.personality]["system"]
-
- # build messages with system at the top
- full_messages = [{"role": "system", "content": system_prompt}]
- full_messages.extend(st.session_state.messages)
-
- # -------------------------------------------------
- # 9. CALL OPENROUTER / MODEL (placeholder)
- # -------------------------------------------------
- # Here you should call your LLM with `full_messages`
- # I'm returning a fake response to keep this runnable.
- # Replace this block with your real OpenRouter call.
- assistant_answer = (
- f"(Personality: {current_label})\n"
- f"You said: {user_prompt}\n\n"
- f"(System prompt was:\n`{system_prompt[:180]}...`)"
- )
-
- # show assistant
- with st.chat_message("assistant"):
- st.write(assistant_answer)
-
- # store in history
- st.session_state.messages.append(
- {"role": "assistant", "content": assistant_answer}
- )
diff --git a/chaitanyaI/Assign2_challenge1_translation/chat_app_trans.py b/chaitanyaI/Assign2_challenge1_translation/chat_app_trans.py
deleted file mode 100644
index 19bedc8..0000000
--- a/chaitanyaI/Assign2_challenge1_translation/chat_app_trans.py
+++ /dev/null
@@ -1,294 +0,0 @@
-# Open router API key for using a free model , paid models
-# Challenge 1: Translation Mode
-# Features added:
-# - Sidebar target language selection
-# - Two-stage flow: detect language -> translate
-# - Cultural notes and alternative translations
-# - Translation history in session_state
-# - Still keeps original chat history & OpenRouter streaming style (for final display)
-
-import streamlit as st
-from openai import OpenAI
-import json
-
-# ------------------------------------------------------------
-# PAGE CONFIG
-# ------------------------------------------------------------
-st.set_page_config(page_title="My Translation ChatBot", page_icon="๐ค")
-
-# ------------------------------------------------------------
-# OPENROUTER CLIENT SETUP
-# ------------------------------------------------------------
-if "OPENROUTER_API_KEY" in st.secrets:
- api_key = st.secrets["OPENROUTER_API_KEY"]
-else:
- api_key = st.sidebar.text_input("Enter OpenRouter API Key", type="password")
-
-if not api_key:
- st.warning("Please enter your OpenRouter API key to continue.")
- st.stop()
-
-client = OpenAI(
- base_url="https://openrouter.ai/api/v1",
- api_key=api_key,
- default_headers={
- "HTTP-Referer": "http://localhost:8501", # Optional: shows on OpenRouter rankings
- "X-Title": "My ChatBot", # Optional: shows on OpenRouter rankings
- }
-)
-
-# ------------------------------------------------------------
-# MODELS (adjust to what you have on OpenRouter)
-# ------------------------------------------------------------
-# detection / translation model (must support following instructions well)
-DETECTION_MODEL = "openai/gpt-4o-mini" # change if not enabled
-TRANSLATION_MODEL = "openai/gpt-4o-mini" # can be same as detection
-# if those don't work for your key, try the free one you used:
-# DETECTION_MODEL = "mistralai/mistral-7b-instruct:free"
-# TRANSLATION_MODEL = "mistralai/mistral-7b-instruct:free"
-
-# ------------------------------------------------------------
-# SESSION STATE INIT
-# ------------------------------------------------------------
-# chat history (original)
-if "messages" not in st.session_state:
- st.session_state.messages = []
-
-# target language for translation (new)
-if "target_language" not in st.session_state:
- st.session_state.target_language = "English"
-
-# translation history (new)
-if "translation_history" not in st.session_state:
- # each item: {
- # "source_text": ...,
- # "detected_language": ...,
- # "target_language": ...,
- # "translation": ...,
- # "alternatives": [...],
- # "cultural_note": ...,
- # "confidence": ...
- # }
- st.session_state.translation_history = []
-
-# ------------------------------------------------------------
-# SIDEBAR (new UI for challenge)
-# ------------------------------------------------------------
-with st.sidebar:
- st.header("๐ Translation Settings")
-
- target = st.selectbox(
- "Translate INTO:",
- [
- "English",
- "Spanish",
- "French",
- "German",
- "Portuguese",
- "Italian",
- "Hindi",
- "Japanese",
- "Korean",
- "Chinese (Simplified)",
- ],
- index=0,
- )
- st.session_state.target_language = target
-
- st.divider()
- st.subheader("๐ Translation History")
- if st.session_state.translation_history:
- for item in reversed(st.session_state.translation_history[-10:]):
- st.write(f"**Source ({item['detected_language']}):** {item['source_text']}")
- st.write(f"**โ {item['target_language']}:** {item['translation']}")
- if item.get("confidence") is not None:
- st.caption(f"Confidence: {item['confidence']:.2f}")
- if item.get("cultural_note"):
- st.caption("๐ก " + item["cultural_note"])
- st.markdown("---")
- else:
- st.caption("No translations yet.")
-
- # Clear history
- if st.button("Clear translation history"):
- st.session_state.translation_history = []
-
-# ------------------------------------------------------------
-# APP TITLE
-# ------------------------------------------------------------
-st.title("๐ค My Translation ChatBot")
-
-st.caption(
- "Type in **any language** โ I will detect it โ translate to "
- f"**{st.session_state.target_language}** and add cultural notes."
-)
-
-# ------------------------------------------------------------
-# HELPER: detect language (first call)
-# ------------------------------------------------------------
-def detect_language(text: str) -> dict:
- """
- Ask the model to detect language and return a structured dict.
- We tell it to return ONLY JSON so it's easy to parse.
- """
- try:
- resp = client.chat.completions.create(
- model=DETECTION_MODEL,
- messages=[
- {
- "role": "system",
- "content": (
- "You are a language detector. "
- "Detect the language of the user's text. "
- "Return ONLY valid JSON with fields: "
- "language_name (string), iso_code (string), confidence (0-1), is_mixed (bool)."
- ),
- },
- {"role": "user", "content": text},
- ],
- temperature=0,
- )
- raw = resp.choices[0].message.content.strip()
- # try to parse JSON
- data = json.loads(raw)
- return data
- except Exception as e:
- # fallback if model returns non-JSON or error
- return {
- "language_name": "Unknown",
- "iso_code": "",
- "confidence": 0.0,
- "is_mixed": False,
- "error": str(e),
- }
-
-# ------------------------------------------------------------
-# HELPER: translate text (second call)
-# ------------------------------------------------------------
-def translate_with_context(source_text: str, source_lang: str, target_lang: str) -> dict:
- """
- Ask the model to translate, provide alternatives, and cultural/regional notes.
- Return structured JSON.
- """
- try:
- resp = client.chat.completions.create(
- model=TRANSLATION_MODEL,
- messages=[
- {
- "role": "system",
- "content": (
- "You are a professional translator and cultural advisor. "
- "Translate the user's text from the source language to the target language. "
- "Return ONLY JSON with fields: "
- "main_translation (string), "
- "alternative_translations (array of strings), "
- "cultural_note (string, optional), "
- "tone (string, optional). "
- "Use natural, idiomatic language, not word-for-word."
- ),
- },
- {
- "role": "user",
- "content": json.dumps(
- {
- "source_language": source_lang,
- "target_language": target_lang,
- "text": source_text,
- }
- ),
- },
- ],
- temperature=0.4,
- )
- raw = resp.choices[0].message.content.strip()
- data = json.loads(raw)
- return data
- except Exception as e:
- return {
- "main_translation": "",
- "alternative_translations": [],
- "cultural_note": "",
- "error": str(e),
- }
-
-# ------------------------------------------------------------
-# DISPLAY CHAT HISTORY (original behavior)
-# ------------------------------------------------------------
-for message in st.session_state.messages:
- with st.chat_message(message["role"]):
- st.markdown(message["content"])
-
-# ------------------------------------------------------------
-# HANDLE USER INPUT
-# ------------------------------------------------------------
-if prompt := st.chat_input("Type text in ANY language, I'll translate it..."):
- # 1) Add user message to chat history (original behavior)
- st.session_state.messages.append({"role": "user", "content": prompt})
-
- # 2) Show user message
- with st.chat_message("user"):
- st.markdown(prompt)
-
- # 3) DETECT LANGUAGE (first call)
- with st.chat_message("assistant"):
- with st.spinner("๐ Detecting language..."):
- detection = detect_language(prompt)
-
- source_lang = detection.get("language_name", "Unknown")
- confidence = detection.get("confidence", 0.0)
- iso_code = detection.get("iso_code", "")
-
- # If the user typed already in the target language, we can try to be smart
- target_lang = st.session_state.target_language
- if source_lang.lower().startswith(target_lang.lower()):
- # user typed in same language as target -> switch to English as fallback
- # (you can change this logic)
- target_lang = "English" if target_lang != "English" else "Spanish"
-
- # 4) TRANSLATE (second call)
- with st.spinner(f"๐ Translating from {source_lang} โ {target_lang}..."):
- translation = translate_with_context(prompt, source_lang, target_lang)
-
- main_translation = translation.get("main_translation", "").strip()
- alternatives = translation.get("alternative_translations", []) or []
- cultural_note = translation.get("cultural_note", "")
-
- # 5) RENDER STRUCTURED OUTPUT
- st.markdown(f"๐ **Detected Language:** {source_lang}")
- st.markdown(f"๐ **Confidence:** {confidence:.2f}")
- if iso_code:
- st.markdown(f"๐ **ISO Code:** `{iso_code}`")
-
- st.markdown(f"๐ฏ **Translation โ {target_lang}:** {main_translation or 'โ'}")
-
- if alternatives:
- st.markdown("๐ **Alternative translations:**")
- for alt in alternatives:
- st.markdown(f"- {alt}")
-
- if cultural_note:
- st.markdown(f"๐ก **Cultural / Regional note:** {cultural_note}")
-
- # 6) ADD TO TRANSLATION HISTORY (for sidebar)
- st.session_state.translation_history.append(
- {
- "source_text": prompt,
- "detected_language": source_lang,
- "target_language": target_lang,
- "translation": main_translation,
- "alternatives": alternatives,
- "cultural_note": cultural_note,
- "confidence": confidence,
- }
- )
-
- # 7) Add assistant message to chat history (so original flow is preserved)
- # We store a compact version here
- assistant_compact = (
- f"Detected: {source_lang} (conf {confidence:.2f})\n"
- f"Translation ({target_lang}): {main_translation}\n"
- )
- if cultural_note:
- assistant_compact += f"Cultural note: {cultural_note}"
- st.session_state.messages.append({"role": "assistant", "content": assistant_compact})
-
diff --git a/chaitanyaI/README.md b/chaitanyaI/README.md
deleted file mode 100644
index 8b13789..0000000
--- a/chaitanyaI/README.md
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/chaitanyaI/text_processor.py b/chaitanyaI/text_processor.py
deleted file mode 100644
index 96f432c..0000000
--- a/chaitanyaI/text_processor.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import streamlit as st
-
-# ---------- PAGE CONFIG ----------
-st.set_page_config(page_title="Interactive Text Processor", page_icon="๐")
-
-# ---------- MAIN INTERFACE ----------
-st.title("๐ Interactive Text Processor")
-st.subheader("A simple app to analyze and transform text interactively.")
-
-st.write(
- """
- This app lets you input any text and process it in different ways โ
- such as counting words, reversing text, or changing the case.
- Use the **sidebar** to configure how you want the text processed.
- """
-)
-
-# ---------- SIDEBAR ----------
-st.sidebar.header("โ๏ธ Configuration Options")
-st.sidebar.write("Youโll add interactive controls here in the next tasks.")
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..cb51a65
--- /dev/null
+++ b/config.py
@@ -0,0 +1,28 @@
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+class Config:
+    # LLM provider selection and credentials, read from the environment after load_dotenv() has run.
+    LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai")  # Options: openai, anthropic, local
+    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
+    ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY", "")
+
+    # Model name; falls back to "gpt-4" when DEFAULT_MODEL is unset.
+    DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "gpt-4")
+
+    # Parsed with int(): a non-integer value raises ValueError when this class body executes (at import).
+    API_TIMEOUT = int(os.getenv("API_TIMEOUT", "30"))  # presumably seconds - TODO confirm
+    MAX_RETRIES = int(os.getenv("MAX_RETRIES", "3"))
+
+    # Directory path for generated output; defaults to "./reports".
+    OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./reports")
+
+    @classmethod
+    def validate_config(cls) -> None:
+        """Raise ValueError if the API key for the selected provider ("openai" or "anthropic") is empty."""
+        if cls.LLM_PROVIDER == "openai" and not cls.OPENAI_API_KEY:
+            raise ValueError("OPENAI_API_KEY is required when using OpenAI")
+        elif cls.LLM_PROVIDER == "anthropic" and not cls.ANTHROPIC_API_KEY:
+            raise ValueError("ANTHROPIC_API_KEY is required when using Anthropic")
\ No newline at end of file
diff --git a/insight_agent.py b/insight_agent.py
new file mode 100644
index 0000000..ebb2697
--- /dev/null
+++ b/insight_agent.py
@@ -0,0 +1,251 @@
+import json
+import re
+import requests
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+from enum import Enum
+from config import Config
+
+class LLMClient:
+    """Unified LLM client: calls OpenAI when configured, otherwise returns canned fallback text."""
+
+    def __init__(self):
+        self.config = Config()
+        self.setup_client()
+
+    def setup_client(self):
+        """Set self.client and self.model for the provider named by Config.LLM_PROVIDER."""
+        if self.config.LLM_PROVIDER == "openai":
+            from openai import OpenAI
+            self.client = OpenAI(api_key=self.config.OPENAI_API_KEY, timeout=self.config.API_TIMEOUT, max_retries=self.config.MAX_RETRIES)
+            self.model = self.config.DEFAULT_MODEL
+        elif self.config.LLM_PROVIDER == "anthropic":
+            # For Anthropic Claude
+            self.client = None  # You would initialize Anthropic client here
+            self.model = "claude-3-sonnet-20240229"
+        else:
+            # For local models
+            self.client = None
+            self.model = "local"
+
+    def generate_text(self, prompt: str, system_message: Optional[str] = None, max_tokens: int = 1000) -> str:
+        """Return the model's reply to `prompt`; never raises - any failure yields the rule-based fallback text."""
+        try:
+            if self.config.LLM_PROVIDER == "openai":
+                messages = []
+                if system_message:
+                    messages.append({"role": "system", "content": system_message})
+                messages.append({"role": "user", "content": prompt})
+                response = self.client.chat.completions.create(
+                    model=self.model,
+                    messages=messages,
+                    max_tokens=max_tokens,
+                    temperature=0.7
+                )
+                # message.content may be None; substitute fallback text so the declared str return holds.
+                return response.choices[0].message.content or self._fallback_generation(prompt)
+            else:
+                # Fallback for other providers or local models
+                return self._fallback_generation(prompt)
+        except Exception as e:
+            print(f"LLM Error: {e}")
+            return self._fallback_generation(prompt)
+
+    def _fallback_generation(self, prompt: str) -> str:
+        """Return one of three fixed sentences chosen by keyword ("hypothesis", "trend", else default)."""
+        # Simple rule-based fallback - REPLACE WITH ACTUAL LLM CALLS
+        if "hypothesis" in prompt.lower():
+            return "Based on the analysis, there appears to be a correlation between the key variables that warrants further investigation."
+        elif "trend" in prompt.lower():
+            return "The data shows a consistent upward trend over the observed period with seasonal variations."
+        else:
+            return "Analysis suggests significant patterns in the data that require additional validation."
+
+class ReasoningType(Enum):  # Labels for Hypothesis.reasoning_type; values are the strings used in the JSON payloads.
+    CAUSAL = "causal"
+    CORRELATIONAL = "correlational"
+    TEMPORAL = "temporal"
+    COMPARATIVE = "comparative"
+    PREDICTIVE = "predictive"
+
+@dataclass
+class Hypothesis:  # One generated insight, as built by InsightGenerationAgent.
+    statement: str  # the hypothesis text
+    reasoning_chain: List[str]  # reasoning steps, in order
+    confidence: float  # the prompt asks for 0.0-1.0, but nothing here enforces the range
+    evidence_sources: List[str]  # provenance tags, e.g. 'llm_analysis' or "fallback_analysis"
+    reasoning_type: ReasoningType
+    testable_implications: List[str]
+
+class InsightGenerationAgent:
+    def __init__(self, model_name: Optional[str] = None) -> None:
+        self.llm = LLMClient()
+        self.model_name = model_name or Config.DEFAULT_MODEL  # NOTE(review): stored only; LLMClient() is built without it
+        self.reasoning_patterns = self._initialize_reasoning_patterns()
+
+    def _initialize_reasoning_patterns(self) -> Dict[str, List[str]]:
+        """Return template sentences keyed by reasoning-type name (no "predictive" entry); {braces} are unfilled placeholders."""
+        return {
+            "causal": [
+                "If {factor} increases/decreases, then {outcome} changes because...",
+                "{Event} likely causes {effect} through mechanism: {mechanism}",
+                "The relationship between {A} and {B} suggests {A} influences {B} via {pathway}"
+            ],
+            "correlational": [
+                "When {X} occurs, {Y} tends to co-occur, suggesting potential relationship: {relationship}",
+                "Patterns in {dataset} show correlation between {var1} and {var2} with implications for {domain}"
+            ],
+            "temporal": [
+                "Following {event}, we observe {trend} over time, indicating {conclusion}",
+                "Historical pattern: {pattern} suggests future {prediction}"
+            ],
+            "comparative": [
+                "Compared to {baseline}, {subject} shows {difference} because {reason}",
+                "Analysis of {group1} vs {group2} reveals key differences: {differences}"
+            ]
+        }
+
+ def generate_insights(self,
+ analysis_data: Dict[str, Any],
+ contradictions: List[str],
+ source_validations: Dict[str, bool]) -> List[Hypothesis]:
+ """
+ Generate insights and hypotheses from analyzed data using LLM
+ """
+ print("๐ Generating insights with LLM...")
+
+ # Prepare context for LLM
+ context = self._prepare_llm_context(analysis_data, contradictions, source_validations)
+
+ # Generate insights using LLM
+ llm_insights = self._generate_insights_with_llm(context)
+
+ # Parse and structure the insights
+ structured_insights = self._parse_llm_insights(llm_insights)
+
+ return structured_insights
+
+    def _prepare_llm_context(self, analysis_data: Dict, contradictions: List[str], validations: Dict[str, bool]) -> str:
+        """Render key findings, contradictions, source validations and the summary into one text block for the prompt."""
+        context = f"""
+ RESEARCH ANALYSIS CONTEXT:
+
+ KEY FINDINGS:
+ {json.dumps(analysis_data.get('key_findings', []), indent=2)}
+
+ CONTRADICTIONS IDENTIFIED:
+ {chr(10).join(f"- {contradiction}" for contradiction in contradictions)}
+
+ SOURCE VALIDATIONS:
+ {json.dumps(validations, indent=2)}
+
+ ADDITIONAL CONTEXT:
+ {analysis_data.get('summary', 'No additional context available')}
+ """
+        return context
+
+    def _generate_insights_with_llm(self, context: str) -> str:
+        """Ask the LLM for 3-5 insights as JSON (schema given in the system prompt) and return its raw reply text."""
+
+        system_prompt = """You are an expert research analyst. Your task is to generate insightful hypotheses and trends based on research data.
+
+ Generate 3-5 key insights with:
+ 1. Clear, testable hypotheses
+ 2. Logical reasoning chains
+ 3. Confidence estimates (0.0-1.0)
+ 4. Specific, testable implications
+ 5. Reasoning type (causal, correlational, temporal, comparative, predictive)
+
+ Format your response as JSON with this structure:
+ {
+ "insights": [
+ {
+ "statement": "Clear hypothesis statement",
+ "reasoning_chain": ["step1", "step2", "step3"],
+ "confidence": 0.85,
+ "reasoning_type": "causal",
+ "testable_implications": ["implication1", "implication2"]
+ }
+ ]
+ }"""
+
+        user_prompt = f"""
+ Based on the following research analysis, generate key insights and hypotheses:
+
+ {context}
+
+ Please provide your analysis in the specified JSON format.
+ """
+
+        response = self.llm.generate_text(user_prompt, system_prompt, max_tokens=1500)
+        return response
+
+    def _parse_llm_insights(self, llm_response: str) -> List[Hypothesis]:
+        """Parse the LLM reply into Hypothesis objects; empty or malformed replies yield the fallback insights."""
+        insights = []
+
+        try:
+            # Grab the outermost {...} span; a None reply is treated as empty text.
+            json_match = re.search(r'\{.*\}', llm_response or "", re.DOTALL)
+            if json_match:
+                json_str = json_match.group()
+                data = json.loads(json_str)
+
+                for insight_data in data.get('insights') or []:
+                    try:
+                        hypothesis = Hypothesis(
+                            statement=insight_data.get('statement', ''),
+                            reasoning_chain=insight_data.get('reasoning_chain', []),
+                            confidence=float(insight_data.get('confidence', 0.5)),
+                            evidence_sources=['llm_analysis'],
+                            reasoning_type=ReasoningType(insight_data.get('reasoning_type', 'correlational')),
+                            testable_implications=insight_data.get('testable_implications', [])
+                        )
+                        insights.append(hypothesis)
+                    except (ValueError, KeyError, TypeError, AttributeError) as e:
+                        # TypeError: e.g. "confidence": null; AttributeError: the entry is not a JSON object.
+                        print(f"Warning: Failed to parse insight: {e}")
+
+            # Fallback: if no JSON found, create basic insights
+            if not insights:
+                insights = self._create_fallback_insights()
+
+        except (json.JSONDecodeError, TypeError):
+            # TypeError covers a non-string reply or a non-iterable "insights" value.
+            print("Warning: Failed to parse LLM response as JSON, using fallback insights")
+            insights = self._create_fallback_insights()
+        return insights
+
+    def _create_fallback_insights(self) -> List[Hypothesis]:
+        """Return a one-element list holding a generic placeholder hypothesis (confidence 0.5)."""
+        # Fixed content: nothing here depends on the input data or on instance state.
+        placeholder = Hypothesis(
+            statement="Further investigation needed to validate preliminary findings",
+            reasoning_chain=["Initial analysis completed", "Data patterns observed", "Requires additional validation"],
+            confidence=0.5,
+            evidence_sources=["fallback_analysis"],
+            reasoning_type=ReasoningType.CORRELATIONAL,
+            testable_implications=["Conduct controlled experiments", "Gather additional data sources"],
+        )
+        return [placeholder]
+
+    def format_insights_report(self, insights: List[Hypothesis]) -> Dict[str, Any]:
+        """Serialize hypotheses into a plain dict with "summary", "insights" and "metadata" for the Report Builder."""
+        confidences = [item.confidence for item in insights]
+        type_names = [item.reasoning_type.value for item in insights]
+        serialized = [
+            {
+                "statement": item.statement,
+                "reasoning_chain": item.reasoning_chain,
+                "confidence": item.confidence,
+                "reasoning_type": item.reasoning_type.value,
+                "testable_implications": item.testable_implications,
+            }
+            for item in insights
+        ]
+        metadata = {
+            "total_insights": len(insights),
+            "average_confidence": sum(confidences) / len(insights) if insights else 0,
+            "reasoning_types_used": list(set(type_names)),
+        }
+        return {"summary": f"Generated {len(insights)} key insights", "insights": serialized, "metadata": metadata}
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..4c13159
--- /dev/null
+++ b/main.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+"""
+Multi-Agent AI Deep Researcher
+- Prefers JSON input from CLI (--input), else env var (RESEARCH_INPUT_FILE),
+ else local fallback research_input.json / input.json
+- Falls back to the built-in sample if nothing is found/valid
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+import argparse
+
+from insight_agent import InsightGenerationAgent
+from report_agent import ReportBuilderAgent
+from config import Config
+
+
+def setup_environment():
+ """Setup and validate environment"""
+ print("๐ Setting up Multi-Agent AI Deep Researcher.")
+ try:
+ Config.validate_config()
+ print("โ
Configuration validated successfully")
+ Path(Config.OUTPUT_DIR).mkdir(exist_ok=True)
+ print(f"โ
Output directory ready: {Config.OUTPUT_DIR}")
+ return True
+ except Exception as e:
+ print(f"โ Setup failed: {e}")
+ return False
+
+
+def parse_args(argv=None):
+    """Read the optional --input path from argv; unrecognised arguments are tolerated rather than rejected."""
+    parser = argparse.ArgumentParser(description="Run Multi-Agent AI Deep Researcher")
+    parser.add_argument(
+        "--input",
+        dest="input_path",
+        help='Path to JSON file containing {"contextual_data": {...}, "analysis_results": {...}}',
+    )
+    # parse_known_args keeps extras injected by Spyder/IPython from aborting the run.
+    known, _extras = parser.parse_known_args(argv)
+    return known
+
+
+def resolve_input_path(args) -> str | None:
+    """Pick the input JSON: CLI --input, then RESEARCH_INPUT_FILE, then files next to this script; None if none fit."""
+    # 1) CLI
+    if args and args.input_path:
+        return args.input_path
+
+    # 2) ENV
+    env_path = os.getenv("RESEARCH_INPUT_FILE")
+    if env_path:
+        return env_path
+
+    # 3) Local defaults next to main.py
+    here = Path(__file__).resolve().parent
+    for candidate in ["research_input.json", "input.json"]:
+        p = here / candidate
+        if p.exists():
+            return str(p)
+
+    # 4) As a convenience, try *.json files with the right keys, in sorted order so the pick is deterministic
+    for jf in sorted(here.glob("*.json")):
+        try:
+            with jf.open("r", encoding="utf-8") as f:
+                data = json.load(f)
+        except (OSError, ValueError):
+            continue  # unreadable, not UTF-8, or not valid JSON: try the next file
+        if isinstance(data, dict) and "contextual_data" in data and "analysis_results" in data:
+            return str(jf)
+
+    return None
+
+
+def load_research_data(input_path: str | None):
+ """
+ Load research data from JSON if provided, else return the built-in sample.
+ The JSON file must contain keys: 'contextual_data' and 'analysis_results'.
+ """
+ if input_path:
+ p = Path(input_path)
+ print(f"\n๐ Looking for input JSON: {p}")
+ if not p.exists():
+ raise FileNotFoundError(f"Input file not found: {p}")
+
+ print(f"๐ Loading research data from {p}")
+ with p.open("r", encoding="utf-8") as f:
+ data = json.load(f)
+
+ if not isinstance(data, dict) or "contextual_data" not in data or "analysis_results" not in data:
+ raise ValueError("Invalid input JSON. Expected top-level keys 'contextual_data' and 'analysis_results'.")
+
+ return data["contextual_data"], data["analysis_results"]
+
+ # -------- Built-in SAMPLE (unchanged idea) --------
+ print("\n๐ Loading sample research data.")
+ sample_contextual_data = {
+ "sources": [
+ {
+ "type": "research_paper",
+ "title": "The Impact of AI on Productivity",
+ "year": 2024,
+ "authors": ["Smith, J.", "Johnson, A."],
+ "journal": "Journal of AI Research",
+ },
+ {
+ "type": "news_article",
+ "title": "Market Trends in AI Adoption",
+ "year": 2024,
+ "publication": "Tech Review",
+ },
+ {
+ "type": "industry_report",
+ "title": "Q3 2024 AI Market Analysis",
+ "year": 2024,
+ "organization": "AI Research Group",
+ },
+ ],
+ "time_range": "2020-2024",
+ "geographic_scope": "Global",
+ "domain": "Artificial Intelligence",
+ "key_topics": ["productivity", "adoption", "market trends", "implementation"],
+ }
+
+ sample_analysis_results = {
+ "key_findings": [
+ "AI adoption correlates with 25-35% productivity increase in early adopters",
+ "Organizations with structured AI training programs show faster ROI",
+ "Data quality is the primary barrier to successful AI implementation",
+ "SMEs lag in AI adoption due to cost concerns",
+ ],
+ "contradictions": [
+ "Study A shows 40% productivity boost vs Study B shows 20% boost",
+ "Some reports emphasize technical barriers while others emphasize cultural resistance",
+ "Conflicting timelines for ROI across industries",
+ ],
+ "source_validations": {
+ "research_paper_1": True,
+ "news_article_1": True,
+ "industry_report_1": True,
+ "blog_post_1": False,
+ },
+ "confidence_score": 0.82,
+ "summary": "Strong evidence for AI productivity benefits, tempered by implementation challenges.",
+ "patterns": [
+ "Early adopters consistently outperform laggards",
+ "Training investment correlates with success metrics",
+ "Industry-specific variations in implementation success",
+ ],
+ }
+
+ return sample_contextual_data, sample_analysis_results
+
+
+def main():
+ """Main execution function"""
+ # Debug info to confirm args & context (very helpful in Spyder!)
+ print(f"๐ Python: {sys.executable}")
+ print(f"๐ CWD: {os.getcwd()}")
+ print(f"๐งต argv: {sys.argv}")
+
+ args = parse_args()
+ chosen_input = resolve_input_path(args)
+ print(f"๐๏ธ Resolved input path: {chosen_input or '(none; will use sample)'}")
+
+ if not setup_environment():
+ return
+
+ print("\n๐ค Initializing AI Agents.")
+ insight_agent = InsightGenerationAgent()
+ report_agent = ReportBuilderAgent()
+ print("โ
Agents initialized successfully")
+
+ # Load data (JSON if provided, else sample)
+ contextual_data, analysis_results = load_research_data(chosen_input)
+
+ print(
+ f"๐ Data ready: {len(contextual_data.get('sources', []))} sources, "
+ f"{len(analysis_results.get('key_findings', []))} findings"
+ )
+
+ # Step 1: Generate Insights
+ print("\n๐ก Generating insights with Insight Generation Agent.")
+ insights = insight_agent.generate_insights(
+ analysis_data=analysis_results,
+ contradictions=analysis_results.get("contradictions", []),
+ source_validations=analysis_results.get("source_validations", {}),
+ )
+ print(f"โ
Generated {len(insights)} insights")
+
+ # Format insights for reporting
+ formatted_insights = insight_agent.format_insights_report(insights)
+
+ # Step 2: Build Comprehensive Report
+ print("\n๐ Building comprehensive report with Report Builder Agent.")
+ report = report_agent.build_comprehensive_report(
+ contextual_data=contextual_data,
+ analysis_results=analysis_results,
+ insights=formatted_insights["insights"],
+ report_type="research_report",
+ )
+
+ # Step 3: Save Report
+ print("\n๐พ Saving report.")
+ report_agent.save_report(report)
+
+ # Display summary
+ print("\n๐ MULTI-AGENT RESEARCH COMPLETED!")
+ print("=" * 50)
+ print(f"๐ Insights Generated: {len(insights)}")
+ print(f"๐ Report ID: {report['metadata']['report_id']}")
+ print(f"๐ Output Formats: {', '.join(report['formats'].keys())}")
+ print(f"๐ Saved to: {Config.OUTPUT_DIR}/")
+ print("=" * 50)
+
+ if insights:
+ sample_insight = insights[0]
+ print(f"\n๐ Sample Insight:")
+ print(f"Statement: {sample_insight.statement}")
+ print(f"Confidence: {sample_insight.confidence:.2f}")
+ print(f"Type: {sample_insight.reasoning_type.value}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/multi_agent_researcher/README.md b/multi_agent_researcher/README.md
new file mode 100644
index 0000000..0fbb08d
--- /dev/null
+++ b/multi_agent_researcher/README.md
@@ -0,0 +1,102 @@
+# ai-accelerator-C2
+# Hackathon - assignment
+
+Project structure:
+`multi_agent_researcher/`
+- `main.py`
+- `home_ui.py`
+- `components.py`
+- `agents_stub.py`
+- `rag.py`
+- `utils.py`
+- `requirements.txt`
+- `README.md`
+
+# Multi-Agent Deep Researcher — Streamlit UI (Phase 1, updated)
+
+## Key fixes in this version
+- Fixed Clear button (no recursion; uses session state).
+- Export sidebar button now shows an alert directing users to the Report panel where a proper Download button exists.
+- Added action handler functions for Run / Clear / Export to keep code modular.
+- Replaced external lightbulb URL with embedded SVG data URI to avoid broken image fetches.
+- Documented all files and functions.
+
+## Quickstart
+1. Create venv and install:
+ ```bash
+ python -m venv .venv
+ ```
+
+ Activate the virtual environment (choose the command for your OS/shell):
+
+ - macOS / Linux (bash/zsh):
+ ```bash
+ source .venv/bin/activate
+ ```
+
+ - Windows (PowerShell):
+ ```powershell
+ .\.venv\Scripts\Activate.ps1
+ ```
+
+ - Windows (cmd.exe):
+ ```bat
+ .venv\Scripts\activate.bat
+ ```
+
+ Then install requirements:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+2. Run:
+   `streamlit run main.py`
+
+
+## Integration notes for backend team
+- Replace `rag.run_research` internals with the actual orchestrator.
+- Keep the returned dict keys exactly as: `retrieval`, `analysis`, `insights`, `report`.
+- The UI expects `report['report_markdown']` for the final markdown content.
+
+## Troubleshooting
+- If buttons appear unresponsive: confirm no long-blocking network call is placed in the main UI thread (move to an async/job queue).
+- Avoid using `st.experimental_rerun()` inside handlers — this version removed such calls to fix the RecursionError you saw.
+
+
+## Notes & Next steps (phase 2)
+Planned UI enhancements (phase 2):
+- Theme toggles: Dark / Light / Neon (use `st.set_page_config` + custom CSS)
+- Multiple loading indicators (Lottie + progress bar)
+- Export to PDF, PPTX
+- Post compiled report to LinkedIn (requires OAuth + LinkedIn API)
+- Animations and nicer UX polish
+
+## References
+- Streamlit docs: https://docs.streamlit.io/
+- streamlit-lottie: https://github.com/andfanilo/streamlit-lottie
+- LangChain / LangGraph docs: https://docs.langchain.com/
+- LlamaIndex docs: https://developers.llamaindex.ai/
+- FAISS docs: https://faiss.ai/
+
+## Testing plan (so we can demo Saturday 2 PM)
+1. Install requirements and run `streamlit run main.py`.
+2. Enter a query, tweak temperature/model, and click **Run Research**.
+3. Inspect the collapsible agent outputs, then try **Download Report (Markdown)** to verify the file contents.
+
+Ask backend teammates to:
+- Replace the `agents_stub` calls inside `rag.run_research` with real orchestration calls (LangGraph or LangChain flows).
+- Ensure the returned dict keys match the expected names (`retrieval`, `analysis`, `insights`, `report`).
+### Integration notes & pointers for backend teammates
+
+Thomas (FAISS & SQLite layer): create a microservice function retrieve_docs(query, depth) returning a list of docs: {title, snippet, source, embedding_meta}. The UI will expect retrieval["docs"].
+FAISS docs & quickstart:
+https://faiss.ai/
+
+
+Kalyan (LangChain / LangGraph orchestration): return final structured dict with retrieval, analysis, insights, report. Use streaming to update UI later.
+LangChain overview & LangGraph:
+https://docs.langchain.com/
+
+
+Mamta (nodes / prompts): ensure each node outputs a small JSON structure (easy to display).
+Testing: the UI works with stubs now; replacing rag.run_research with real async/external calls might block Streamlit. For long-running tasks, expose an async/queue API or background job (e.g., actor model or store state in DB and poll). Later we can integrate durable LangGraph execution.
\ No newline at end of file
diff --git a/multi_agent_researcher/agents_stub.py b/multi_agent_researcher/agents_stub.py
new file mode 100644
index 0000000..f3af5e8
--- /dev/null
+++ b/multi_agent_researcher/agents_stub.py
@@ -0,0 +1,49 @@
+"""
+agents_stub.py
+--------------
+Stubbed agent implementations to simulate the backend pipeline.
+
+Why:
+- UI testing should not depend on backend development.
+- Each function simulates latency and returns minimal structured outputs
+ expected by the UI and the `rag.run_research` orchestrator.
+"""
+
+import time
+import random
+
+def fake_retriever(query: str, depth: int = 3):
+    """Stand-in retrieval agent: pause ~1.0-1.8 s, then return `depth` placeholder docs and a one-line summary."""
+    pause = 1.0 + random.random()*0.8
+    time.sleep(pause)
+    docs = []
+    for n in range(1, depth + 1):
+        docs.append({"title": f"Paper {n} about {query[:30]}...", "snippet": f"Summary snippet {n}", "source": f"https://example.org/doc/{n}"})
+    return {"docs": docs, "summary": f"Retrieved {depth} documents for query: '{query}'"}
+
+def fake_critical_analysis(retrieval_output):
+    """Stand-in analysis agent: pause ~1.0-2.0 s, then return a fixed analysis note, one contradiction and one insight."""
+    time.sleep(1.0 + random.random()*1.0)
+    result = {"analysis": "Critical analysis complete", "contradictions": ["Doc 2 claims X while Doc 3 claims not X"]}
+    result["insights"] = ["Key theme: increased computational power will pressure cryptography migration."]
+    return result
+
+def fake_insight_generation(analysis_output):
+    """Stand-in insight agent: pause ~1.0-1.8 s, then return two fixed hypotheses with confidence 0.72."""
+    time.sleep(1.0 + random.random()*0.8)
+    # analysis_output is accepted to match the pipeline's call shape but is never read.
+    first = "Hypothesis 1: Post-quantum adoption will accelerate in 5-8 years."
+    second = "Hypothesis 2: Hybrid classical-quantum-resistant stacks will be common."
+    payload = {"hypotheses": [first, second], "confidence": 0.72}
+    return payload
+
+def fake_report_builder(retrieval, analysis, insights):
+    """Stand-in report builder: pause 0.6 s, then return {"report_markdown": ...} assembled from the three inputs."""
+    time.sleep(0.6)
+    pieces = [
+        "# Research Report\n\n",
+        f"**Retrieval summary:** {retrieval['summary']}\n\n",
+        f"**Analysis:** {analysis['analysis']}\n\n",
+        "## Hypotheses\n",
+    ] + [f"- {item}\n" for item in insights['hypotheses']]
+    return {"report_markdown": "".join(pieces)}
diff --git a/multi_agent_researcher/components.py b/multi_agent_researcher/components.py
new file mode 100644
index 0000000..71640e7
--- /dev/null
+++ b/multi_agent_researcher/components.py
@@ -0,0 +1,46 @@
+"""
+components.py
+-------------
+Small UI helper components used by the Streamlit frontend.
+
+Why:
+- Encapsulates Lottie loading and rendering logic (safely).
+- Keeps main UI code clean by centralizing external requests and fallbacks.
+"""
+
+import streamlit as st
+from streamlit_lottie import st_lottie
+import requests
+
+def load_lottie_url(url: str, timeout: float = 6.0):
+    """
+    Load a Lottie JSON given a URL. Returns the parsed JSON, or None on a non-200 status or any error.
+    Failures are logged at DEBUG level via the stdlib `logging` module, so the UI never shows a traceback.
+    """
+    import logging  # st.debug does not exist in Streamlit; calling it in the handler raised AttributeError
+    try:
+        r = requests.get(url, timeout=timeout)
+        if r.status_code == 200:
+            return r.json()
+    except Exception as e:
+        logging.getLogger(__name__).debug("components.load_lottie_url: failed to load %s: %s", url, e)
+    return None
+
+def lottie_spinner(url: str = None, lottie_json: dict = None, height: int = 150, key: str = None):
+    """
+    Render a Lottie animation as a spinner area, or write "Loading..." when none can be shown.
+    - url: optional URL to load the lottie JSON from; only used when lottie_json is None.
+    - lottie_json: optional preloaded JSON (preferred for tests/offline).
+    Returns True if a Lottie was rendered, False otherwise (so the caller can show fallback UI).
+    """
+    import logging  # st.debug does not exist in Streamlit; calling it in the handler raised AttributeError
+    try:
+        if lottie_json is None and url:
+            lottie_json = load_lottie_url(url)
+        if lottie_json:
+            st_lottie(lottie_json, height=height, key=key)
+            return True
+    except Exception as e:
+        logging.getLogger(__name__).debug("components.lottie_spinner: render failed: %s", e)
+    st.write("Loading...")  # fallback: simple message
+    return False
diff --git a/multi_agent_researcher/docs/dummy.txt b/multi_agent_researcher/docs/dummy.txt
new file mode 100644
index 0000000..e69de29
diff --git a/multi_agent_researcher/flowdiagram.jpeg b/multi_agent_researcher/flowdiagram.jpeg
new file mode 100644
index 0000000..7ca3fea
Binary files /dev/null and b/multi_agent_researcher/flowdiagram.jpeg differ
diff --git a/multi_agent_researcher/guard.py b/multi_agent_researcher/guard.py
new file mode 100644
index 0000000..5fdef45
--- /dev/null
+++ b/multi_agent_researcher/guard.py
@@ -0,0 +1,12 @@
+import os, sys
+from hmac import compare_digest
+
+expected = os.getenv("HACKATHON_ENTRY_KEY") # secret kept OUTSIDE code
+if not expected:
+ print("๐ Entry key not configured. Set HACKATHON_ENTRY_KEY.")
+ sys.exit(1)
+
+entry = input("Enter entry key: ").strip()
+if not compare_digest(entry, expected):
+ print("๐ Invalid key โ exiting.")
+ sys.exit(1)
diff --git a/multi_agent_researcher/home_ui.py b/multi_agent_researcher/home_ui.py
new file mode 100644
index 0000000..7d4c766
--- /dev/null
+++ b/multi_agent_researcher/home_ui.py
@@ -0,0 +1,64 @@
+"""
+home_ui.py
+----------
+Holds functions to build the top-level page header and side-bar controls.
+
+Why:
+- Keeps all Streamlit widget creation in one place so the main file can focus on orchestration.
+- Exposes `sidebar_controls()` which returns a plain dict of control values (pure data).
+- Exposes `page_header()` for consistent header rendering.
+"""
+
+import streamlit as st
+import base64
+
+# Small lightbulb icon inlined as a percent-encoded SVG data URI, so rendering it needs no remote image fetch.
+_LIGHTBULB_SVG_DATA_URI = (
+    "data:image/svg+xml;utf8,"
+    "%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' width='64' height='64'%3E"
+    "%3Cpath fill='%23f5c542' d='M9 21h6v-1a2 2 0 0 0-2-2H11a2 2 0 0 0-2 2v1z'/%3E"
+    "%3Cpath fill='%23f5c542' d='M12 2a7 7 0 0 0-4 12.9V17a1 1 0 0 0 1 1h6a1 1 0 0 0 1-1v-2.1A7 7 0 0 0 12 2z'/%3E"
+    "%3C/svg%3E"
+)
+
+def sidebar_controls():
+    """
+    Build every sidebar widget, top to bottom, and hand back their current values as a plain dict.
+    The "run", "clear" and "export" entries hold the return values of the three sidebar buttons.
+    """
+    sidebar = st.sidebar
+    sidebar.title("Multi-Agent Research Controls")
+    research_query = sidebar.text_area("Research Query", value="Impact of quantum computing on cryptography", height=80)
+    chosen_model = sidebar.selectbox("Model", ["local-mock", "gpt-4o", "gpt-4", "openai/gpt-4"], index=0)
+    temp = sidebar.slider("Temperature", 0.0, 1.0, 0.2, 0.05)
+    token_cap = sidebar.slider("Max tokens", 256, 4096, 1024, step=256)
+    agent_count = sidebar.selectbox("Number of agents to run", [1,2,3,4,5], index=4)
+    depth = sidebar.slider("Retrieval depth (docs per query)", 1, 10, 3)
+    # Each button yields True only on the event that triggers it.
+    run_clicked = sidebar.button("Run Research", key="run_research")
+    clear_clicked = sidebar.button("Clear Results", key="clear_results")
+    export_clicked = sidebar.button("Export Request (sidebar)", key="export_sidebar")
+    return dict(
+        query=research_query,
+        model=chosen_model,
+        temperature=temp,
+        max_tokens=token_cap,
+        num_agents=agent_count,
+        retrieval_depth=depth,
+        run=run_clicked,
+        clear=clear_clicked,
+        export=export_clicked,
+    )
+
+def page_header():
+ """
+ Renders a consistent page header (logo + title).
+ Uses an embedded SVG (data URI) to avoid external image failures.
+ """
+ col1, col2 = st.columns([9,1])
+ with col1:
+ st.title("Multi-Agent AI Deep Researcher โ UI Prototype")
+ st.markdown("A modular Streamlit frontend for multi-agent RAG research (UI-only, stubs in place).")
+ with col2:
+ # Use embedded svg image (safe) to avoid 3rd-party fetch issues.
+ st.image(_LIGHTBULB_SVG_DATA_URI, width=48)
diff --git a/multi_agent_researcher/main.py b/multi_agent_researcher/main.py
new file mode 100644
index 0000000..b12c74d
--- /dev/null
+++ b/multi_agent_researcher/main.py
@@ -0,0 +1,170 @@
+"""
+main.py
+-------
+Streamlit app entry for the Multi-Agent Deep Researcher UI prototype.
+
+Important changes in this version:
+- All button actions use dedicated handler functions.
+- `Clear` no longer calls `st.experimental_rerun()` (which previously caused recursion).
+ We use session-state flags and present non-blocking alerts instead.
+- `Export` sidebar button shows an alert directing the user to the report's Download button.
+- The report panel contains the actual `st.download_button` for a deterministic export flow.
+
+Keep the same module names and data shapes as previous version to ensure backend compatibility.
+"""
+
+# To ensure without our secret key we will not be able to run the app
+# rest of your app code below
+from dotenv import load_dotenv
+load_dotenv()
+import guard
+
+import streamlit as st
+from home_ui import sidebar_controls, page_header
+from components import lottie_spinner
+from rag import run_research
+from utils import compile_report_md
+
+st.set_page_config(page_title="Multi-Agent Deep Researcher", layout="wide")
+
+# Render the shared header before anything else on the page.
+page_header()
+
+# Seed the session_state slots that the action handlers read and write.
+for _state_key in ("results", "last_action", "alert_message"):
+    if _state_key not in st.session_state:
+        st.session_state[_state_key] = None
+
+# Optional Lottie animation shown while research runs.
+# NOTE(review): lottie_spinner is expected to fall back when the URL cannot be fetched - confirm in components.
+LOTTIE_URL = "https://assets5.lottiefiles.com/packages/lf20_V9t630.json"
+
+# --- Action Handlers (clear, run, export) ---------------------------------
+def handle_clear():
+    """
+    React to the 'Clear Results' button.
+
+    Only session state is changed: stored results are dropped, the action is
+    recorded, and an alert message is queued. No rerun is forced here.
+    """
+    cleared_state = {
+        "results": None,
+        "last_action": "clear",
+        "alert_message": "Results cleared. You can run another research.",
+    }
+    for slot, value in cleared_state.items():
+        st.session_state[slot] = value
+
+def handle_export_sidebar():
+    """
+    React to the sidebar 'Export' button.
+
+    Nothing is exported here; the handler records the action and queues an
+    alert pointing the user at the download button in the report panel.
+    """
+    hint = "To export the compiled report, expand the 'Report Builder Agent' panel and click 'Download Report (Markdown)'."
+    st.session_state["last_action"] = "export_sidebar"
+    st.session_state["alert_message"] = hint
+
+def handle_run(query: str, num_agents: int, retrieval_depth: int, model: str, temperature: float):
+    """
+    Handler for the 'Run Research' action: call the orchestrator and store its output.
+
+    Args:
+        query: research question; forwarded unchanged to `run_research`.
+        num_agents, retrieval_depth, model, temperature: forwarded unchanged.
+
+    Side effects:
+        Writes "last_action", "results" and "alert_message" in st.session_state.
+        If the orchestrator raises, results are cleared and the alert carries
+        the error text, so the "Running research" banner is never left behind.
+    """
+    st.session_state["last_action"] = "run"
+    st.session_state["alert_message"] = f"Running research for query: '{query[:80]}'"
+
+    # Blocking call for this prototype.
+    try:
+        results = run_research(
+            query=query,
+            num_agents=num_agents,
+            retrieval_depth=retrieval_depth,
+            model=model,
+            temperature=temperature
+        )
+    except Exception as exc:
+        # UI boundary: report the failure in the banner instead of crashing the
+        # script run, and do not present old results as the answer to this query.
+        st.session_state["results"] = None
+        st.session_state["alert_message"] = f"Research failed: {exc}"
+        return
+
+    st.session_state["results"] = results
+    st.session_state["alert_message"] = "Research completed successfully."
+
+# --- Build sidebar and react to controls -----------------------------------
+# Build the sidebar once per script run and read the widget values.
+controls = sidebar_controls()
+
+# Dispatch button presses to their handlers (no forced rerun).
+if controls["clear"]:
+    handle_clear()
+
+if controls["export"]:
+    handle_export_sidebar()
+
+if controls["run"]:
+    q = controls["query"].strip()
+    if not q:
+        st.sidebar.error("Please provide a research query.")
+    else:
+        # Keep the spinner and status row visible while the blocking handler runs.
+        with st.spinner("Launching agents & running research pipeline..."):
+            col1, col2 = st.columns([3,7])
+            with col1:
+                lottie_spinner(url=LOTTIE_URL, height=150, key="main_spinner")
+            with col2:
+                # The arrows were mis-encoded in the source; restored here.
+                st.info("Agents running: Retriever → Analysis → Insights → Report Builder")
+            handle_run(
+                query=q,
+                num_agents=controls["num_agents"],
+                retrieval_depth=controls["retrieval_depth"],
+                model=controls["model"],
+                temperature=controls["temperature"]
+            )
+
+# Show the most recent alert message, if any, as a non-modal banner.
+if st.session_state.get("alert_message"):
+    st.info(st.session_state["alert_message"])
+
+# --- Display results (same structure as before, with download support) -----
+if st.session_state["results"]:
+    results = st.session_state["results"]
+    # The dash was mis-encoded in the source; restored here.
+    st.success("Research run finished — see agent outputs below.")
+
+    # Retrieval Agent
+    with st.expander("Contextual Retriever Agent (click to expand)", expanded=True):
+        st.write(results["retrieval"]["summary"])
+        for doc in results["retrieval"]["docs"]:
+            # Two spaces before "\n" are what Markdown needs for a hard line break.
+            st.markdown(f"**{doc['title']}**  \n{doc['snippet']}  \nSource: {doc['source']}")
+
+    # Critical Analysis Agent
+    with st.expander("Critical Analysis Agent"):
+        st.write(results["analysis"]["analysis"])
+        st.write("Contradictions:")
+        for c in results["analysis"]["contradictions"]:
+            st.warning(c)
+        st.write("Insights:")
+        for i in results["analysis"]["insights"]:
+            st.write(f"- {i}")
+
+    # Insight Generation Agent
+    with st.expander("Insight Generation Agent"):
+        for h in results["insights"]["hypotheses"]:
+            st.write(f"- {h} ")
+        st.write(f"Confidence: {results['insights']['confidence']:.2f}")
+
+    # Report Builder Agent (holds the real download button)
+    with st.expander("Report Builder Agent (compiled report)"):
+        report_md = results["report"]["report_markdown"]
+        st.markdown(report_md)
+
+        # Downloaded file = report markdown plus a generated-timestamp footer.
+        final_md = compile_report_md(results)
+
+        # The sidebar export button only points the user here.
+        st.download_button(
+            "Download Report (Markdown)",
+            final_md,
+            file_name="research_report.md",
+            mime="text/markdown"
+        )
+else:
+    # No results yet: show a placeholder with short instructions.
+    st.write("---")
+    st.header("Ready to run research")
+    st.write(
+        "Enter your research query and tweak model/moderation options in the sidebar. "
+        "Click **Run Research** to start. Use **Clear Results** to reset the UI."
+    )
diff --git a/multi_agent_researcher/multi_agent_pipeline_full_with_memory.py b/multi_agent_researcher/multi_agent_pipeline_full_with_memory.py
new file mode 100644
index 0000000..9918032
--- /dev/null
+++ b/multi_agent_researcher/multi_agent_pipeline_full_with_memory.py
@@ -0,0 +1,465 @@
+"""
+Multi-Agent AI Pipeline with Memory & Structured Outputs
+-----------------------------------------------------------------
+Agents:
+1. Contextual Retriever
+2. Critical Analysis
+3. Insight Generation
+4. Report Builder
+
+Features:
+- Memory to store past runs
+- Structured JSON outputs
+- Prompt templates for consistency
+- Data flow management between nodes
+- Caching for LLM outputs
+"""
+
+import os, time, json, re, hashlib
+from typing import List, Dict, Any
+
+# Import Pydantic for state schema
+from pydantic import BaseModel, Field
+
+# LangGraph
+# Try to import LangGraph; provide a minimal fallback if unavailable
+try:
+    from langgraph.graph import StateGraph, START, END
+except Exception:
+    class _FallbackStateGraph:
+        """
+        Minimal stand-in used when langgraph cannot be imported.
+
+        It records nodes and edges and can execute a linear chain of nodes
+        (one outgoing edge per node) starting at START. Branching, reducers
+        and every other LangGraph feature are not supported.
+        """
+
+        def __init__(self, state_schema=None):
+            self.nodes = {}
+            self.edges = []
+            self.state_schema = state_schema  # stored only; never validated here
+
+        def add_node(self, name, func):
+            self.nodes[name] = func
+
+        def add_edge(self, a, b):
+            self.edges.append((a, b))
+
+        # Lets `StateGraph[...]` subscripting work in type hints.
+        def __class_getitem__(cls, item):
+            return cls
+
+        def compile(self):
+            """Return self; there is no separate compiled form in the fallback."""
+            print("Warning: Running with fallback StateGraph. Full LangGraph features unavailable.")
+            return self
+
+        def invoke(self, inputs, config=None, **kwargs):
+            """
+            Run the registered nodes in edge order, feeding each node's return
+            value to the next. `config` and extra keyword arguments are ignored.
+
+            Returns the final state; a state object exposing `model_dump()` is
+            converted to a plain dict so callers can read it as a mapping.
+            """
+            successor = dict(self.edges)  # if a node has several edges, the last one wins
+            state = inputs
+            current = successor.get(START)
+            seen = set()  # guards against accidental cycles
+            while current in self.nodes and current not in seen:
+                seen.add(current)
+                state = self.nodes[current](state)
+                current = successor.get(current)
+            if hasattr(state, "model_dump"):
+                return state.model_dump()
+            return state
+
+        def run(self, inputs, config=None, **kwargs):
+            """Dummy kept for compatibility: logs into the `inputs` dict and returns it."""
+            print("Warning: Running dummy run method.")
+            inputs.setdefault("logs", []).append("Dummy run executed with fallback StateGraph.")
+            return inputs
+
+    StateGraph = _FallbackStateGraph
+    START = "__START__"
+    END = "__END__"
+
+# LangChain / LLM and related imports with safe fallbacks when packages are missing
+try:
+    from langchain_openai import ChatOpenAI
+except Exception:
+    try:
+        # Alternate import location used by other langchain releases
+        from langchain.chat_models import ChatOpenAI  # type: ignore
+    except Exception:
+        class ChatOpenAI:
+            """
+            Offline mock used when no langchain chat client can be imported.
+
+            `invoke` returns an object with a `content` attribute: canned JSON
+            when the prompt contains one of the agents' marker phrases,
+            otherwise the prompt itself.
+            """
+
+            def __init__(self, model="gpt-4o", temperature=2.0):
+                self.model = model
+                self.temperature = temperature
+
+            def invoke(self, prompt: str):
+                class _Reply:
+                    pass
+
+                def mentions(*needles):
+                    return any(needle in prompt for needle in needles)
+
+                # Branch order matters: the first matching agent wins.
+                if mentions("Critical Analysis Agent", "Summarize the main findings"):
+                    content = json.dumps({
+                        "summary": "Mock summary (langchain not installed).",
+                        "contradictions": [],
+                        "source_evaluation": []
+                    })
+                elif mentions("Insight Generation Agent", "Identify trends"):
+                    content = json.dumps([{
+                        "insight": "Mock insight",
+                        "reasoning_chain": "Generated from mock LLM",
+                        "confidence": "low",
+                        "supporting_evidence": []
+                    }])
+                elif mentions("professional report writer", "Compile the following"):
+                    content = json.dumps({
+                        "executive_summary": "Mock report generated because LangChain/OpenAI packages are not installed.",
+                        "key_findings": [],
+                        "contradictions": [],
+                        "insights": [],
+                        "recommendations": []
+                    })
+                else:
+                    content = prompt  # no marker found: echo the prompt
+
+                reply = _Reply()
+                reply.content = content
+                return reply
+
+# Document class fallback (used in several places)
+try:
+    from langchain.schema import Document
+except Exception:
+    class Document:
+        """Plain substitute holding a text body and a metadata dict."""
+
+        def __init__(self, page_content: str, metadata: Dict[str, Any] = None):
+            self.page_content = page_content
+            # A falsy metadata argument (None, {}) becomes a fresh empty dict.
+            self.metadata = metadata if metadata else {}
+
+# Vectorstore / embeddings / text splitter fallbacks
+try:
+    from langchain.vectorstores import LanceDB
+except Exception:
+    class LanceDB:
+        """Fallback vector store: keeps the documents and does no real similarity ranking."""
+
+        @staticmethod
+        def from_documents(docs, embeddings, connection=None, table_name=None):
+            # embeddings, connection and table_name are accepted but unused here.
+            class _InMemoryStore:
+                def __init__(self, stored):
+                    self._docs = stored
+
+                def similarity_search(self, query, k=5):
+                    # "Search" is just the first k stored documents.
+                    return self._docs[:k]
+
+            return _InMemoryStore(docs)
+
+try:
+    from langchain.embeddings.openai import OpenAIEmbeddings
+except Exception:
+    class OpenAIEmbeddings:
+        """Fallback embedder that ignores its configuration and emits dummy vectors."""
+
+        def __init__(self, *args, **kwargs):
+            pass
+
+        def embed_documents(self, docs: List[str]):
+            # One single-element zero vector per input document.
+            return [[0.0] for _ in docs]
+
+try:
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+except Exception:
+    class RecursiveCharacterTextSplitter:
+        """Fallback splitter: stores the chunking settings but never splits."""
+
+        def __init__(self, chunk_size=1000, chunk_overlap=150):
+            self.chunk_size, self.chunk_overlap = chunk_size, chunk_overlap
+
+        def split_documents(self, docs: List[Document]):
+            # Safe no-op: the documents are returned unchanged (same list object).
+            return docs
+
+# lancedb lightweight fallback
+try:
+    import lancedb
+except Exception:
+    class _FakeConn:
+        """In-memory stand-in for a lancedb connection; the path argument is ignored."""
+
+        def __init__(self, path):
+            self._tables = {}
+
+        def table_names(self):
+            return [name for name in self._tables]
+
+        def drop_table(self, name):
+            # Dropping an unknown table is a silent no-op.
+            self._tables.pop(name, None)
+
+    class lancedb:
+        """Namespace mimicking the `lancedb` module's `connect` entry point."""
+
+        @staticmethod
+        def connect(path):
+            return _FakeConn(path)
+
+# requests fallback for environments without requests installed
+try:
+    import requests
+except Exception:
+    class _FakeResponse:
+        """Response stub exposing only `.json()`."""
+
+        def __init__(self, data=None):
+            # A falsy payload (None, {}) becomes a fresh empty dict.
+            self._data = data if data else {}
+
+        def json(self):
+            return self._data
+
+    class requests:
+        """Namespace mimicking `requests.get`; every call yields an empty article list."""
+
+        @staticmethod
+        def get(*args, **kwargs):
+            payload = {"articles": []}
+            return _FakeResponse(payload)
+
+import pickle
+
+# ----------------------------
+# Shared Pipeline State
+# ----------------------------
+class PipelineState(BaseModel):
+    """Shared state handed from one pipeline node to the next."""
+
+    # When the langchain import fails, `Document` is the plain fallback class
+    # above. Pydantic v2 cannot build a schema for such a type unless arbitrary
+    # types are allowed, so without this the class definition itself raises.
+    model_config = {"arbitrary_types_allowed": True}
+
+    query: str = Field(default="")                                # research question
+    sources: List[Dict[str, Any]] = Field(default_factory=list)  # dicts read via "type" and "query"
+    raw_docs: List[Document] = Field(default_factory=list)
+    chunks: List[Document] = Field(default_factory=list)          # output of the text splitter
+    vectorstore: Any = Field(default=None)                        # set by retrieve_data
+    critical_analysis: Dict[str, Any] = Field(default_factory=dict)
+    insights: List[Dict[str, Any]] = Field(default_factory=list)
+    report_text: str = Field(default="")
+    report_json: Dict[str, Any] = Field(default_factory=dict)
+    logs: List[str] = Field(default_factory=list)                 # timestamped lines added by log()
+    memory: Dict[str, Any] = Field(default_factory=dict)          # "last_insights" / "last_report"
+
+def log(state: Dict[str, Any], msg: str):
+    """Append `msg`, prefixed with the current HH:MM:SS time, to state["logs"] (created if absent)."""
+    stamp = time.strftime('%H:%M:%S')
+    entries = state.setdefault("logs", [])
+    entries.append(f"{stamp} {msg}")
+
+# ----------------------------
+# Simple caching utility
+# ----------------------------
+def llm_cache(llm_func, prompt: str, cache_dir="./cache"):
+    """
+    Return `llm_func(prompt)`, cached on disk under the SHA-256 of the prompt.
+
+    Args:
+        llm_func: callable taking the prompt and returning a picklable result.
+        prompt: prompt text; its hash alone is the cache key.
+        cache_dir: directory for the `<hash>.pkl` files (created if missing).
+
+    Returns:
+        The cached result if a file for this prompt exists, otherwise the
+        fresh result, which is written to the cache before returning.
+    """
+    os.makedirs(cache_dir, exist_ok=True)
+    key = hashlib.sha256(prompt.encode()).hexdigest()
+    cache_path = os.path.join(cache_dir, f"{key}.pkl")
+    if os.path.exists(cache_path):
+        # NOTE(security): unpickling runs code embedded in the file; keep
+        # cache_dir writable only by trusted users.
+        with open(cache_path, "rb") as fh:  # context manager closes the handle
+            return pickle.load(fh)
+    res = llm_func(prompt)
+    with open(cache_path, "wb") as fh:
+        pickle.dump(res, fh)
+    return res
+
+# ===============================================================
+# 1️⃣ Contextual Retriever
+# ===============================================================
+def retrieve_data(state: PipelineState) -> PipelineState:
+    """
+    Fetch documents for every configured source, chunk them and index them.
+
+    Sources of type "research" go through ArxivLoader; sources of type "news"
+    go through the NewsAPI `everything` endpoint (needs NEWS_API_KEY in the
+    environment). A failing source is logged and skipped.
+
+    Returns:
+        A new PipelineState with `chunks`, `vectorstore` and `logs` updated;
+        when nothing was retrieved only `logs` changes.
+    """
+    # Work on a dict copy and rebuild the model on return.
+    state_dict = state.model_dump()
+    log(state_dict, "Starting data retrieval...")
+    all_docs = []
+
+    for src in (state_dict.get("sources") or []):
+        already = len(all_docs)  # lets the log report this source's count only
+        if src["type"] == "research":
+            try:
+                from langchain.document_loaders import ArxivLoader
+                # NOTE(review): confirm `search_query`/`max_results` match the installed ArxivLoader signature.
+                loader = ArxivLoader(search_query=src["query"], max_results=3)
+                all_docs.extend(loader.load())
+                log(state_dict, f"Fetched {len(all_docs) - already} research docs for '{src['query']}'")
+            except Exception as e:
+                log(state_dict, f"[ERROR] ArxivLoader: {e}")
+        elif src["type"] == "news":
+            NEWS_KEY = os.getenv("NEWS_API_KEY")
+            if not NEWS_KEY:
+                log(state_dict, "NEWS_API_KEY missing; skipping news source.")
+                continue
+            try:
+                r = requests.get(
+                    "https://newsapi.org/v2/everything",
+                    params={"q": src["query"], "pageSize": 5, "apiKey": NEWS_KEY},
+                    timeout=10,  # never hang the pipeline on a stalled connection
+                )
+                for art in r.json().get("articles", []):
+                    # Fields may be null or absent; keep only the parts that have text.
+                    parts = [art.get("title"), art.get("description"), art.get("content")]
+                    text = " - ".join(p for p in parts if p)
+                    if not text:
+                        continue
+                    all_docs.append(Document(page_content=text, metadata={"source": "news", "url": art.get("url")}))
+                log(state_dict, f"Fetched {len(all_docs) - already} news docs for '{src['query']}'")
+            except Exception as e:
+                log(state_dict, f"[ERROR] News fetch failed: {e}")
+
+    if not all_docs:
+        log(state_dict, "No documents retrieved.")
+        return PipelineState(**state_dict)
+
+    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
+    chunks = splitter.split_documents(all_docs)
+    state_dict["chunks"] = chunks
+
+    # Rebuild the LanceDB table from scratch on every run.
+    os.makedirs("./vector_db", exist_ok=True)
+    conn = lancedb.connect("./vector_db")
+    if "retriever_data" in conn.table_names():
+        conn.drop_table("retriever_data")
+    vs = LanceDB.from_documents(chunks, OpenAIEmbeddings(), connection=conn, table_name="retriever_data")
+    state_dict["vectorstore"] = vs
+    log(state_dict, "Contextual retrieval complete.")
+    return PipelineState(**state_dict)
+
+# ===============================================================
+# 2️⃣ Critical Analysis Agent
+# ===============================================================
+CRIT_ANALYSIS_PROMPT = """
+You are a Critical Analysis Agent.
+Summarize the main findings, highlight contradictions, and validate sources.
+
+Context:
+{context}
+
+Return structured JSON:
+{{
+ "summary": "...",
+ "contradictions": [...],
+ "source_evaluation": [...]
+}}
+"""
+
+def critical_analysis(state: PipelineState) -> PipelineState:
+    """
+    Ask the LLM to summarise and critique the top documents for the query.
+
+    Takes the 5 most similar documents from `state.vectorstore`, fills
+    CRIT_ANALYSIS_PROMPT with their text and parses the reply as JSON.
+
+    Returns:
+        A new PipelineState whose `critical_analysis` holds the parsed JSON,
+        or {} if the call or parsing failed. Without a vectorstore only
+        `logs` changes.
+    """
+    state_dict = state.model_dump()
+    vs = state_dict.get("vectorstore")
+    query = state_dict.get("query", "")
+    log(state_dict, "Starting critical analysis...")
+
+    if not vs:
+        log(state_dict, "Vectorstore missing; skipping critical analysis.")
+        return PipelineState(**state_dict)
+
+    docs = vs.similarity_search(query, k=5)
+    context_text = "\n\n".join([d.page_content for d in docs])
+    prompt = CRIT_ANALYSIS_PROMPT.format(context=context_text)
+
+    llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
+
+    def _ask(p: str) -> str:
+        # Invoke once: a getattr default of str(llm.invoke(p)) is evaluated
+        # eagerly and would send every prompt to the model twice.
+        response = llm.invoke(p)
+        return getattr(response, "content", str(response))
+
+    try:
+        result_text = llm_cache(_ask, prompt)
+        state_dict["critical_analysis"] = json.loads(result_text)
+        log(state_dict, "Critical analysis complete.")
+    except Exception as e:
+        log(state_dict, f"[ERROR] Critical analysis failed: {e}")
+        state_dict["critical_analysis"] = {}
+
+    return PipelineState(**state_dict)
+
+# ===============================================================
+# 3️⃣ Insight Generation Agent
+# ===============================================================
+INSIGHT_PROMPT = """
+You are an Insight Generation Agent.
+Analyze the critical analysis and identify trends, hypotheses, or implications.
+
+Critical Analysis JSON:
+{crit_analysis}
+
+Return a list of insights in JSON:
+[
+ {{
+ "insight": "...",
+ "reasoning_chain": "...",
+ "confidence": "low | medium | high",
+ "supporting_evidence": ["...", "..."]
+ }}
+]
+"""
+
+def generate_insights(state: PipelineState) -> PipelineState:
+    """
+    Derive a list of insights from the critical-analysis JSON.
+
+    Fills INSIGHT_PROMPT with the analysis, then parses the span from the
+    first "[" to the last "]" of the reply as a JSON list.
+
+    Returns:
+        A new PipelineState with `insights` set ([] on failure or no match)
+        and a copy stored under memory["last_insights"]. Without analysis
+        data only `logs` changes.
+    """
+    state_dict = state.model_dump()
+    crit_analysis = state_dict.get("critical_analysis") or {}
+    if not crit_analysis:
+        log(state_dict, "No critical analysis data; skipping insight generation.")
+        return PipelineState(**state_dict)
+
+    prompt = INSIGHT_PROMPT.format(crit_analysis=json.dumps(crit_analysis, indent=2))
+    llm = ChatOpenAI(model="gpt-4o", temperature=0.3)
+
+    def _ask(p: str) -> str:
+        # Invoke once: a getattr default of str(llm.invoke(p)) is evaluated
+        # eagerly and would send every prompt to the model twice.
+        response = llm.invoke(p)
+        return getattr(response, "content", str(response))
+
+    try:
+        result_text = llm_cache(_ask, prompt)
+        # The reply may wrap the list in prose; take the bracketed span only.
+        match = re.search(r"\[.*\]", result_text, re.S)
+        insights = json.loads(match.group(0)) if match else []
+        state_dict["insights"] = insights
+        log(state_dict, f"Generated {len(insights)} insights.")
+    except Exception as e:
+        log(state_dict, f"[ERROR] Insight generation failed: {e}")
+        state_dict["insights"] = []
+
+    # Remember the latest insights in the pipeline memory.
+    state_dict.setdefault("memory", {})["last_insights"] = state_dict["insights"]
+    return PipelineState(**state_dict)
+
+# ===============================================================
+# 4️⃣ Report Builder Agent
+# ===============================================================
+REPORT_PROMPT = """
+You are a professional report writer.
+Compile the following into a structured report with sections:
+- Executive Summary
+- Key Findings
+- Contradictions
+- Insights
+- Recommendations
+
+Critical Analysis JSON:
+{crit_analysis}
+
+Insights JSON:
+{insights}
+
+Return both TEXT and JSON.
+"""
+
+def build_report(state: PipelineState) -> PipelineState:
+    """
+    Compile the critical analysis and insights into the final report.
+
+    Fills REPORT_PROMPT, keeps the raw reply as `report_text`, and parses the
+    span from the first "{" to the last "}" as `report_json`.
+
+    Returns:
+        A new PipelineState with `report_text` / `report_json` set and the
+        JSON stored under memory["last_report"]. On failure `report_text`
+        holds the error text and `report_json` is {}.
+    """
+    state_dict = state.model_dump()
+    crit_analysis = state_dict.get("critical_analysis") or {}
+    insights = state_dict.get("insights") or []
+
+    prompt = REPORT_PROMPT.format(
+        crit_analysis=json.dumps(crit_analysis, indent=2),
+        insights=json.dumps(insights, indent=2)
+    )
+
+    llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
+
+    def _ask(p: str) -> str:
+        # Invoke once: a getattr default of str(llm.invoke(p)) is evaluated
+        # eagerly and would send every prompt to the model twice.
+        response = llm.invoke(p)
+        return getattr(response, "content", str(response))
+
+    try:
+        result_text = llm_cache(_ask, prompt)
+        match = re.search(r"\{.*\}", result_text, re.S)
+        report_json = json.loads(match.group(0)) if match else {}
+        state_dict["report_text"] = result_text
+        state_dict["report_json"] = report_json
+        state_dict.setdefault("memory", {})["last_report"] = state_dict["report_json"]
+        log(state_dict, "Report built successfully.")
+    except Exception as e:
+        log(state_dict, f"[ERROR] Report building failed: {e}")
+        state_dict["report_text"] = str(e)
+        state_dict["report_json"] = {}
+
+    return PipelineState(**state_dict)
+
+# ===============================================================
+# LangGraph Pipeline Setup
+# ===============================================================
+def build_pipeline():
+    """
+    Assemble the four-stage graph: retrieve -> analyse -> insights -> report.
+
+    Returns:
+        The un-compiled StateGraph built over PipelineState; callers invoke
+        `.compile()` on it themselves.
+    """
+    graph = StateGraph(PipelineState)
+
+    stages = (
+        ("retrieve_data", retrieve_data),
+        ("critical_analysis", critical_analysis),
+        ("generate_insights", generate_insights),
+        ("report_builder", build_report),
+    )
+    for stage_name, stage_fn in stages:
+        graph.add_node(stage_name, stage_fn)
+
+    # Chain START -> each stage in order -> END.
+    route = [START] + [stage_name for stage_name, _ in stages] + [END]
+    for origin, target in zip(route, route[1:]):
+        graph.add_edge(origin, target)
+
+    return graph
+
+# ===============================================================
+# Example Run
+# ===============================================================
+if __name__ == "__main__":
+    # Demo run: one research source and one news source for a sample query.
+    demo_sources = [
+        {"type": "research", "query": "AI ethics and governance"},
+        {"type": "news", "query": "AI regulation policy updates"}
+    ]
+    state: PipelineState = PipelineState(
+        query="Latest developments and ethical concerns in AI governance",
+        sources=demo_sources
+    )
+
+    pipeline = build_pipeline().compile()
+    result = pipeline.invoke(state)
+
+    print("\n=== CRITICAL ANALYSIS ===\n", result.get("critical_analysis", "N/A"))
+
+    print("\n=== INSIGHTS ===")
+    for number, insight in enumerate(result.get("insights", []), 1):
+        print(f"\nInsight {number}: {insight.get('insight')}")
+        print(f"Reasoning: {insight.get('reasoning_chain')}")
+        print(f"Confidence: {insight.get('confidence')}")
+
+    print("\n=== REPORT TEXT ===\n", result.get("report_text"))
+    print("\n=== REPORT JSON ===\n", json.dumps(result.get("report_json", {}), indent=2))
+
+    print("\n=== LOGS ===\n")
+    for entry in result.get("logs", []):
+        print(entry)
+
diff --git a/multi_agent_researcher/rag.py b/multi_agent_researcher/rag.py
new file mode 100644
index 0000000..800ec92
--- /dev/null
+++ b/multi_agent_researcher/rag.py
@@ -0,0 +1,35 @@
+"""
+rag.py
+------
+Simple orchestrator wrapper for UI. For phase 1 it calls the local agent stubs.
+Later, replace the internals of `run_research` with real LangChain / LangGraph orchestration.
+
+API contract:
+- run_research(query, num_agents, retrieval_depth, model, temperature)
+ returns a dict with keys: 'retrieval', 'analysis', 'insights', 'report'
+"""
+from agents_stub import fake_retriever, fake_critical_analysis, fake_insight_generation, fake_report_builder
+
+def run_research(query: str, num_agents: int = 4, retrieval_depth: int = 3, model: str = "local-mock", temperature: float = 0.2):
+    """
+    Run the stubbed four-agent pipeline and bundle the outputs for the UI.
+
+    Only `query` and `retrieval_depth` reach the stubs; `num_agents`, `model`
+    and `temperature` are accepted but not used by this implementation.
+
+    Returns:
+        dict with exactly the keys 'retrieval', 'analysis', 'insights' and
+        'report', which the UI reads by name.
+    """
+    bundle = {}
+    # Each stage consumes the output of the stages before it.
+    bundle["retrieval"] = fake_retriever(query, depth=retrieval_depth)
+    bundle["analysis"] = fake_critical_analysis(bundle["retrieval"])
+    bundle["insights"] = fake_insight_generation(bundle["analysis"])
+    bundle["report"] = fake_report_builder(bundle["retrieval"], bundle["analysis"], bundle["insights"])
+    return bundle
diff --git a/multi_agent_researcher/requirements.txt b/multi_agent_researcher/requirements.txt
new file mode 100644
index 0000000..101f04c
--- /dev/null
+++ b/multi_agent_researcher/requirements.txt
@@ -0,0 +1,6 @@
+streamlit>=1.20
+streamlit-lottie>=0.0.3
+pydantic>=1.10
+markdown2>=2.4.0
+requests>=2.28
+python-dotenv
diff --git a/multi_agent_researcher/utils.py b/multi_agent_researcher/utils.py
new file mode 100644
index 0000000..b06bad0
--- /dev/null
+++ b/multi_agent_researcher/utils.py
@@ -0,0 +1,23 @@
+"""
+utils.py
+--------
+Small utility helpers for the UI: timestamps and markdown assembly.
+
+Why:
+- Centralizes small formatting helpers so the UI code remains compact.
+"""
+
+import datetime
+
+def timestamp():
+    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
+    now = datetime.datetime.now()
+    return f"{now:%Y-%m-%d %H:%M:%S}"
+
+def compile_report_md(report_dict):
+    """
+    Build the downloadable markdown from a `rag.run_research` result.
+
+    Reads report_dict["report"]["report_markdown"] and appends a horizontal
+    rule followed by a "Generated: <timestamp>" footer line.
+    """
+    body = report_dict["report"]["report_markdown"]
+    footer = f"\n\n---\nGenerated: {timestamp()}\n"
+    return body + footer
diff --git a/neelay_mehta/README.md b/neelay_mehta/README.md
deleted file mode 100644
index 614a1ef..0000000
--- a/neelay_mehta/README.md
+++ /dev/null
@@ -1,148 +0,0 @@
-# neelay_mehta
-
-
----
-
-# 🧠 AI Text Summarizer using Gradio
-
-This project is a **web-based text summarization app** built with **Gradio** and **Hugging Face Transformers**.
-It uses the **Falconsai/text_summarization** model to generate concise summaries of long text passages,
-and includes **dark/light theme toggle** and **summary export functionality**.
-
----
-
-## ๐ Features
-
-* ๐งฉ **Automatic Summarization** โ powered by the pre-trained `facebook/bart-large-cnn` model
-* ๐ **Theme Toggle** โ switch between *Light (gradio/soft)* and *Dark (gradio/dracula)* modes
-* ๐ **Export Option** โ download the generated summary as a `.txt` file
-* โก **Interactive UI** โ built using Gradioโs `Blocks()` API for flexibility and a modern layout
-* ๐ชถ **Clean Design** โ simple Markdown titles, button-based flow, and responsive layout
-
----
-
-## 🧰 Requirements
-
-Install the required Python libraries:
-
-```bash
-pip install gradio transformers torch
-```
-
----
-
-## 🧾 Usage
-
-### ▶️ Run the App
-
-Save the code in a file named `app.py` and run:
-
-```bash
-python app.py
-```
-
-The app will launch locally (by default on `http://127.0.0.1:7860`).
-
----
-
-## 💻 Code Overview
-
-### 1. **Model Loading**
-
-```python
-summarizer = pipeline("summarization", model="Falconsai/text_summarization")
-```
-
-Uses the **Hugging Face Transformers** pipeline to load the BART model for summarization.
-
----
-
-### 2. **Core Functions**
-
-#### `summarize_text(text)`
-
-* Takes a long paragraph as input
-* Returns a concise summary (120 words max, 30 words min)
-
-#### `export_summary(summary)`
-
-* Saves the generated summary as `summary.txt`
-* Returns a downloadable file path for Gradio
-
----
-
-### 3. **UI with Gradio Blocks**
-
-The app is structured with `gr.Blocks()` for layout flexibility:
-
-* Text input box for the paragraph
-* โSummarizeโ button to trigger the model
-* Output box for the summary
-* โExport Summaryโ button for download
-* Theme toggle (Light/Dark)
-
-```python
-with gr.Blocks(theme=theme) as demo:
- gr.Markdown("## ๐ง AI Text Summarizer")
- ...
-```
-
----
-
-### 4. **Theme Toggle**
-
-The app provides a simple **Light โ Dark** theme switcher:
-
-```python
-theme_toggle = gr.Radio(
- ["gradio/soft", "gradio/dracula"],
- value=theme,
- label="๐ Theme"
-)
-```
-
----
-
-## 🧩 App Flow
-
-1. User enters a long text.
-2. Clicks **โSummarizeโ** โ The BART model generates a summary.
-3. Clicks **โExport Summaryโ** โ Downloads the summary as a `.txt` file.
-4. Toggles between **Light/Dark mode** anytime.
-
----
-
-## 🖼️ Example
-
-**Input:**
-
-> The industrial revolution began in the 18th century, transforming manufacturing processes and leading to rapid urbanization...
-
-**Output:**
-
-> The 18th-century industrial revolution revolutionized production, driving technological growth and urbanization.
-
----
-
-## 🎨 Themes Used
-
-| Theme | Description |
-| ---------------- | ---------------------------------------- |
-| `gradio/soft` | Default light theme with soft colors |
-| `gradio/dracula` | Dark theme for comfortable nighttime use |
-
----
-
-
-
-
-
----
-
-## 👨‍💻 Author
-
-**Neelay Mehta**
-
----
-
-
diff --git a/neelay_mehta/Summarization.ipynb b/neelay_mehta/Summarization.ipynb
deleted file mode 100644
index 7634d1c..0000000
--- a/neelay_mehta/Summarization.ipynb
+++ /dev/null
@@ -1,3340 +0,0 @@
-{
- "nbformat": 4,
- "nbformat_minor": 0,
- "metadata": {
- "colab": {
- "provenance": [],
- "gpuType": "T4"
- },
- "kernelspec": {
- "name": "python3",
- "display_name": "Python 3"
- },
- "language_info": {
- "name": "python"
- },
- "accelerator": "GPU",
- "widgets": {
- "application/vnd.jupyter.widget-state+json": {
- "bb25e5cb4e0944f5969b05765403e21b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_6fbc59c811ad4e66957fd3db5180b38c",
- "IPY_MODEL_70ad3e588d974fbeb790aec5abda7f79",
- "IPY_MODEL_8fea7d9429ba4f54bc53594e1862b89f"
- ],
- "layout": "IPY_MODEL_da5406c843b54a128d1cd65d0116310e"
- }
- },
- "6fbc59c811ad4e66957fd3db5180b38c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_203f31cf430a4d09b81d8e0d35b57b06",
- "placeholder": "โ",
- "style": "IPY_MODEL_9662fb8e73f14f40b7dd293e90e74501",
- "value": "config.json:โ"
- }
- },
- "70ad3e588d974fbeb790aec5abda7f79": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_5a3228900e2440bb8c1535fe06ee5852",
- "max": 1,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_37fbca87288c4a699e2e9e481e0f1c6c",
- "value": 1
- }
- },
- "8fea7d9429ba4f54bc53594e1862b89f": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ea5000d2ef95461986e49e76cf5a32ce",
- "placeholder": "โ",
- "style": "IPY_MODEL_0e4f4897922245f492109a860a000dc5",
- "value": "โ1.49k/?โ[00:00<00:00,โ128kB/s]"
- }
- },
- "da5406c843b54a128d1cd65d0116310e": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "203f31cf430a4d09b81d8e0d35b57b06": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "9662fb8e73f14f40b7dd293e90e74501": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "5a3228900e2440bb8c1535fe06ee5852": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": "20px"
- }
- },
- "37fbca87288c4a699e2e9e481e0f1c6c": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "ea5000d2ef95461986e49e76cf5a32ce": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0e4f4897922245f492109a860a000dc5": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "9c7052b0b3b046389250207edea6cbf8": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_f1e2f7a004f447cf80275eefa58a9a79",
- "IPY_MODEL_11d3f537ada7430ab5ef9916e43643ac",
- "IPY_MODEL_c938c1a56cf44e6095534d45d09b2828"
- ],
- "layout": "IPY_MODEL_86314e72b9b64321b998f6ff96310ee3"
- }
- },
- "f1e2f7a004f447cf80275eefa58a9a79": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_bbbf411f48a44d4bad1319af67dcc1f0",
- "placeholder": "โ",
- "style": "IPY_MODEL_8273e737cf49418d833c0b53c6961b20",
- "value": "model.safetensors:โ100%"
- }
- },
- "11d3f537ada7430ab5ef9916e43643ac": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_0227bd065f21424c9ab522fe6d31f091",
- "max": 242042016,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_0b72ae6ebc4542c6b8c7142781a96c6a",
- "value": 242042016
- }
- },
- "c938c1a56cf44e6095534d45d09b2828": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1b833c2224a14229869235832218e44d",
- "placeholder": "โ",
- "style": "IPY_MODEL_7ac59db8113542c09c923369ecea085b",
- "value": "โ242M/242Mโ[00:02<00:00,โ113MB/s]"
- }
- },
- "86314e72b9b64321b998f6ff96310ee3": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "bbbf411f48a44d4bad1319af67dcc1f0": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "8273e737cf49418d833c0b53c6961b20": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "0227bd065f21424c9ab522fe6d31f091": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "0b72ae6ebc4542c6b8c7142781a96c6a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "1b833c2224a14229869235832218e44d": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "7ac59db8113542c09c923369ecea085b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b4b27dca25594fc5a6df54ac25fce2bf": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_71cc749f55724184a9a41e6bce17ee38",
- "IPY_MODEL_8bc2248bb8814998afc82edcbe430a90",
- "IPY_MODEL_2c8e1344f5e940cd86251b82d7ee2388"
- ],
- "layout": "IPY_MODEL_95398cc71afb420280282d3845731fcb"
- }
- },
- "71cc749f55724184a9a41e6bce17ee38": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1d9fa660b6d14aa192d9ae6a594085d4",
- "placeholder": "โ",
- "style": "IPY_MODEL_832e575a69ff458cba913d71efbabd04",
- "value": "generation_config.json:โ100%"
- }
- },
- "8bc2248bb8814998afc82edcbe430a90": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8dde1474edcc4de9a1f38855f45e5fd4",
- "max": 112,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_d3c5854ffa5a4fe2a1591de7a62d6525",
- "value": 112
- }
- },
- "2c8e1344f5e940cd86251b82d7ee2388": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_de95da44212146aabaf822cffa5d96a3",
- "placeholder": "โ",
- "style": "IPY_MODEL_a1c7f40c86d94406a4ba9463eccf03f6",
- "value": "โ112/112โ[00:00<00:00,โ10.9kB/s]"
- }
- },
- "95398cc71afb420280282d3845731fcb": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1d9fa660b6d14aa192d9ae6a594085d4": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "832e575a69ff458cba913d71efbabd04": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "8dde1474edcc4de9a1f38855f45e5fd4": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "d3c5854ffa5a4fe2a1591de7a62d6525": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "de95da44212146aabaf822cffa5d96a3": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "a1c7f40c86d94406a4ba9463eccf03f6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "b02ac51fa6c74911aa832bd620ed1025": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_015fcd405e994aa9ae8ddede3d6b9d62",
- "IPY_MODEL_b545e8b75685473cbb4442be6016560a",
- "IPY_MODEL_363d84ca483842c78b5d1e07fa8ebbac"
- ],
- "layout": "IPY_MODEL_6d1ced832b7344bca4654aa09775d5b7"
- }
- },
- "015fcd405e994aa9ae8ddede3d6b9d62": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_ceaa42c8d7464a97b22b3365b432237a",
- "placeholder": "โ",
- "style": "IPY_MODEL_6e51da07cb514f96b4f57301e72e4ba0",
- "value": "tokenizer_config.json:โ"
- }
- },
- "b545e8b75685473cbb4442be6016560a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8044439995934d52b8772b79944314d4",
- "max": 1,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_266941a0a1c94e058a030f8ee0ed22d3",
- "value": 1
- }
- },
- "363d84ca483842c78b5d1e07fa8ebbac": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_eec4a17b44c340ea9690bed9fefa59fa",
- "placeholder": "โ",
- "style": "IPY_MODEL_01fa92bc197443b7b606312a1207119d",
- "value": "โ2.32k/?โ[00:00<00:00,โ229kB/s]"
- }
- },
- "6d1ced832b7344bca4654aa09775d5b7": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ceaa42c8d7464a97b22b3365b432237a": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6e51da07cb514f96b4f57301e72e4ba0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "8044439995934d52b8772b79944314d4": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": "20px"
- }
- },
- "266941a0a1c94e058a030f8ee0ed22d3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "eec4a17b44c340ea9690bed9fefa59fa": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "01fa92bc197443b7b606312a1207119d": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "d2f7976f5f834fdf83d8fd56d6539173": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_abe6fd34c502431090df0ee08989a93b",
- "IPY_MODEL_eadef65a2dc34215b7d9e3704cc98bee",
- "IPY_MODEL_2679d888109c4626a9457b23e24712d7"
- ],
- "layout": "IPY_MODEL_06f564ffaf444c6ea728aa51a2d96b2f"
- }
- },
- "abe6fd34c502431090df0ee08989a93b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_b57f9bd772374a35830677b94b0fbe13",
- "placeholder": "โ",
- "style": "IPY_MODEL_b4cd20383a104685af03baac29d362df",
- "value": "spiece.model:โ100%"
- }
- },
- "eadef65a2dc34215b7d9e3704cc98bee": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_2f31209ac8c641a2ab7d4d5fcfab10c3",
- "max": 791656,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_6fe2b79c1588467ab4320c1664f69070",
- "value": 791656
- }
- },
- "2679d888109c4626a9457b23e24712d7": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_78166add75a540b1b5f86133d5a1f383",
- "placeholder": "โ",
- "style": "IPY_MODEL_df698733c2c040818964f482a611d2e4",
- "value": "โ792k/792kโ[00:00<00:00,โ1.91MB/s]"
- }
- },
- "06f564ffaf444c6ea728aa51a2d96b2f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b57f9bd772374a35830677b94b0fbe13": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "b4cd20383a104685af03baac29d362df": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "2f31209ac8c641a2ab7d4d5fcfab10c3": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6fe2b79c1588467ab4320c1664f69070": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "78166add75a540b1b5f86133d5a1f383": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "df698733c2c040818964f482a611d2e4": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "f9c2882be74e4b43b3dd751150fa16e0": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_56b4cf80bfd34055b4ee7a38f38eeec2",
- "IPY_MODEL_ffc3402d7d4a4c23bd34d0af2768728a",
- "IPY_MODEL_4ec6374fc8294c96ba89514e2f97dc03"
- ],
- "layout": "IPY_MODEL_612f0b2bf7414deaadfe4b5a33930e54"
- }
- },
- "56b4cf80bfd34055b4ee7a38f38eeec2": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_8d47cffffc3f4ae7ac2af50592d37cb4",
- "placeholder": "โ",
- "style": "IPY_MODEL_5787b33c823b4423b3e6350738b8b4af",
- "value": "tokenizer.json:โ"
- }
- },
- "ffc3402d7d4a4c23bd34d0af2768728a": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_469de03d0290463f9a3021e8902942ae",
- "max": 1,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_8ac4ab4f21554a499245662f13f7cd0b",
- "value": 1
- }
- },
- "4ec6374fc8294c96ba89514e2f97dc03": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_a9478851d28a4a6880b2a278f104edb1",
- "placeholder": "โ",
- "style": "IPY_MODEL_6a2c954f31b84440b212c1cc411589bf",
- "value": "โ2.42M/?โ[00:00<00:00,โ30.4MB/s]"
- }
- },
- "612f0b2bf7414deaadfe4b5a33930e54": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "8d47cffffc3f4ae7ac2af50592d37cb4": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "5787b33c823b4423b3e6350738b8b4af": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "469de03d0290463f9a3021e8902942ae": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": "20px"
- }
- },
- "8ac4ab4f21554a499245662f13f7cd0b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "a9478851d28a4a6880b2a278f104edb1": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "6a2c954f31b84440b212c1cc411589bf": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "51bebd97237640e3b3387637e324d4e3": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_c78c8dd34f854f90b71af1b99dbedd11",
- "IPY_MODEL_0fd6071457324a39834257e092d8618e",
- "IPY_MODEL_018aa58d48a54287a4a565a3fc8a343b"
- ],
- "layout": "IPY_MODEL_50a2e10ce7104a5aa38ffdc278a4783f"
- }
- },
- "c78c8dd34f854f90b71af1b99dbedd11": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_c0dffb6d1e9e45719229a0f0e628d372",
- "placeholder": "โ",
- "style": "IPY_MODEL_736ae14087ec4c54b4589310fdbe0230",
- "value": "special_tokens_map.json:โ"
- }
- },
- "0fd6071457324a39834257e092d8618e": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_a1271e0e611d448994282a85a8aea0cb",
- "max": 1,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_e7ce38ccb1e845d49e66640c7cd63303",
- "value": 1
- }
- },
- "018aa58d48a54287a4a565a3fc8a343b": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_858d8a88f11747de94df4a70a6f3931c",
- "placeholder": "โ",
- "style": "IPY_MODEL_ee1ae5843b2441dca353ac094c0c1895",
- "value": "โ2.20k/?โ[00:00<00:00,โ190kB/s]"
- }
- },
- "50a2e10ce7104a5aa38ffdc278a4783f": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "c0dffb6d1e9e45719229a0f0e628d372": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "736ae14087ec4c54b4589310fdbe0230": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "a1271e0e611d448994282a85a8aea0cb": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": "20px"
- }
- },
- "e7ce38ccb1e845d49e66640c7cd63303": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "858d8a88f11747de94df4a70a6f3931c": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "ee1ae5843b2441dca353ac094c0c1895": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "913a35e4aee044978a28e36b04805ad6": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HBoxModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HBoxModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HBoxView",
- "box_style": "",
- "children": [
- "IPY_MODEL_35c5449bd31a467f9ba1aa2cebeeeacd",
- "IPY_MODEL_1613b86548154c8fbec768399af76492",
- "IPY_MODEL_12e512c7df644f4b984a5bc8a0c40255"
- ],
- "layout": "IPY_MODEL_40f3106bb1e64b31a6598572cce7ed32"
- }
- },
- "35c5449bd31a467f9ba1aa2cebeeeacd": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_1f8ae1f5be024998ac713f759400b774",
- "placeholder": "โ",
- "style": "IPY_MODEL_8e1a1832c4af41cc9586605b5a2553fe",
- "value": "theme_schema%400.0.3.json:โ"
- }
- },
- "1613b86548154c8fbec768399af76492": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "FloatProgressModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "FloatProgressModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "ProgressView",
- "bar_style": "success",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_53c6f52894fc4029b5c03cb947d97944",
- "max": 1,
- "min": 0,
- "orientation": "horizontal",
- "style": "IPY_MODEL_e0b74c8f94cc4c53984edca89f912c54",
- "value": 1
- }
- },
- "12e512c7df644f4b984a5bc8a0c40255": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "HTMLModel",
- "model_module_version": "1.5.0",
- "state": {
- "_dom_classes": [],
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "HTMLModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/controls",
- "_view_module_version": "1.5.0",
- "_view_name": "HTMLView",
- "description": "",
- "description_tooltip": null,
- "layout": "IPY_MODEL_9a8ae19f01b74841b5e74b4b063932b5",
- "placeholder": "โ",
- "style": "IPY_MODEL_1121693c70684afd83f1e64540efcd62",
- "value": "โ13.1k/?โ[00:00<00:00,โ715kB/s]"
- }
- },
- "40f3106bb1e64b31a6598572cce7ed32": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1f8ae1f5be024998ac713f759400b774": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "8e1a1832c4af41cc9586605b5a2553fe": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- },
- "53c6f52894fc4029b5c03cb947d97944": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": "20px"
- }
- },
- "e0b74c8f94cc4c53984edca89f912c54": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "ProgressStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "ProgressStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "bar_color": null,
- "description_width": ""
- }
- },
- "9a8ae19f01b74841b5e74b4b063932b5": {
- "model_module": "@jupyter-widgets/base",
- "model_name": "LayoutModel",
- "model_module_version": "1.2.0",
- "state": {
- "_model_module": "@jupyter-widgets/base",
- "_model_module_version": "1.2.0",
- "_model_name": "LayoutModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "LayoutView",
- "align_content": null,
- "align_items": null,
- "align_self": null,
- "border": null,
- "bottom": null,
- "display": null,
- "flex": null,
- "flex_flow": null,
- "grid_area": null,
- "grid_auto_columns": null,
- "grid_auto_flow": null,
- "grid_auto_rows": null,
- "grid_column": null,
- "grid_gap": null,
- "grid_row": null,
- "grid_template_areas": null,
- "grid_template_columns": null,
- "grid_template_rows": null,
- "height": null,
- "justify_content": null,
- "justify_items": null,
- "left": null,
- "margin": null,
- "max_height": null,
- "max_width": null,
- "min_height": null,
- "min_width": null,
- "object_fit": null,
- "object_position": null,
- "order": null,
- "overflow": null,
- "overflow_x": null,
- "overflow_y": null,
- "padding": null,
- "right": null,
- "top": null,
- "visibility": null,
- "width": null
- }
- },
- "1121693c70684afd83f1e64540efcd62": {
- "model_module": "@jupyter-widgets/controls",
- "model_name": "DescriptionStyleModel",
- "model_module_version": "1.5.0",
- "state": {
- "_model_module": "@jupyter-widgets/controls",
- "_model_module_version": "1.5.0",
- "_model_name": "DescriptionStyleModel",
- "_view_count": null,
- "_view_module": "@jupyter-widgets/base",
- "_view_module_version": "1.2.0",
- "_view_name": "StyleView",
- "description_width": ""
- }
- }
- }
- }
- },
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "UB6_9rcKfuWP"
- },
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "markdown",
- "source": [
- "GRADIO SYNTAX"
- ],
- "metadata": {
- "id": "qBdN3hkkfH6R"
- }
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "wR4y7UudewDw",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 612
- },
- "outputId": "2ec8b795-5b10-462c-f91b-14943e6712e1"
- },
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
- "* Running on public URL: https://35485350369c1bf6d6.gradio.live\n",
- "\n",
- "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- ""
- ],
- "text/html": [
- ""
- ]
- },
- "metadata": {}
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": []
- },
- "metadata": {},
- "execution_count": 6
- }
- ],
- "source": [
- "import gradio as gr\n",
- "\n",
- "def greet(name):\n",
- " return \"Hello, \" + name + \"!\"\n",
- "\n",
- "demo = gr.Interface(fn=greet, inputs=\"text\", outputs=\"text\")\n",
- "\n",
- "# To create a shareable link (expires after 1 week)\n",
- "demo.launch(share=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "HUGGING FACE TOKEN LOGISTICS"
- ],
- "metadata": {
- "id": "53iZXyE0fFTI"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "!pip install huggingface_hub\n",
- "\n",
- "from huggingface_hub import whoami\n",
- "from google.colab import userdata\n",
- "\n",
- "# Get your Hugging Face token from Colab Secrets\n",
- "hf_token = userdata.get('HF')\n",
- "\n",
- "# Verify the token by checking your identity\n",
- "try:\n",
- " user_info = whoami(token=hf_token)\n",
- " print(f\"Logged in as: {user_info['name']}\")\n",
- "except Exception as e:\n",
- " print(f\"Could not log in: {e}\")\n",
- " print(\"Please make sure you have added your Hugging Face token to Colab Secrets with the name 'HF_TOKEN'\")"
- ],
- "metadata": {
- "id": "BvuK-FRUe4sz",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "6002455d-cdb5-4b0a-abd2-ddd563c54848"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.12/dist-packages (0.35.3)\n",
- "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (3.20.0)\n",
- "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (2025.3.0)\n",
- "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (25.0)\n",
- "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (6.0.3)\n",
- "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (2.32.4)\n",
- "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (4.67.1)\n",
- "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (4.15.0)\n",
- "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface_hub) (1.1.10)\n",
- "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (3.4.4)\n",
- "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (3.11)\n",
- "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (2.5.0)\n",
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->huggingface_hub) (2025.10.5)\n",
- "Logged in as: neelaym\n"
- ]
- }
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "IF YOU WISH TO LOAD SOME DATASET TO TEST ANYTHING"
- ],
- "metadata": {
- "id": "SJ7RyQEXfKqf"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "from datasets import load_dataset\n",
- "\n",
- "# Load a dataset (e.g., the SQuAD dataset for question answering)\n",
- "dataset = load_dataset(\"squad\")\n",
- "\n",
- "# Print information about the dataset\n",
- "print(dataset)\n",
- "\n",
- "# Access an example from the training set\n",
- "print(\"\\nExample from the training set:\")\n",
- "print(dataset[\"train\"][0])"
- ],
- "metadata": {
- "id": "kKofrkg0fCEM"
- },
- "execution_count": null,
- "outputs": []
- },
- {
- "cell_type": "markdown",
- "source": [
- "SAMPLE SUMMARISATION CODE"
- ],
- "metadata": {
- "id": "ixTksrfyfNwe"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "from transformers import pipeline\n",
- "import gradio as gr\n",
- "# Load the summarization pipeline\n",
- "\n",
- "\n",
- "summarizer = pipeline(\"summarization\", model=\"Falconsai/text_summarization\")\n",
- "\n",
- "\n",
- "# Text to summarize\n",
- "text = \"\"\"\n",
- "Hugging Face is a company and open-source platform that provides tools and models for natural language processing (NLP). It has become a central hub for the ML community, offering a wide range of pre-trained models that can be easily used or fine-tuned for specific applications. Key aspects of Hugging Face include the Transformers library, Model Hub, Datasets library, and Tokenizers library. Hugging Face democratizes access to powerful ML models, making it easier for developers and researchers to build and deploy applications.\n",
- "\"\"\"\n",
- "\n",
- "# Summarize the text\n",
- "summary = summarizer(text, max_length=50, min_length=25, do_sample=False)\n",
- "\n",
- "def summarize_text(text):\n",
- " summary = summarizer(text, max_length=50, min_length=25, do_sample=False)\n",
- " return summary[0]['summary_text']\n",
- "\n",
- "\n",
- "\n",
- "demo = gr.Interface(fn=summarize_text, inputs=\"text\", outputs=\"text\", theme=\"gradio/dracula\")\n",
- "print(\"Original Text:\")\n",
- "print(text)\n",
- "print(\"\\nSummary:\")\n",
- "print(summary[0]['summary_text'])"
- ],
- "metadata": {
- "id": "x6UgM-Rse5dw",
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "1793c0a6-4f92-4370-be82-dee9bf9329cb"
- },
- "execution_count": null,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Device set to use cuda:0\n",
- "Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Original Text:\n",
- "\n",
- "Hugging Face is a company and open-source platform that provides tools and models for natural language processing (NLP). It has become a central hub for the ML community, offering a wide range of pre-trained models that can be easily used or fine-tuned for specific applications. Key aspects of Hugging Face include the Transformers library, Model Hub, Datasets library, and Tokenizers library. Hugging Face democratizes access to powerful ML models, making it easier for developers and researchers to build and deploy applications.\n",
- "\n",
- "\n",
- "Summary:\n",
- "Hugging Face is a company and open-source platform that provides tools and models for natural language processing (NLP). It has become a central hub for the ML community, offering a wide range of pre-trained models that can be easily used or fine-tuned for specific applications .\n"
- ]
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.12/dist-packages/gradio/blocks.py:1069: UserWarning: Cannot load gradio/dracula. Caught Exception: The space gradio/dracula does not exist\n",
- " warnings.warn(f\"Cannot load {theme}. Caught Exception: {str(e)}\")\n"
- ]
- }
- ]
- },
- {
- "cell_type": "markdown",
- "source": [
- "ASSIGNMENT GOES BELOW -"
- ],
- "metadata": {
- "id": "2uRw9YNZfP13"
- }
- },
- {
- "cell_type": "code",
- "source": [
- "import gradio as gr\n",
- "from transformers import pipeline\n",
- "\n",
- "# Load a summarization model\n",
- "summarizer = pipeline(\"summarization\", model=\"Falconsai/text_summarization\")\n",
- "\n",
- "# Summarize text\n",
- "def summarize_text(text):\n",
- " if not text.strip():\n",
- " return \"Please enter some text to summarize.\"\n",
- " summary = summarizer(text, max_length=120, min_length=30, do_sample=False)\n",
- " return summary[0]['summary_text']\n",
- "\n",
- "# Export summary to a downloadable file\n",
- "def export_summary(summary):\n",
- " file_path = \"summary.txt\"\n",
- " with open(file_path, \"w\") as f:\n",
- " f.write(summary)\n",
- " return file_path\n",
- "\n",
- "# UI layout\n",
- "def build_app(theme=\"gradio/soft\"):\n",
- " with gr.Blocks(theme=theme) as demo:\n",
- " gr.Markdown(\"## ๐ง AI Text Summarizer\")\n",
- " gr.Markdown(\"Enter any long paragraph below to generate a concise summary.\")\n",
- "\n",
- " with gr.Row():\n",
- " theme_toggle = gr.Radio(\n",
- " [\"gradio/soft\", \"gradio/dracula\"],\n",
- " value=theme,\n",
- " label=\"๐ Theme\"\n",
- " )\n",
- "\n",
- " text_input = gr.Textbox(\n",
- " lines=10,\n",
- " placeholder=\"Paste your text here...\",\n",
- " label=\"Input Text\"\n",
- " )\n",
- "\n",
- " summarize_btn = gr.Button(\"โจ Summarize\")\n",
- " output = gr.Textbox(label=\"Summary\", lines=8)\n",
- " export_btn = gr.Button(\"๐ Export Summary\")\n",
- " download_file = gr.File(label=\"Download Summary\")\n",
- "\n",
- " # Events\n",
- " summarize_btn.click(summarize_text, inputs=text_input, outputs=output)\n",
- " export_btn.click(export_summary, inputs=output, outputs=download_file)\n",
- "\n",
- " # Theme switching is not implemented: switch_theme is never registered and the change handler below is a no-op placeholder\n",
- " def switch_theme(selected_theme):\n",
- " return gr.update(theme=selected_theme)\n",
- "\n",
- " theme_toggle.change(lambda t: None, inputs=theme_toggle, outputs=None) # placeholder\n",
- "\n",
- " return demo\n",
- "\n",
- "\n",
- "# Launch app\n",
- "demo = build_app()\n",
- "demo.launch()\n"
- ],
- "metadata": {
- "id": "bCAb092HfRrj",
- "colab": {
- "base_uri": "https://localhost:8080/",
- "height": 1000,
- "referenced_widgets": [
- "bb25e5cb4e0944f5969b05765403e21b",
- "6fbc59c811ad4e66957fd3db5180b38c",
- "70ad3e588d974fbeb790aec5abda7f79",
- "8fea7d9429ba4f54bc53594e1862b89f",
- "da5406c843b54a128d1cd65d0116310e",
- "203f31cf430a4d09b81d8e0d35b57b06",
- "9662fb8e73f14f40b7dd293e90e74501",
- "5a3228900e2440bb8c1535fe06ee5852",
- "37fbca87288c4a699e2e9e481e0f1c6c",
- "ea5000d2ef95461986e49e76cf5a32ce",
- "0e4f4897922245f492109a860a000dc5",
- "9c7052b0b3b046389250207edea6cbf8",
- "f1e2f7a004f447cf80275eefa58a9a79",
- "11d3f537ada7430ab5ef9916e43643ac",
- "c938c1a56cf44e6095534d45d09b2828",
- "86314e72b9b64321b998f6ff96310ee3",
- "bbbf411f48a44d4bad1319af67dcc1f0",
- "8273e737cf49418d833c0b53c6961b20",
- "0227bd065f21424c9ab522fe6d31f091",
- "0b72ae6ebc4542c6b8c7142781a96c6a",
- "1b833c2224a14229869235832218e44d",
- "7ac59db8113542c09c923369ecea085b",
- "b4b27dca25594fc5a6df54ac25fce2bf",
- "71cc749f55724184a9a41e6bce17ee38",
- "8bc2248bb8814998afc82edcbe430a90",
- "2c8e1344f5e940cd86251b82d7ee2388",
- "95398cc71afb420280282d3845731fcb",
- "1d9fa660b6d14aa192d9ae6a594085d4",
- "832e575a69ff458cba913d71efbabd04",
- "8dde1474edcc4de9a1f38855f45e5fd4",
- "d3c5854ffa5a4fe2a1591de7a62d6525",
- "de95da44212146aabaf822cffa5d96a3",
- "a1c7f40c86d94406a4ba9463eccf03f6",
- "b02ac51fa6c74911aa832bd620ed1025",
- "015fcd405e994aa9ae8ddede3d6b9d62",
- "b545e8b75685473cbb4442be6016560a",
- "363d84ca483842c78b5d1e07fa8ebbac",
- "6d1ced832b7344bca4654aa09775d5b7",
- "ceaa42c8d7464a97b22b3365b432237a",
- "6e51da07cb514f96b4f57301e72e4ba0",
- "8044439995934d52b8772b79944314d4",
- "266941a0a1c94e058a030f8ee0ed22d3",
- "eec4a17b44c340ea9690bed9fefa59fa",
- "01fa92bc197443b7b606312a1207119d",
- "d2f7976f5f834fdf83d8fd56d6539173",
- "abe6fd34c502431090df0ee08989a93b",
- "eadef65a2dc34215b7d9e3704cc98bee",
- "2679d888109c4626a9457b23e24712d7",
- "06f564ffaf444c6ea728aa51a2d96b2f",
- "b57f9bd772374a35830677b94b0fbe13",
- "b4cd20383a104685af03baac29d362df",
- "2f31209ac8c641a2ab7d4d5fcfab10c3",
- "6fe2b79c1588467ab4320c1664f69070",
- "78166add75a540b1b5f86133d5a1f383",
- "df698733c2c040818964f482a611d2e4",
- "f9c2882be74e4b43b3dd751150fa16e0",
- "56b4cf80bfd34055b4ee7a38f38eeec2",
- "ffc3402d7d4a4c23bd34d0af2768728a",
- "4ec6374fc8294c96ba89514e2f97dc03",
- "612f0b2bf7414deaadfe4b5a33930e54",
- "8d47cffffc3f4ae7ac2af50592d37cb4",
- "5787b33c823b4423b3e6350738b8b4af",
- "469de03d0290463f9a3021e8902942ae",
- "8ac4ab4f21554a499245662f13f7cd0b",
- "a9478851d28a4a6880b2a278f104edb1",
- "6a2c954f31b84440b212c1cc411589bf",
- "51bebd97237640e3b3387637e324d4e3",
- "c78c8dd34f854f90b71af1b99dbedd11",
- "0fd6071457324a39834257e092d8618e",
- "018aa58d48a54287a4a565a3fc8a343b",
- "50a2e10ce7104a5aa38ffdc278a4783f",
- "c0dffb6d1e9e45719229a0f0e628d372",
- "736ae14087ec4c54b4589310fdbe0230",
- "a1271e0e611d448994282a85a8aea0cb",
- "e7ce38ccb1e845d49e66640c7cd63303",
- "858d8a88f11747de94df4a70a6f3931c",
- "ee1ae5843b2441dca353ac094c0c1895",
- "913a35e4aee044978a28e36b04805ad6",
- "35c5449bd31a467f9ba1aa2cebeeeacd",
- "1613b86548154c8fbec768399af76492",
- "12e512c7df644f4b984a5bc8a0c40255",
- "40f3106bb1e64b31a6598572cce7ed32",
- "1f8ae1f5be024998ac713f759400b774",
- "8e1a1832c4af41cc9586605b5a2553fe",
- "53c6f52894fc4029b5c03cb947d97944",
- "e0b74c8f94cc4c53984edca89f912c54",
- "9a8ae19f01b74841b5e74b4b063932b5",
- "1121693c70684afd83f1e64540efcd62"
- ]
- },
- "outputId": "29a2f2ee-1a3a-4d9e-853b-20599b4a5e91"
- },
- "execution_count": 1,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
- "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
- "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
- "You will be able to reuse this secret in all of your notebooks.\n",
- "Please note that authentication is recommended but still optional to access public models or datasets.\n",
- " warnings.warn(\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "config.json: 0.00B [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "bb25e5cb4e0944f5969b05765403e21b"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "model.safetensors: 0%| | 0.00/242M [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "9c7052b0b3b046389250207edea6cbf8"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "generation_config.json: 0%| | 0.00/112 [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "b4b27dca25594fc5a6df54ac25fce2bf"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer_config.json: 0.00B [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "b02ac51fa6c74911aa832bd620ed1025"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "spiece.model: 0%| | 0.00/792k [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "d2f7976f5f834fdf83d8fd56d6539173"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "tokenizer.json: 0.00B [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "f9c2882be74e4b43b3dd751150fa16e0"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "special_tokens_map.json: 0.00B [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "51bebd97237640e3b3387637e324d4e3"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stderr",
- "text": [
- "Device set to use cuda:0\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- "theme_schema%400.0.3.json: 0.00B [00:00, ?B/s]"
- ],
- "application/vnd.jupyter.widget-view+json": {
- "version_major": 2,
- "version_minor": 0,
- "model_id": "913a35e4aee044978a28e36b04805ad6"
- }
- },
- "metadata": {}
- },
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).\n",
- "\n",
- "Colab notebook detected. To show errors in colab notebook, set debug=True in launch()\n",
- "* Running on public URL: https://3ab22350451cb6aeec.gradio.live\n",
- "\n",
- "This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
- ]
- },
- {
- "output_type": "display_data",
- "data": {
- "text/plain": [
- ""
- ],
- "text/html": [
- ""
- ]
- },
- "metadata": {}
- },
- {
- "output_type": "execute_result",
- "data": {
- "text/plain": []
- },
- "metadata": {},
- "execution_count": 1
- }
- ]
- }
- ]
-}
\ No newline at end of file
diff --git a/nitesh_sharma/20251031/nitesh_working_project-bolt-sb1.zip b/nitesh_sharma/20251031/nitesh_working_project-bolt-sb1.zip
deleted file mode 100644
index 45e571c..0000000
Binary files a/nitesh_sharma/20251031/nitesh_working_project-bolt-sb1.zip and /dev/null differ
diff --git a/nitesh_sharma/20251102/nitesh_assignment_1.ipynb b/nitesh_sharma/20251102/nitesh_assignment_1.ipynb
deleted file mode 100644
index afb4067..0000000
--- a/nitesh_sharma/20251102/nitesh_assignment_1.ipynb
+++ /dev/null
@@ -1,505 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "210779f8",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\env_v1\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
Libraries imported successfully!\n"
- ]
- }
- ],
- "source": [
- "# Import required libraries\n",
- "import os\n",
- "from pathlib import Path\n",
- "from typing import List\n",
- "from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings\n",
- "from llama_index.vector_stores.lancedb import LanceDBVectorStore\n",
- "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
- "\n",
- "print(\"โ
Libraries imported successfully!\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "5d9e738b",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โน๏ธ OPENROUTER_API_KEY not found - that's OK for this assignment!\n",
- " This assignment only uses local embeddings for vector operations.\n",
- "โ
LlamaIndex configured with local embeddings\n",
- " Using BAAI/bge-small-en-v1.5 for document embeddings\n"
- ]
- }
- ],
- "source": [
- "# Configure LlamaIndex Settings (Using OpenRouter - No OpenAI API Key needed)\n",
- "def setup_llamaindex_settings():\n",
- " \"\"\"\n",
- " Configure LlamaIndex with local embeddings and OpenRouter for LLM.\n",
- " This assignment focuses on vector database operations, so we'll use local embeddings only.\n",
- " \"\"\"\n",
- " # Check for OpenRouter API key (for future use, not needed for this basic assignment)\n",
- " api_key = os.getenv(\"OPENROUTER_API_KEY\")\n",
- " if not api_key:\n",
- " print(\"โน๏ธ OPENROUTER_API_KEY not found - that's OK for this assignment!\")\n",
- " print(\" This assignment only uses local embeddings for vector operations.\")\n",
- " \n",
- " # Configure local embeddings (no API key required)\n",
- " Settings.embed_model = HuggingFaceEmbedding(\n",
- " model_name=\"BAAI/bge-small-en-v1.5\",\n",
- " trust_remote_code=True\n",
- " )\n",
- " \n",
- " print(\"โ
LlamaIndex configured with local embeddings\")\n",
- " print(\" Using BAAI/bge-small-en-v1.5 for document embeddings\")\n",
- "\n",
- "# Setup the configuration\n",
- "setup_llamaindex_settings()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "3a3c274b",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "100%|โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ| 139M/139M [00:12<00:00, 11.2MiB/s]\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\ai_agents.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\in_the_end.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\rags.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\env_v1\\Lib\\site-packages\\whisper\\transcribe.py:132: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
- " warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Loaded 39 documents\n"
- ]
- }
- ],
- "source": [
- "def load_documents_from_folder(folder_path: str):\n",
- " \"\"\"\n",
- " Load documents from a folder using SimpleDirectoryReader.\n",
- " \n",
- " TODO: Complete this function to load documents from the given folder path.\n",
- " HINT: Use SimpleDirectoryReader with recursive parameter to load all files\n",
- " \n",
- " Args:\n",
- " folder_path (str): Path to the folder containing documents\n",
- " \n",
- " Returns:\n",
- " List of documents loaded from the folder\n",
- " \"\"\"\n",
- " # TODO: Create SimpleDirectoryReader instance\n",
- " reader = SimpleDirectoryReader(folder_path, recursive=True)\n",
- " \n",
- " # TODO: Load and return documents\n",
- " documents = reader.load_data()\n",
- "\n",
- " # return documents\n",
- " return documents\n",
- "\n",
- "# Test the function after you complete it\n",
- "test_folder = \"../data\"\n",
- "documents = load_documents_from_folder(test_folder)\n",
- "print(f\"Loaded {len(documents)} documents\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "eb61b262",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 07:45:16,768 - WARNING - Table documents doesn't exist yet. Please add some data to create it.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Vector store created: True\n"
- ]
- }
- ],
- "source": [
- "def create_vector_store(db_path: str = \"./vectordb\", table_name: str = \"documents\"):\n",
- " \"\"\"\n",
- " Create a LanceDB vector store for storing document embeddings.\n",
- " \n",
- " TODO: Complete this function to create and configure a LanceDB vector store.\n",
- " HINT: Use LanceDBVectorStore with uri and table_name parameters\n",
- " \n",
- " Args:\n",
- " db_path (str): Path where the vector database will be stored\n",
- " table_name (str): Name of the table in the vector database\n",
- " \n",
- " Returns:\n",
- " LanceDBVectorStore: Configured vector store\n",
- " \"\"\"\n",
- " # TODO: Create the directory if it doesn't exist\n",
- " Path(db_path).mkdir(parents=True, exist_ok=True)\n",
- " \n",
- " # TODO: Create vector store\n",
- " vector_store = LanceDBVectorStore(\n",
- " uri=f\"./{db_path}/{table_name}.db\",\n",
- " table_name=table_name,\n",
- " mode=\"overwrite\" # overwrite existing table\n",
- " )\n",
- " return vector_store\n",
- " \n",
- " # PLACEHOLDER - Replace with actual implementation\n",
- " print(f\"TODO: Create vector store at {db_path}\")\n",
- " return None\n",
- "\n",
- "# Test the function after you complete it\n",
- "vector_store = create_vector_store(\"./assignment_vectordb\")\n",
- "print(f\"Vector store created: {vector_store is not None}\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "d4aabec5",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 07:46:39,007 - INFO - Create new table documents adding data.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Vector index created: True\n"
- ]
- }
- ],
- "source": [
- "def create_vector_index(documents: List, vector_store):\n",
- " \"\"\"\n",
- " Create a vector index from documents using the provided vector store.\n",
- " \n",
- " TODO: Complete this function to create a VectorStoreIndex from documents.\n",
- " HINT: Create StorageContext with vector_store, then use VectorStoreIndex.from_documents()\n",
- " \n",
- " Args:\n",
- " documents: List of documents to index\n",
- " vector_store: LanceDB vector store to use for storage\n",
- " \n",
- " Returns:\n",
- " VectorStoreIndex: The created vector index\n",
- " \"\"\"\n",
- " # TODO: Create storage context with vector store\n",
- " storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
- " \n",
- " # TODO: Create index from documents\n",
- " index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)\n",
- " \n",
- " return index\n",
- "\n",
- " # PLACEHOLDER - Replace with actual implementation\n",
- " print(f\"TODO: Create vector index from {len(documents)} documents\")\n",
- " return None\n",
- "\n",
- "# Test the function after you complete it (will only work after previous functions are completed)\n",
- "if documents and vector_store:\n",
- " index = create_vector_index(documents, vector_store)\n",
- " print(f\"Vector index created: {index is not None}\")\n",
- "else:\n",
- " print(\"Complete previous functions first to test this one\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "3b292bf7",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 07:47:37,186 - INFO - query_type :, vector\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Found 2 results for query: 'What are AI agents?'\n",
- "Result 1: agent-personas or the user is not needed, multi-agent architectures tend to thrive more when collabo...\n",
- "Result 2: THE LANDSCAPE OF EMERGING AI AGENT ARCHITECTURES\n",
- "FOR REASONING , PLANNING , AND TOOL CALLING : A S U...\n"
- ]
- }
- ],
- "source": [
- "def search_documents(index, query: str, top_k: int = 3):\n",
- " \"\"\"\n",
- " Search for relevant documents using the vector index.\n",
- " \n",
- " TODO: Complete this function to perform semantic search on the index.\n",
- " HINT: Use index.as_retriever() with similarity_top_k parameter, then retrieve(query)\n",
- " \n",
- " Args:\n",
- " index: Vector index to search\n",
- " query (str): Search query\n",
- " top_k (int): Number of top results to return\n",
- " \n",
- " Returns:\n",
- " List of retrieved document nodes\n",
- " \"\"\"\n",
- " # TODO: Create retriever from index\n",
- " retriever = index.as_retriever(similarity_top_k=top_k)\n",
- " \n",
- " # TODO: Retrieve documents for the query\n",
- " results = retriever.retrieve(query)\n",
- " \n",
- " return results\n",
- "\n",
- " # PLACEHOLDER - Replace with actual implementation\n",
- " print(f\"TODO: Search for '{query}' in index\")\n",
- " return []\n",
- "\n",
- "# Test the function after you complete it (will only work after all previous functions are completed)\n",
- "if 'index' in locals() and index is not None:\n",
- " test_query = \"What are AI agents?\"\n",
- " results = search_documents(index, test_query, top_k=2)\n",
- " print(f\"Found {len(results)} results for query: '{test_query}'\")\n",
- " for i, result in enumerate(results, 1):\n",
- " print(f\"Result {i}: {result.text[:100] if hasattr(result, 'text') else 'No text'}...\")\n",
- "else:\n",
- " print(\"Complete all previous functions first to test this one\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "842c3b78",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ Testing Complete Vector Database Pipeline\n",
- "==================================================\n",
- "\n",
- "๐ Step 1: Loading documents...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\ai_agents.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\in_the_end.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\rags.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\env_v1\\Lib\\site-packages\\whisper\\transcribe.py:132: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
- " warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " Loaded 39 documents\n",
- "\n",
- "๐๏ธ Step 2: Creating vector store...\n",
- " Vector store status: โ
Created\n",
- "\n",
- "๐ Step 3: Creating vector index...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 07:48:24,923 - INFO - query_type :, vector\n",
- "2025-11-02 07:48:24,981 - INFO - query_type :, vector\n",
- "2025-11-02 07:48:25,021 - INFO - query_type :, vector\n",
- "2025-11-02 07:48:25,062 - INFO - query_type :, vector\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " Index status: โ
Created\n",
- "\n",
- "๐ Step 4: Testing search functionality...\n",
- "\n",
- " ๐ Query: 'What are AI agents?'\n",
- " 1. agent-personas or the user is not needed, multi-agent architectures tend to thrive more when collabo... (Score: 0.6250)\n",
- "\n",
- " ๐ Query: 'How to evaluate agent performance?'\n",
- " 1. steps, but the answers are limited to Yes/No responses [7]. As the industry continues to pivot towar... (Score: 0.6800)\n",
- "\n",
- " ๐ Query: 'Italian recipes and cooking'\n",
- " 1. # ๐ Classic Spaghetti Carbonara Recipe\n",
- "\n",
- "## Ingredients\n",
- "- 400g spaghetti pasta\n",
- "- 4 large egg yolk... (Score: 0.6280)\n",
- "\n",
- " ๐ Query: 'Financial analysis and investment'\n",
- " 1. However, several important considerations need to be ad-\n",
- "dressed in future work:\n",
- "โข Scalability: Eval... (Score: 0.5623)\n",
- "\n",
- "==================================================\n",
- "๐ฏ Assignment Status:\n",
- " Documents loaded: โ
\n",
- " Vector store created: โ
\n",
- " Index created: โ
\n",
- " Search working: โ
\n",
- "\n",
- "๐ Congratulations! You've successfully completed the assignment!\n",
- " You've built a complete vector database with search functionality!\n"
- ]
- }
- ],
- "source": [
- "# Final test of the complete pipeline\n",
- "print(\"๐ Testing Complete Vector Database Pipeline\")\n",
- "print(\"=\" * 50)\n",
- "\n",
- "# Re-run the complete pipeline to ensure everything works\n",
- "data_folder = \"../data\"\n",
- "vector_db_path = \"./assignment_vectordb\"\n",
- "\n",
- "# Step 1: Load documents\n",
- "print(\"\\n๐ Step 1: Loading documents...\")\n",
- "documents = load_documents_from_folder(data_folder)\n",
- "print(f\" Loaded {len(documents)} documents\")\n",
- "\n",
- "# Step 2: Create vector store\n",
- "print(\"\\n๐๏ธ Step 2: Creating vector store...\")\n",
- "vector_store = create_vector_store(vector_db_path)\n",
- "print(\" Vector store status:\", \"โ
Created\" if vector_store else \"โ Failed\")\n",
- "\n",
- "# Step 3: Create vector index\n",
- "print(\"\\n๐ Step 3: Creating vector index...\")\n",
- "if documents and vector_store:\n",
- " index = create_vector_index(documents, vector_store)\n",
- " print(\" Index status:\", \"โ
Created\" if index else \"โ Failed\")\n",
- "else:\n",
- " index = None\n",
- " print(\" โ Cannot create index - missing documents or vector store\")\n",
- "\n",
- "# Step 4: Test multiple search queries\n",
- "print(\"\\n๐ Step 4: Testing search functionality...\")\n",
- "if index:\n",
- " search_queries = [\n",
- " \"What are AI agents?\",\n",
- " \"How to evaluate agent performance?\", \n",
- " \"Italian recipes and cooking\",\n",
- " \"Financial analysis and investment\"\n",
- " ]\n",
- " \n",
- " for query in search_queries:\n",
- " print(f\"\\n ๐ Query: '{query}'\")\n",
- " results = search_documents(index, query, top_k=2)\n",
- " \n",
- " if results:\n",
- " for i, result in enumerate(results, 1):\n",
- " text_preview = result.text[:100] if hasattr(result, 'text') else \"No text available\"\n",
- " score = f\" (Score: {result.score:.4f})\" if hasattr(result, 'score') else \"\"\n",
- " print(f\" {i}. {text_preview}...{score}\")\n",
- " else:\n",
- " print(\" No results found\")\n",
- "else:\n",
- " print(\" โ Cannot test search - index not created\")\n",
- "\n",
- "print(\"\\n\" + \"=\" * 50)\n",
- "print(\"๐ฏ Assignment Status:\")\n",
- "print(f\" Documents loaded: {'โ
' if documents else 'โ'}\")\n",
- "print(f\" Vector store created: {'โ
' if vector_store else 'โ'}\")\n",
- "print(f\" Index created: {'โ
' if index else 'โ'}\")\n",
- "print(f\" Search working: {'โ
' if index else 'โ'}\")\n",
- "\n",
- "if documents and vector_store and index:\n",
- " print(\"\\n๐ Congratulations! You've successfully completed the assignment!\")\n",
- " print(\" You've built a complete vector database with search functionality!\")\n",
- "else:\n",
- " print(\"\\n๐ Please complete the TODO functions above to finish the assignment.\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8f641657",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "env_v1",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.13.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/nitesh_sharma/20251102/nitesh_assignment_2.ipynb b/nitesh_sharma/20251102/nitesh_assignment_2.ipynb
deleted file mode 100644
index dc76db7..0000000
--- a/nitesh_sharma/20251102/nitesh_assignment_2.ipynb
+++ /dev/null
@@ -1,650 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "id": "e0596051",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\env_v1\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
Advanced RAG libraries imported successfully!\n"
- ]
- }
- ],
- "source": [
- "# Import required libraries for advanced RAG\n",
- "import os\n",
- "from pathlib import Path\n",
- "from typing import Dict, List, Optional, Any\n",
- "from pydantic import BaseModel, Field\n",
- "\n",
- "# Core LlamaIndex components\n",
- "from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings\n",
- "from llama_index.core.query_engine import RetrieverQueryEngine\n",
- "from llama_index.core.retrievers import VectorIndexRetriever\n",
- "\n",
- "# Vector store\n",
- "from llama_index.vector_stores.lancedb import LanceDBVectorStore\n",
- "\n",
- "# Embeddings and LLM\n",
- "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
- "from llama_index.llms.openrouter import OpenRouter\n",
- "\n",
- "# Advanced RAG components (we'll use these in the assignments)\n",
- "from llama_index.core.postprocessor import SimilarityPostprocessor\n",
- "from llama_index.core.response_synthesizers import TreeSummarize, Refine, CompactAndRefine\n",
- "from llama_index.core.output_parsers import PydanticOutputParser\n",
- "\n",
- "print(\"โ
Advanced RAG libraries imported successfully!\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "bda421c3",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
OPENROUTER_API_KEY found - full advanced RAG functionality available\n",
- "โ
Advanced RAG settings configured\n",
- " - Chunk size: 512 (optimized for precision)\n",
- " - Using local embeddings for cost efficiency\n",
- " - OpenRouter LLM ready for response synthesis\n"
- ]
- }
- ],
- "source": [
- "# Configure Advanced RAG Settings (Using OpenRouter)\n",
- "def setup_advanced_rag_settings():\n",
- " \"\"\"\n",
- " Configure LlamaIndex with optimized settings for advanced RAG.\n",
- " Uses local embeddings and OpenRouter for LLM operations.\n",
- " \"\"\"\n",
- " import streamlit as st\n",
- " # Check for OpenRouter API key\n",
- " api_key = os.getenv(\"OPENROUTER_API_KEY\") or st.secrets.get(\"OPENROUTER_API_KEY\")\n",
- " if not api_key:\n",
- " print(\"โ ๏ธ OPENROUTER_API_KEY not found - LLM operations will be limited\")\n",
- " print(\" You can still complete postprocessor and retrieval exercises\")\n",
- " else:\n",
- " print(\"โ
OPENROUTER_API_KEY found - full advanced RAG functionality available\")\n",
- " \n",
- " # Configure OpenRouter LLM\n",
- " Settings.llm = OpenRouter(\n",
- " api_key=api_key,\n",
- " model=\"gpt-4o\",\n",
- " temperature=0.1 # Lower temperature for more consistent responses\n",
- " )\n",
- " \n",
- " # Configure local embeddings (no API key required)\n",
- " Settings.embed_model = HuggingFaceEmbedding(\n",
- " model_name=\"BAAI/bge-small-en-v1.5\",\n",
- " trust_remote_code=True\n",
- " )\n",
- " \n",
- " # Advanced RAG configuration\n",
- " Settings.chunk_size = 512 # Smaller chunks for better precision\n",
- " Settings.chunk_overlap = 50\n",
- " \n",
- " print(\"โ
Advanced RAG settings configured\")\n",
- " print(\" - Chunk size: 512 (optimized for precision)\")\n",
- " print(\" - Using local embeddings for cost efficiency\")\n",
- " print(\" - OpenRouter LLM ready for response synthesis\")\n",
- "\n",
- "# Setup the configuration\n",
- "setup_advanced_rag_settings()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "3b81d9b6",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Table documents doesn't exist yet. Please add some data to create it.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ Setting up basic index for advanced RAG...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\ai_agents.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\in_the_end.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\rags.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\env_v1\\Lib\\site-packages\\whisper\\transcribe.py:132: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
- " warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n",
- "Parsing nodes: 100%|โโโโโโโโโโ| 39/39 [00:00<00:00, 73.56it/s]\n",
- "Generating embeddings: 100%|โโโโโโโโโโ| 92/92 [00:11<00:00, 7.81it/s]\n",
- "2025-11-02 08:07:14,277 - INFO - Create new table documents adding data.\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
Basic index created with 39 documents\n",
- " Ready for advanced RAG techniques!\n",
- "๐ Ready to implement advanced RAG techniques!\n"
- ]
- }
- ],
- "source": [
- "# Setup: Create index from Assignment 1 (reuse the basic functionality)\n",
- "def setup_basic_index(data_folder: str = \"../data\", force_rebuild: bool = False):\n",
- " \"\"\"\n",
- " Create a basic vector index that we'll enhance with advanced techniques.\n",
- " This reuses the concepts from Assignment 1.\n",
- " \"\"\"\n",
- " # Create vector store\n",
- " vector_store = LanceDBVectorStore(\n",
- " uri=\"./advanced_rag_vectordb\",\n",
- " table_name=\"documents\"\n",
- " )\n",
- " \n",
- " # Load documents\n",
- " if not Path(data_folder).exists():\n",
- " print(f\"โ Data folder not found: {data_folder}\")\n",
- " return None\n",
- " \n",
- " reader = SimpleDirectoryReader(input_dir=data_folder, recursive=True)\n",
- " documents = reader.load_data()\n",
- " \n",
- " # Create storage context and index\n",
- " storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
- " index = VectorStoreIndex.from_documents(\n",
- " documents, \n",
- " storage_context=storage_context,\n",
- " show_progress=True\n",
- " )\n",
- " \n",
- " print(f\"โ
Basic index created with {len(documents)} documents\")\n",
- " print(\" Ready for advanced RAG techniques!\")\n",
- " return index\n",
- "\n",
- "# Create the basic index\n",
- "print(\"๐ Setting up basic index for advanced RAG...\")\n",
- "index = setup_basic_index()\n",
- "\n",
- "if index:\n",
- " print(\"๐ Ready to implement advanced RAG techniques!\")\n",
- "else:\n",
- " print(\"โ Failed to create index - check data folder path\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "2c111415",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 08:59:20,978 - INFO - query_type :, vector\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
Query engine with similarity filtering created\n",
- "\n",
- "๐ Testing query: 'What are the benefits of AI agents?'\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 08:59:21,535 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ Response: AI agents offer several benefits, including the ability to tackle complex multi-step problems that require advanced problem-solving skills. They can be designed with well-defined system prompts, clear leadership, and task division, which enhance their effectiveness. Additionally, AI agents can incorporate dedicated phases for reasoning, planning, execution, and evaluation, as well as dynamic team structures and intelligent message filtering. These features make them more effective across various benchmarks and problem types. However, it's important to note that while promising, there are still challenges to address for their reliable application.\n",
- " (Complete the function above to test the response)\n"
- ]
- }
- ],
- "source": [
- "def create_query_engine_with_similarity_filter(index, similarity_cutoff: float = 0.3, top_k: int = 10):\n",
- " \"\"\"\n",
- " Create a query engine that filters results based on similarity scores.\n",
- " \n",
- " TODO: Complete this function to create a query engine with similarity postprocessing.\n",
- " HINT: Use index.as_query_engine() with node_postprocessors parameter containing SimilarityPostprocessor\n",
- " \n",
- " Args:\n",
- " index: Vector index to query\n",
- " similarity_cutoff: Minimum similarity score (0.0 to 1.0)\n",
- " top_k: Number of initial results to retrieve before filtering\n",
- " \n",
- " Returns:\n",
- " Query engine with similarity filtering\n",
- " \"\"\"\n",
- " # TODO: Create similarity postprocessor with the cutoff threshold\n",
- " similarity_processor = SimilarityPostprocessor(cutoff=similarity_cutoff)\n",
- " \n",
- " # TODO: Create query engine with similarity filtering\n",
- " query_engine = index.as_query_engine(\n",
- " retriever_kwargs={\"similarity_top_k\": top_k},\n",
- " node_postprocessors=[similarity_processor]\n",
- " )\n",
- "\n",
- " return query_engine\n",
- "\n",
- " # PLACEHOLDER - Replace with actual implementation\n",
- " print(f\"TODO: Create query engine with similarity cutoff {similarity_cutoff}\")\n",
- " return None\n",
- "\n",
- "# Test the function\n",
- "if index:\n",
- " filtered_engine = create_query_engine_with_similarity_filter(index, similarity_cutoff=0.3)\n",
- " \n",
- " if filtered_engine:\n",
- " print(\"โ
Query engine with similarity filtering created\")\n",
- " \n",
- " # Test query\n",
- " test_query = \"What are the benefits of AI agents?\"\n",
- " print(f\"\\n๐ Testing query: '{test_query}'\")\n",
- " \n",
- " # Uncomment when implemented:\n",
- " response = filtered_engine.query(test_query)\n",
- " print(f\"๐ Response: {response}\")\n",
- " print(\" (Complete the function above to test the response)\")\n",
- " else:\n",
- " print(\"โ Failed to create filtered query engine\")\n",
- "else:\n",
- " print(\"โ No index available - run previous cells first\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "id": "9881a07e",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 08:59:33,718 - INFO - query_type :, vector\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
Query engine with TreeSummarize created\n",
- "\n",
- "๐ Testing analytical query: 'Compare the advantages and disadvantages of different AI agent frameworks'\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 08:59:34,265 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ TreeSummarize Response:\n",
- "Different AI agent frameworks offer various advantages and disadvantages based on their design and application. \n",
- "\n",
- "Advantages:\n",
- "1. **Modularity and Composability**: Many frameworks, such as Agno and CrewAI, emphasize modularity and composability, allowing for flexible and rapid development. This makes it easier to integrate with existing cloud infrastructure and adapt to different use cases.\n",
- "2. **Scalability**: Frameworks that incorporate multi-agent scaling laws can efficiently manage the performance of systems as the number of agents increases, which is beneficial for handling complex tasks.\n",
- "3. **Advanced Problem-Solving**: Architectures that include well-defined system prompts, clear task division, and dynamic team structures are effective in tackling complex, multi-step problems.\n",
- "\n",
- "Disadvantages:\n",
- "1. **Complexity in Implementation**: The need for dedicated reasoning, planning, execution, and evaluation phases can increase the complexity of implementation and require significant expertise.\n",
- "2. **Real-World Applicability**: There are challenges in ensuring that these frameworks are applicable to real-world scenarios, particularly in terms of comprehensive benchmarks and mitigating biases.\n",
- "3. **Feedback Mechanisms**: While verbal reinforcement learning and feedback mechanisms can enhance learning, they may also introduce challenges in terms of consistency and reliability of the feedback provided.\n",
- "\n",
- "Overall, the\n",
- " (Complete the function above to test comprehensive analysis)\n"
- ]
- }
- ],
- "source": [
- "def create_query_engine_with_tree_summarize(index, top_k: int = 5):\n",
- " \"\"\"\n",
- " Create a query engine that uses TreeSummarize for comprehensive responses.\n",
- " \n",
- " TODO: Complete this function to create a query engine with TreeSummarize synthesis.\n",
- " HINT: Create a TreeSummarize instance, then use index.as_query_engine() with response_synthesizer parameter\n",
- " \n",
- " Args:\n",
- " index: Vector index to query\n",
- " top_k: Number of results to retrieve\n",
- " \n",
- " Returns:\n",
- " Query engine with TreeSummarize synthesis\n",
- " \"\"\"\n",
- " # TODO: Create TreeSummarize response synthesizer\n",
- " tree_synthesizer = TreeSummarize()\n",
- " \n",
- " # TODO: Create query engine with the synthesizer\n",
- " query_engine = index.as_query_engine(\n",
- " response_synthesizer=tree_synthesizer,\n",
- " retriever_kwargs={\"top_k\": top_k}\n",
- " )\n",
- " return query_engine\n",
- " # return query_engine\n",
- " \n",
- " # PLACEHOLDER - Replace with actual implementation\n",
- " print(f\"TODO: Create query engine with TreeSummarize synthesis\")\n",
- " return None\n",
- "\n",
- "# Test the function\n",
- "if index:\n",
- " tree_engine = create_query_engine_with_tree_summarize(index)\n",
- " \n",
- " if tree_engine:\n",
- " print(\"โ
Query engine with TreeSummarize created\")\n",
- " \n",
- " # Test with a complex analytical query\n",
- " analytical_query = \"Compare the advantages and disadvantages of different AI agent frameworks\"\n",
- " print(f\"\\n๐ Testing analytical query: '{analytical_query}'\")\n",
- " \n",
- " # Uncomment when implemented:\n",
- " response = tree_engine.query(analytical_query)\n",
- " print(f\"๐ TreeSummarize Response:\\n{response}\")\n",
- " print(\" (Complete the function above to test comprehensive analysis)\")\n",
- " else:\n",
- " print(\"โ Failed to create TreeSummarize query engine\")\n",
- "else:\n",
- " print(\"โ No index available - run previous cells first\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "id": "31cd4b0a",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 08:59:51,713 - INFO - query_type :, vector\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
Structured output program created\n",
- "\n",
- "๐ Testing structured query: 'Tell me about AI agents and their capabilities'\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 08:59:52,408 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ Structured Response:\n",
- "title='The Landscape of Emerging AI Agent Architectures for Reasoning, Planning, and Tool Calling: A Survey' key_points=['The best agent architecture varies based on use case, incorporating techniques like system prompts, task division, and feedback.', 'Single and multi-agent patterns are effective in solving complex tasks requiring reasoning and tool execution.', 'Dynamic team structures and intelligent message filtering enhance agent performance.', 'Current AI-driven agents face limitations in benchmarks, real-world applicability, and language model biases.', 'Future research should focus on improving agent evaluation and reliability.'] applications=['AI-driven agents for complex problem-solving tasks', 'Dynamic team structures in AI systems', 'Enhanced reasoning and planning in AI applications'] summary='This survey paper explores advancements in AI agent architectures, highlighting their capabilities in reasoning, planning, and tool execution. It discusses the effectiveness of single and multi-agent patterns and identifies key techniques for improving agent performance. The paper also addresses current limitations and suggests areas for future research.'\n",
- " (Complete the function above to get structured JSON output)\n",
- "\n",
- "๐ก Expected output format:\n",
- " - title: String\n",
- " - key_points: List of strings\n",
- " - applications: List of strings\n",
- " - summary: String\n"
- ]
- }
- ],
- "source": [
- "# First, define the Pydantic models for structured outputs \n",
- "class ResearchPaperInfo(BaseModel):\n",
- " \"\"\"Structured information about a research paper or AI concept.\"\"\"\n",
- " title: str = Field(description=\"The main title or concept name\")\n",
- " key_points: List[str] = Field(description=\"3-5 main points or findings\")\n",
- " applications: List[str] = Field(description=\"Practical applications or use cases\")\n",
- " summary: str = Field(description=\"Brief 2-3 sentence summary\")\n",
- "\n",
- "# Import the missing component\n",
- "from llama_index.core.program import LLMTextCompletionProgram\n",
- "\n",
- "def create_structured_output_program(output_model: BaseModel = ResearchPaperInfo):\n",
- " \"\"\"\n",
- " Create a structured output program using Pydantic models.\n",
- " \n",
- " TODO: Complete this function to create a structured output program.\n",
- " HINT: Use LLMTextCompletionProgram.from_defaults() with PydanticOutputParser and a prompt template\n",
- " \n",
- " Args:\n",
- " output_model: Pydantic model class for structured output\n",
- " \n",
- " Returns:\n",
- " LLMTextCompletionProgram that returns structured data\n",
- " \"\"\"\n",
- " # TODO: Create output parser with the Pydantic model\n",
- " output_parser = PydanticOutputParser(output_cls=output_model)\n",
- " #output_parser = PydanticOutputParser()\n",
- " \n",
- " # TODO: Create the structured output program\n",
- " program = LLMTextCompletionProgram.from_defaults(\n",
- " output_parser=output_parser,\n",
- " prompt_template_str=(\n",
- " \"Extract the following information from the context:\\n\"\n",
- " \"- title\\n\"\n",
- " \"- key_points\\n\"\n",
- " \"- applications\\n\"\n",
- " \"- summary\\n\\n\"\n",
- " \"Context:\\n{context}\\n\\n\"\n",
- " \"Provide the output in JSON format.\"\n",
- " )\n",
- " )\n",
- "\n",
- " return program\n",
- "\n",
- " # PLACEHOLDER - Replace with actual implementation\n",
- " print(f\"TODO: Create structured output program with {output_model.__name__}\")\n",
- " return None\n",
- "\n",
- "# Test the function\n",
- "if index:\n",
- " structured_program = create_structured_output_program(ResearchPaperInfo)\n",
- " \n",
- " if structured_program:\n",
- " print(\"โ
Structured output program created\")\n",
- " \n",
- " # Test with retrieval and structured extraction\n",
- " structure_query = \"Tell me about AI agents and their capabilities\"\n",
- " print(f\"\\n๐ Testing structured query: '{structure_query}'\")\n",
- " \n",
- " # Get context for structured extraction (Uncomment when implemented)\n",
- " retriever = VectorIndexRetriever(index=index, similarity_top_k=3)\n",
- " nodes = retriever.retrieve(structure_query)\n",
- " context = \"\\n\".join([node.text for node in nodes])\n",
- " \n",
- " # Uncomment when implemented:\n",
- " response = structured_program(context=context, query=structure_query)\n",
- " print(f\"๐ Structured Response:\\n{response}\")\n",
- " print(\" (Complete the function above to get structured JSON output)\")\n",
- " \n",
- " print(\"\\n๐ก Expected output format:\")\n",
- " print(\" - title: String\")\n",
- " print(\" - key_points: List of strings\")\n",
- " print(\" - applications: List of strings\") \n",
- " print(\" - summary: String\")\n",
- " else:\n",
- " print(\"โ Failed to create structured output program\")\n",
- "else:\n",
- " print(\"โ No index available - run previous cells first\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "id": "18ae040a",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 09:03:24,395 - INFO - query_type :, vector\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
Advanced RAG pipeline created successfully!\n",
- " ๐ง Similarity filtering: โ
\n",
- " ๐ณ TreeSummarize synthesis: โ
\n",
- "\n",
- "๐ Testing complex query: 'Analyze the current state and future potential of AI agent technologies'\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 09:03:25,347 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ Advanced RAG Response:\n",
- "The current state of AI agent technologies is characterized by advancements in reasoning, planning, and tool execution capabilities, enabling them to tackle complex, multi-step problems. Both single-agent and multi-agent architectures are being explored, with the choice of architecture depending on the specific use case. Effective agent systems often incorporate well-defined system prompts, clear leadership and task division, dedicated phases for reasoning, planning, execution, and evaluation, as well as dynamic team structures and feedback mechanisms.\n",
- "\n",
- "Despite these advancements, there are notable limitations that need addressing, such as the development of comprehensive benchmarks, ensuring real-world applicability, and mitigating biases inherent in language models. The future potential of AI agent technologies lies in overcoming these challenges, which will enable the creation of more reliable and effective autonomous agents. The progression from static language models to dynamic agents suggests a promising trajectory for AI applications, with ongoing research and development aimed at enhancing their capabilities and applicability.\n",
- " (Complete the function above to test the full pipeline)\n",
- "\n",
- "๐ฏ This should provide:\n",
- " - Filtered relevant results only\n",
- " - Comprehensive analytical response\n",
- " - Combined postprocessing and synthesis\n"
- ]
- }
- ],
- "source": [
- "def create_advanced_rag_pipeline(index, similarity_cutoff: float = 0.3, top_k: int = 10):\n",
- " \"\"\"\n",
- " Create a comprehensive advanced RAG pipeline combining multiple techniques.\n",
- " \n",
- " TODO: Complete this function to create the ultimate advanced RAG query engine.\n",
- " HINT: Combine SimilarityPostprocessor + TreeSummarize using index.as_query_engine()\n",
- " \n",
- " Args:\n",
- " index: Vector index to query\n",
- " similarity_cutoff: Minimum similarity score for filtering\n",
- " top_k: Number of initial results to retrieve\n",
- " \n",
- " Returns:\n",
- " Advanced query engine with filtering and synthesis combined\n",
- " \"\"\"\n",
- " # TODO: Create similarity postprocessor\n",
- " similarity_processor = SimilarityPostprocessor(cutoff=similarity_cutoff)\n",
- " \n",
- " # TODO: Create TreeSummarize for comprehensive responses\n",
- " tree_synthesizer = TreeSummarize()\n",
- " \n",
- " # TODO: Create the comprehensive query engine combining both techniques\n",
- " advanced_engine = index.as_query_engine(\n",
- " retriever_kwargs={\"similarity_top_k\": top_k},\n",
- " node_postprocessors=[similarity_processor],\n",
- " response_synthesizer=tree_synthesizer\n",
- " )\n",
- "\n",
- " return advanced_engine\n",
- "\n",
- " # PLACEHOLDER - Replace with actual implementation\n",
- " print(f\"TODO: Create advanced RAG pipeline with all techniques\")\n",
- " return None\n",
- "\n",
- "# Test the comprehensive pipeline\n",
- "if index:\n",
- " advanced_pipeline = create_advanced_rag_pipeline(index)\n",
- " \n",
- " if advanced_pipeline:\n",
- " print(\"โ
Advanced RAG pipeline created successfully!\")\n",
- " print(\" ๐ง Similarity filtering: โ
\")\n",
- " print(\" ๐ณ TreeSummarize synthesis: โ
\")\n",
- " \n",
- " # Test with complex query\n",
- " complex_query = \"Analyze the current state and future potential of AI agent technologies\"\n",
- " print(f\"\\n๐ Testing complex query: '{complex_query}'\")\n",
- " \n",
- " # Uncomment when implemented:\n",
- " response = advanced_pipeline.query(complex_query)\n",
- " print(f\"๐ Advanced RAG Response:\\n{response}\")\n",
- " print(\" (Complete the function above to test the full pipeline)\")\n",
- " \n",
- " print(\"\\n๐ฏ This should provide:\")\n",
- " print(\" - Filtered relevant results only\")\n",
- " print(\" - Comprehensive analytical response\")\n",
- " print(\" - Combined postprocessing and synthesis\")\n",
- " else:\n",
- " print(\"โ Failed to create advanced RAG pipeline\")\n",
- "else:\n",
- " print(\"โ No index available - run previous cells first\")\n"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "env_v1",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.13.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/nitesh_sharma/20251102/nitesh_assignment_3a_basic_gradio_rag.ipynb b/nitesh_sharma/20251102/nitesh_assignment_3a_basic_gradio_rag.ipynb
deleted file mode 100644
index dbd24a5..0000000
--- a/nitesh_sharma/20251102/nitesh_assignment_3a_basic_gradio_rag.ipynb
+++ /dev/null
@@ -1,444 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Assignment 3a: Basic Gradio RAG Frontend\n",
- "## Day 6 Session 2 - Building Simple RAG Applications\n",
- "\n",
- "In this assignment, you'll build a simple Gradio frontend for your RAG system with just the essential features:\n",
- "- Button to initialize the vector database\n",
- "- Search query input and button\n",
- "- Display of AI responses\n",
- "\n",
- "**Learning Objectives:**\n",
- "- Create basic Gradio interfaces\n",
- "- Connect RAG backend to frontend\n",
- "- Handle user interactions and database initialization\n",
- "- Build functional AI-powered web applications\n",
- "\n",
- "**Prerequisites:**\n",
- "- Completed Assignment 1 (Vector Database Basics)\n",
- "- Completed Assignment 2 (Advanced RAG)\n",
- "- Understanding of LlamaIndex fundamentals\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 📚 Part 1: Setup and Imports\n",
- "\n",
- "Import all necessary libraries for building your Gradio RAG application.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\env_v1\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
All libraries imported successfully!\n"
- ]
- }
- ],
- "source": [
- "# Import required libraries\n",
- "import gradio as gr\n",
- "import os\n",
- "from pathlib import Path\n",
- "\n",
- "# LlamaIndex components\n",
- "from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings\n",
- "from llama_index.vector_stores.lancedb import LanceDBVectorStore\n",
- "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
- "from llama_index.llms.openrouter import OpenRouter\n",
- "\n",
- "print(\"โ
All libraries imported successfully!\")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 🤖 Part 2: RAG Backend Class\n",
- "\n",
- "Create a simple RAG backend that can initialize the database and answer queries.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 10:03:50,642 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:03:52,163 - INFO - 1 prompt is loaded, with the key: query\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ RAG Backend initialized and ready!\n"
- ]
- }
- ],
- "source": [
- "import streamlit as st\n",
- "class SimpleRAGBackend:\n",
- " \"\"\"Simple RAG backend for Gradio frontend.\"\"\"\n",
- " \n",
- " def __init__(self):\n",
- " self.index = None\n",
- " self.setup_settings()\n",
- " def setup_settings(self):\n",
- " \"\"\"Configure LlamaIndex settings.\"\"\"\n",
- " # Set up the LLM using OpenRouter\n",
- " api_key = os.getenv(\"OPENROUTER_API_KEY\") or st.secrets.get(\"OPENROUTER_API_KEY\")\n",
- " #api_key =\"sk-or-v1...\"\n",
- " if api_key:\n",
- " Settings.llm = OpenRouter(\n",
- " api_key=api_key,\n",
- " model=\"gpt-4o\",\n",
- " temperature=0.1\n",
- " )\n",
- " \n",
- " # Set up the embedding model\n",
- " Settings.embed_model = HuggingFaceEmbedding(\n",
- " model_name=\"BAAI/bge-small-en-v1.5\",\n",
- " trust_remote_code=True\n",
- " )\n",
- " \n",
- " # Set chunking parameters\n",
- " Settings.chunk_size = 512\n",
- " Settings.chunk_overlap = 50\n",
- " \n",
- " def initialize_database(self, data_folder=\"../data\"):\n",
- " \"\"\"Initialize the vector database with documents.\"\"\"\n",
- " # Check if data folder exists\n",
- " if not Path(data_folder).exists():\n",
- " return f\"โ Data folder '{data_folder}' not found!\"\n",
- " \n",
- " try:\n",
- " # Create vector store\n",
- " vector_store = LanceDBVectorStore(\n",
- " uri=\"./basic_rag_vectordb\",\n",
- " table_name=\"documents\"\n",
- " )\n",
- " \n",
- " # Load documents\n",
- " reader = SimpleDirectoryReader(input_dir=data_folder, recursive=True)\n",
- " documents = reader.load_data()\n",
- " \n",
- " # Create storage context and index\n",
- " storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
- " self.index = VectorStoreIndex.from_documents(\n",
- " documents, \n",
- " storage_context=storage_context,\n",
- " show_progress=True\n",
- " )\n",
- " \n",
- " return f\"โ
Database initialized successfully with {len(documents)} documents!\"\n",
- " \n",
- " except Exception as e:\n",
- " return f\"โ Error initializing database: {str(e)}\"\n",
- " \n",
- " def query(self, question):\n",
- " \"\"\"Query the RAG system and return response.\"\"\"\n",
- " # Check if index exists\n",
- " if self.index is None:\n",
- " return \"โ Please initialize the database first!\"\n",
- " \n",
- " # Check if question is empty\n",
- " if not question or not question.strip():\n",
- " return \"โ ๏ธ Please enter a question first!\"\n",
- " \n",
- " try:\n",
- " # Create query engine and get response\n",
- " query_engine = self.index.as_query_engine()\n",
- " response = query_engine.query(question)\n",
- " return str(response)\n",
- " \n",
- " except Exception as e:\n",
- " return f\"โ Error processing query: {str(e)}\"\n",
- "\n",
- "# Initialize the backend\n",
- "rag_backend = SimpleRAGBackend()\n",
- "print(\"๐ RAG Backend initialized and ready!\")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 🎨 Part 3: Gradio Interface\n",
- "\n",
- "Create a simple Gradio interface with:\n",
- "1. Button to initialize the database\n",
- "2. Text input for queries\n",
- "3. Button to submit queries\n",
- "4. Text output for responses\n",
- "5. Text output for status messages\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "โ
Basic RAG interface created successfully!\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 10:04:02,285 - INFO - HTTP Request: GET https://api.gradio.app/pkg-version \"HTTP/1.1 200 OK\"\n"
- ]
- }
- ],
- "source": [
- "def create_basic_rag_interface():\n",
- " \"\"\"Create basic RAG interface with essential features.\"\"\"\n",
- " \n",
- " def initialize_db():\n",
- " \"\"\"Handle database initialization.\"\"\"\n",
- " return rag_backend.initialize_database()\n",
- " \n",
- " def handle_query(question):\n",
- " \"\"\"Handle user queries.\"\"\"\n",
- " return rag_backend.query(question)\n",
- " \n",
- " # TODO: Create Gradio interface using gr.Blocks()\n",
- " # Hint: Look at the structure below and fill in the missing components\n",
- " \n",
- " with gr.Blocks(title=\"Basic RAG Assistant\") as interface:\n",
- " # TODO: Add title and description\n",
- " # Hint: Use gr.Markdown() for formatted text\n",
- " gr.Markdown(\"# Basic RAG Assistant\\nAsk questions based on the indexed documents.\")\n",
- " \n",
- " # TODO: Add initialization section\n",
- " # Hint: You need to use gr.Button to initialize the database\n",
- " init_btn = gr.Button(\"Initialize Database\")\n",
- " \n",
- " # TODO: Add status output\n",
- " # Hint: You need to use gr.Textbox to display the status\n",
- " status_output = gr.Textbox(label=\"Status\", interactive=False)\n",
- "\n",
- " # The connection between the button and the status output has already been implemented\n",
- " # at the end of this function\n",
- "\n",
- " status_output = status_output\n",
- " # TODO: Add query section\n",
- " # Hint: You need a text input, submit button, and response output\n",
- "\n",
- " # Use gr.Textbox to create a text input\n",
- " query_input = gr.Textbox(label=\"Your Question\", placeholder=\"Ask a question about the documents...\")\n",
- "\n",
- " # Use gr.Button to create a submit button\n",
- " submit_btn = gr.Button(\"Submit\")\n",
- "\n",
- " # Use gr.Textbox to create a response output\n",
- " response_output = gr.Textbox(label=\"Response\", interactive=True, lines=10)\n",
- "\n",
- " # Connect buttons to functions\n",
- " # Uncomment when above is implemented\n",
- " init_btn.click(initialize_db, outputs=[status_output])\n",
- " submit_btn.click(handle_query, inputs=[query_input], outputs=[response_output])\n",
- "\n",
- " return interface\n",
- "\n",
- "# Create the interface\n",
- "basic_interface = create_basic_rag_interface()\n",
- "print(\"โ
Basic RAG interface created successfully!\")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 🚀 Part 4: Launch Your Application\n",
- "\n",
- "Launch your Gradio application and test it!\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ Launching your Basic RAG Assistant...\n",
- "๐ Your application will open in a new browser tab!\n",
- "\n",
- "๐ Testing Instructions:\n",
- "1. Click 'Initialize Database' button first\n",
- "2. Wait for success message\n",
- "3. Enter a question in the query box\n",
- "4. Click 'Ask Question' to get AI response\n",
- "\n",
- "๐ก Example questions to try:\n",
- "- What are the main topics in the documents?\n",
- "- Summarize the key findings\n",
- "- Explain the methodology used\n",
- "\n",
- "๐ Launch your app:\n",
- "* Running on local URL: http://127.0.0.1:7863\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-11-02 10:04:06,313 - INFO - HTTP Request: GET http://127.0.0.1:7863/gradio_api/startup-events \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:04:06,326 - INFO - HTTP Request: HEAD http://127.0.0.1:7863/ \"HTTP/1.1 200 OK\"\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "* To create a public link, set `share=True` in `launch()`.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- ""
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": []
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\ai_agents.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\in_the_end.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\rags.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\env_v1\\Lib\\site-packages\\whisper\\transcribe.py:132: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
- " warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n",
- "Parsing nodes: 100%|โโโโโโโโโโ| 39/39 [00:00<00:00, 505.61it/s]\n",
- "Generating embeddings: 100%|โโโโโโโโโโ| 92/92 [00:12<00:00, 7.36it/s]\n",
- "2025-11-02 10:04:34,688 - INFO - query_type :, vector\n",
- "2025-11-02 10:04:36,053 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
- ]
- }
- ],
- "source": [
- "print(\"๐ Launching your Basic RAG Assistant...\")\n",
- "print(\"๐ Your application will open in a new browser tab!\")\n",
- "print(\"\")\n",
- "print(\"๐ Testing Instructions:\")\n",
- "print(\"1. Click 'Initialize Database' button first\")\n",
- "print(\"2. Wait for success message\")\n",
- "print(\"3. Enter a question in the query box\")\n",
- "print(\"4. Click 'Ask Question' to get AI response\")\n",
- "print(\"\")\n",
- "print(\"๐ก Example questions to try:\")\n",
- "print(\"- What are the main topics in the documents?\")\n",
- "print(\"- Summarize the key findings\")\n",
- "print(\"- Explain the methodology used\")\n",
- "print(\"\")\n",
- "print(\"๐ Launch your app:\")\n",
- "\n",
- "# Your launch code here:\n",
- "# Uncomment when implemented\n",
- "basic_interface.launch()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## ✅ Assignment Completion Checklist\n",
- "\n",
- "Before submitting, ensure you have:\n",
- "\n",
- "- [x] RAG backend is provided and working\n",
- "- [ ] Created Gradio interface with required components:\n",
- " - [ ] Title and description using gr.Markdown()\n",
- " - [ ] Initialize database button using gr.Button()\n",
- " - [ ] Status output using gr.Textbox()\n",
- " - [ ] Query input field using gr.Textbox()\n",
- " - [ ] Submit query button using gr.Button()\n",
- " - [ ] Response output area using gr.Textbox()\n",
- "- [ ] Connected buttons to backend functions using .click()\n",
- "- [ ] Successfully launched the application\n",
- "- [ ] Tested the full workflow (initialize → query → response)\n",
- "\n",
- "## 🎉 Congratulations!\n",
- "\n",
- "You've successfully built your first Gradio RAG application! You now have:\n",
- "\n",
- "- A functional web interface for your RAG system\n",
- "- Understanding of Gradio basics and component connections\n",
- "- A foundation for building more complex AI applications\n",
- "\n",
- "**Next Steps**: Complete Assignment 3b to add advanced configuration options to your RAG interface!\n"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "env_v1",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.13.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/nitesh_sharma/20251102/nitesh_assignment_3b_advanced_gradio_rag.ipynb b/nitesh_sharma/20251102/nitesh_assignment_3b_advanced_gradio_rag.ipynb
deleted file mode 100644
index 8c2e2b3..0000000
--- a/nitesh_sharma/20251102/nitesh_assignment_3b_advanced_gradio_rag.ipynb
+++ /dev/null
@@ -1,704 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Assignment 3b: Advanced Gradio RAG Frontend\n",
- "## Day 6 Session 2 - Building Configurable RAG Applications\n",
- "\n",
- "In this assignment, you'll extend your basic RAG interface with advanced configuration options to create a professional, feature-rich RAG application.\n",
- "\n",
- "**New Features to Add:**\n",
- "- Model selection dropdown (gpt-4o, gpt-4o-mini)\n",
- "- Temperature slider (0 to 1 with 0.1 intervals)\n",
- "- Chunk size configuration\n",
- "- Chunk overlap configuration \n",
- "- Similarity top-k slider\n",
- "- Node postprocessor multiselect\n",
- "- Similarity cutoff slider\n",
- "- Response synthesizer dropdown\n",
- "\n",
- "**Learning Objectives:**\n",
- "- Advanced Gradio components and interactions\n",
- "- Dynamic RAG configuration\n",
- "- Professional UI design patterns\n",
- "- Parameter validation and handling\n",
- "- Building production-ready AI applications\n",
- "\n",
- "**Prerequisites:**\n",
- "- Completed Assignment 3a (Basic Gradio RAG)\n",
- "- Understanding of RAG parameters and their effects\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## ๐ Part 1: Setup and Imports\n",
- "\n",
- "Import all necessary libraries including advanced RAG components for configuration options.\n",
- "\n",
- "**Note:** This assignment uses OpenRouter for LLM access (not OpenAI). Make sure you have your `OPENROUTER_API_KEY` environment variable set.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\env_v1\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
- " from .autonotebook import tqdm as notebook_tqdm\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ All libraries imported successfully!\n"
- ]
- }
- ],
- "source": [
- "# Import all required libraries\n",
- "import gradio as gr\n",
- "import os\n",
- "from pathlib import Path\n",
- "from typing import Dict, List, Optional, Any\n",
- "\n",
- "# LlamaIndex core components\n",
- "from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings\n",
- "from llama_index.vector_stores.lancedb import LanceDBVectorStore\n",
- "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
- "from llama_index.llms.openrouter import OpenRouter\n",
- "\n",
- "# Advanced RAG components\n",
- "from llama_index.core.postprocessor import SimilarityPostprocessor\n",
- "from llama_index.core.response_synthesizers import TreeSummarize, Refine, CompactAndRefine\n",
- "from llama_index.core.retrievers import VectorIndexRetriever\n",
- "\n",
- "print(\"✅ All libraries imported successfully!\")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## ๐ค Part 2: Advanced RAG Backend Class\n",
- "\n",
- "Create an advanced RAG backend that supports dynamic configuration of all parameters.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ Advanced RAG Backend initialized and ready!\n"
- ]
- }
- ],
- "source": [
- "import streamlit as st\n",
- "class AdvancedRAGBackend:\n",
- " \"\"\"Advanced RAG backend with configurable parameters.\"\"\"\n",
- " \n",
- " def __init__(self):\n",
- " self.index = None\n",
- " self.available_models = [\"gpt-4o\", \"gpt-4o-mini\"]\n",
- " self.available_postprocessors = [\"SimilarityPostprocessor\"]\n",
- " self.available_synthesizers = [\"TreeSummarize\", \"Refine\", \"CompactAndRefine\", \"Default\"]\n",
- " self.update_settings()\n",
- " \n",
- " def update_settings(self, model: str = \"gpt-4o-mini\", temperature: float = 0.1, chunk_size: int = 512, chunk_overlap: int = 50):\n",
- " \"\"\"Update LlamaIndex settings based on user configuration.\"\"\"\n",
- " # Set up the LLM using OpenRouter\n",
- " api_key = os.getenv(\"OPENROUTER_API_KEY\") or st.secrets.get(\"OPENROUTER_API_KEY\")\n",
- " if api_key:\n",
- " Settings.llm = OpenRouter(\n",
- " api_key=api_key,\n",
- " model=model,\n",
- " temperature=temperature\n",
- " )\n",
- " \n",
- " # Set up the embedding model (keep this constant)\n",
- " Settings.embed_model = HuggingFaceEmbedding(\n",
- " model_name=\"BAAI/bge-small-en-v1.5\",\n",
- " trust_remote_code=True\n",
- " )\n",
- " \n",
- " # Set chunking parameters from function parameters\n",
- " Settings.chunk_size = chunk_size\n",
- " Settings.chunk_overlap = chunk_overlap\n",
- " \n",
- " def initialize_database(self, data_folder=\"../data\"):\n",
- " \"\"\"Initialize the vector database with documents.\"\"\"\n",
- " # Check if data folder exists\n",
- " if not Path(data_folder).exists():\n",
- " return f\"โ Data folder '{data_folder}' not found!\"\n",
- " \n",
- " try:\n",
- " # Create vector store\n",
- " vector_store = LanceDBVectorStore(\n",
- " uri=\"./advanced_rag_vectordb\",\n",
- " table_name=\"documents\"\n",
- " )\n",
- " \n",
- " # Load documents\n",
- " reader = SimpleDirectoryReader(input_dir=data_folder, recursive=True)\n",
- " documents = reader.load_data()\n",
- " \n",
- " # Create storage context and index\n",
- " storage_context = StorageContext.from_defaults(vector_store=vector_store)\n",
- " self.index = VectorStoreIndex.from_documents(\n",
- " documents, \n",
- " storage_context=storage_context,\n",
- " show_progress=True\n",
- " )\n",
- " \n",
- " return f\"✅ Database initialized successfully with {len(documents)} documents!\"\n",
- " \n",
- " except Exception as e:\n",
- " return f\"โ Error initializing database: {str(e)}\"\n",
- " \n",
- " def get_postprocessor(self, postprocessor_name: str, similarity_cutoff: float):\n",
- " \"\"\"Get the selected postprocessor.\"\"\"\n",
- " if postprocessor_name == \"SimilarityPostprocessor\":\n",
- " return SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)\n",
- " elif postprocessor_name == \"None\":\n",
- " return None\n",
- " else:\n",
- " return None\n",
- " \n",
- " def get_synthesizer(self, synthesizer_name: str):\n",
- " \"\"\"Get the selected response synthesizer.\"\"\"\n",
- " if synthesizer_name == \"TreeSummarize\":\n",
- " return TreeSummarize()\n",
- " elif synthesizer_name == \"Refine\":\n",
- " return Refine()\n",
- " elif synthesizer_name == \"CompactAndRefine\":\n",
- " return CompactAndRefine()\n",
- " elif synthesizer_name == \"Default\":\n",
- " return None\n",
- " else:\n",
- " return None\n",
- " \n",
- " def advanced_query(self, question: str, model: str, temperature: float, \n",
- " chunk_size: int, chunk_overlap: int, similarity_top_k: int,\n",
- " postprocessor_names: List[str], similarity_cutoff: float,\n",
- " synthesizer_name: str) -> Dict[str, Any]:\n",
- " \"\"\"Query the RAG system with advanced configuration.\"\"\"\n",
- " \n",
- " # Check if index exists\n",
- " if self.index is None:\n",
- " return {\"response\": \"โ Please initialize the database first!\", \"sources\": [], \"config\": {}}\n",
- " \n",
- " # Check if question is empty\n",
- " if not question or not question.strip():\n",
- " return {\"response\": \"⚠️ Please enter a question first!\", \"sources\": [], \"config\": {}}\n",
- " \n",
- " try:\n",
- " # Update settings with new parameters\n",
- " self.update_settings(model, temperature, chunk_size, chunk_overlap)\n",
- " \n",
- " # Get postprocessors\n",
- " postprocessors = []\n",
- " for name in postprocessor_names:\n",
- " processor = self.get_postprocessor(name, similarity_cutoff)\n",
- " if processor is not None:\n",
- " postprocessors.append(processor)\n",
- " \n",
- " # Get synthesizer\n",
- " synthesizer = self.get_synthesizer(synthesizer_name)\n",
- " \n",
- " # Create query engine with all parameters\n",
- " query_engine_kwargs = {\"similarity_top_k\": similarity_top_k}\n",
- " if postprocessors:\n",
- " query_engine_kwargs[\"node_postprocessors\"] = postprocessors\n",
- " if synthesizer is not None:\n",
- " query_engine_kwargs[\"response_synthesizer\"] = synthesizer\n",
- " \n",
- " query_engine = self.index.as_query_engine(**query_engine_kwargs)\n",
- " \n",
- " # Query and get response\n",
- " response = query_engine.query(question)\n",
- " \n",
- " # Extract source information if available\n",
- " sources = []\n",
- " if hasattr(response, 'source_nodes'):\n",
- " for node in response.source_nodes:\n",
- " sources.append({\n",
- " \"text\": node.text[:200] + \"...\",\n",
- " \"score\": getattr(node, 'score', 0.0),\n",
- " \"source\": getattr(node.node, 'metadata', {}).get('file_name', 'Unknown')\n",
- " })\n",
- " \n",
- " return {\n",
- " \"response\": str(response),\n",
- " \"sources\": sources,\n",
- " \"config\": {\n",
- " \"model\": model,\n",
- " \"temperature\": temperature,\n",
- " \"chunk_size\": chunk_size,\n",
- " \"chunk_overlap\": chunk_overlap,\n",
- " \"similarity_top_k\": similarity_top_k,\n",
- " \"postprocessors\": postprocessor_names,\n",
- " \"similarity_cutoff\": similarity_cutoff,\n",
- " \"synthesizer\": synthesizer_name\n",
- " }\n",
- " }\n",
- " \n",
- " except Exception as e:\n",
- " return {\"response\": f\"โ Error processing query: {str(e)}\", \"sources\": [], \"config\": {}}\n",
- "\n",
- "# Initialize the backend\n",
- "rag_backend = AdvancedRAGBackend()\n",
- "print(\"๐ Advanced RAG Backend initialized and ready!\")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## ๐จ Part 3: Advanced Gradio Interface\n",
- "\n",
- "Create a sophisticated Gradio interface with all the configuration options specified:\n",
- "1. Database initialization button\n",
- "2. Search query input and button \n",
- "3. Model selection dropdown\n",
- "4. Temperature slider\n",
- "5. Chunk size and overlap inputs\n",
- "6. Similarity top-k slider\n",
- "7. Node postprocessor multiselect\n",
- "8. Similarity cutoff slider\n",
- "9. Response synthesizer dropdown\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "✅ Advanced RAG interface created successfully!\n"
- ]
- }
- ],
- "source": [
- "def create_advanced_rag_interface():\n",
- " \"\"\"Create advanced RAG interface with full configuration options.\"\"\"\n",
- " \n",
- " def initialize_db():\n",
- " \"\"\"Handle database initialization.\"\"\"\n",
- " return rag_backend.initialize_database()\n",
- " \n",
- " def handle_advanced_query(question, model, temperature, chunk_size, chunk_overlap, \n",
- " similarity_top_k, postprocessors, similarity_cutoff, synthesizer):\n",
- " \"\"\"Handle advanced RAG queries with all configuration options.\"\"\"\n",
- " result = rag_backend.advanced_query(\n",
- " question, model, temperature, chunk_size, chunk_overlap,\n",
- " similarity_top_k, postprocessors, similarity_cutoff, synthesizer\n",
- " )\n",
- " \n",
- " # Format configuration for display\n",
- " config_text = f\"\"\"**Current Configuration:**\n",
- "- Model: {result['config'].get('model', 'N/A')}\n",
- "- Temperature: {result['config'].get('temperature', 'N/A')}\n",
- "- Chunk Size: {result['config'].get('chunk_size', 'N/A')}\n",
- "- Chunk Overlap: {result['config'].get('chunk_overlap', 'N/A')}\n",
- "- Similarity Top-K: {result['config'].get('similarity_top_k', 'N/A')}\n",
- "- Postprocessors: {', '.join(result['config'].get('postprocessors', []))}\n",
- "- Similarity Cutoff: {result['config'].get('similarity_cutoff', 'N/A')}\n",
- "- Synthesizer: {result['config'].get('synthesizer', 'N/A')}\"\"\"\n",
- " \n",
- " return result[\"response\"], config_text\n",
- " \n",
- " # TODO: Create the advanced interface structure\n",
- " # Hint: This interface needs more complex layout with configuration controls\n",
- " \n",
- " with gr.Blocks(title=\"Advanced RAG Assistant\") as interface:\n",
- " # TODO: Add title and description\n",
- " # Hint: Use gr.Markdown() for formatted text\n",
- " \n",
- " # Your title and description here:\n",
- " \n",
- " \n",
- " # TODO: Add database initialization section\n",
- " # Hint: Use gr.Button() for initialization and gr.Textbox() for status\n",
- " init_btn = gr.Button(\"Initialize Database\")\n",
- " status_output = gr.Textbox(label=\"Initialization Status\", interactive=False)\n",
- "\n",
- " # TODO: Create main layout with columns\n",
- " # Hint: Configuration controls on left, query/response on right makes sense\n",
- " # Use gr.Row() and gr.Column() to organize this\n",
- " \n",
- " with gr.Row():\n",
- " with gr.Column(scale=1):\n",
- " \n",
- " gr.Markdown(\"### โ๏ธ RAG Configuration\")\n",
- " \n",
- " # TODO: Model selection\n",
- " # Hint: Use gr.Dropdown() with choices=[\"gpt-4o\", \"gpt-4o-mini\"]\n",
- " model_dropdown = gr.Dropdown(choices=[\"gpt-4o\", \"gpt-4o-mini\"], label=\"Select Model\")\n",
- " \n",
- " \n",
- " # TODO: Temperature control \n",
- " # Hint: Use gr.Slider() with minimum=0.0, maximum=1.0, step=0.1, value=0.1\n",
- " temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.1, label=\"Temperature\")\n",
- " \n",
- " \n",
- " # TODO: Chunking parameters\n",
- " # Hint: Use gr.Number() for numeric inputs with default values\n",
- " chunk_size_input = gr.Number(value=512, label=\"Chunk Size\")\n",
- "\n",
- " chunk_overlap_input = gr.Number(value=50, label=\"Chunk Overlap\")\n",
- "\n",
- " # TODO: Retrieval parameters\n",
- " # Hint: Use gr.Slider() with minimum=1, maximum=20, step=1, value=5\n",
- " similarity_topk_slider = gr.Slider(minimum=1, maximum=20, step=1, value=5, label=\"Similarity Top-K\")\n",
- "\n",
- "\n",
- " # TODO: Postprocessor selection\n",
- " # Hint: Use gr.CheckboxGroup() with choices=[\"SimilarityPostprocessor\"]\n",
- " postprocessor_checkbox = gr.CheckboxGroup(choices=[\"SimilarityPostprocessor\"], label=\"Select Postprocessors\")\n",
- "\n",
- " # TODO: Similarity filtering\n",
- " # Hint: Use gr.Slider() with minimum=0.0, maximum=1.0, step=0.1, value=0.3\n",
- " similarity_cutoff_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.3, label=\"Similarity Cutoff\")\n",
- "\n",
- "\n",
- " # TODO: Response synthesizer\n",
- " # Hint: Use gr.Dropdown() with choices=[\"TreeSummarize\", \"Refine\", \"CompactAndRefine\", \"Default\"]\n",
- " synthesizer_dropdown = gr.Dropdown(choices=[\"TreeSummarize\", \"Refine\", \"CompactAndRefine\", \"Default\"], label=\"Select Response Synthesizer\")\n",
- "\n",
- " with gr.Column(scale=2):\n",
- " gr.Markdown(\"### ๐ฌ Query Interface\")\n",
- " \n",
- " # TODO: Query input\n",
- " # Hint: Use gr.Textbox() with label=\"Ask a question\", placeholder text, lines=3\n",
- " query_input = gr.Textbox(label=\"Ask a question\", placeholder=\"Type your question here...\", lines=3)\n",
- " \n",
- " \n",
- " # TODO: Submit button\n",
- " # Hint: Use gr.Button() with variant=\"primary\"\n",
- " submit_btn = gr.Button(variant=\"primary\", value=\"Submit\")\n",
- " \n",
- " \n",
- " # TODO: Response output\n",
- " # Hint: Use gr.Textbox() with lines=12, interactive=False\n",
- " response_output = gr.Textbox(lines=12, interactive=False)\n",
- " \n",
- " \n",
- " # TODO: Configuration display\n",
- " # Hint: Use gr.Textbox() with lines=8, interactive=False\n",
- " config_display = gr.Textbox(lines=8, interactive=False)\n",
- " \n",
- " \n",
- " # Uncomment to Connect functions to components\n",
- " init_btn.click(initialize_db, outputs=[status_output])\n",
- " \n",
- " submit_btn.click(\n",
- " handle_advanced_query,\n",
- " inputs=[\n",
- " query_input, model_dropdown, temperature_slider,\n",
- " chunk_size_input, chunk_overlap_input, similarity_topk_slider,\n",
- " postprocessor_checkbox, similarity_cutoff_slider, synthesizer_dropdown\n",
- " ],\n",
- " outputs=[response_output, config_display]\n",
- " )\n",
- "\n",
- " return interface\n",
- "\n",
- "# Create the interface\n",
- "advanced_interface = create_advanced_rag_interface()\n",
- "print(\"✅ Advanced RAG interface created successfully!\")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## ๐ Part 4: Launch Your Advanced Application\n",
- "\n",
- "Launch your advanced Gradio application and test all the configuration options!\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "๐ Launching your Advanced RAG Assistant...\n",
- "๐ Your application will open in a new browser tab!\n",
- "\n",
- "⚠️ Make sure your OPENROUTER_API_KEY environment variable is set!\n",
- "\n",
- "๐ Testing Instructions:\n",
- "1. Click 'Initialize Vector Database' button first\n",
- "2. Wait for success message\n",
- "3. Configure your RAG parameters:\n",
- " - Choose model (gpt-4o, gpt-4o-mini)\n",
- " - Adjust temperature (0.0 = deterministic, 1.0 = creative)\n",
- " - Set chunk size and overlap\n",
- " - Choose similarity top-k\n",
- " - Select postprocessors and synthesizer\n",
- "4. Enter a question and click 'Ask Question'\n",
- "5. Review both the response and configuration used\n",
- "\n",
- "๐งช Experiments to try:\n",
- "- Compare different models with the same question\n",
- "- Test temperature effects (0.1 vs 0.9)\n",
- "- Try different chunk sizes (256 vs 1024)\n",
- "- Compare synthesizers (TreeSummarize vs Refine)\n",
- "- Adjust similarity cutoff to filter results\n",
- "* Running on local URL: http://127.0.0.1:7860\n",
- "* To create a public link, set `share=True` in `launch()`.\n"
- ]
- },
- {
- "data": {
- "text/html": [
- ""
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": []
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\ai_agents.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\in_the_end.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n",
- "Failed to load file c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\src\\20251102\\session_2\\assignments\\..\\data\\audio\\rags.mp3 with error: [WinError 2] The system cannot find the file specified. Skipping...\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "c:\\Users\\NSHAR\\OneDrive - paramanands limited\\AI_projects\\env_v1\\Lib\\site-packages\\whisper\\transcribe.py:132: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
- " warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n",
- "Parsing nodes: 100%|โโโโโโโโโโ| 39/39 [00:04<00:00, 8.51it/s]\n",
- "Generating embeddings: 100%|โโโโโโโโโโ| 92/92 [00:12<00:00, 7.49it/s]\n",
- "2025-11-02 10:28:32,764 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:28:34,687 - INFO - 1 prompt is loaded, with the key: query\n",
- "2025-11-02 10:28:34,811 - INFO - query_type :, vector\n",
- "2025-11-02 10:28:37,293 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:28:51,013 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:28:52,683 - INFO - 1 prompt is loaded, with the key: query\n",
- "2025-11-02 10:28:52,754 - INFO - query_type :, vector\n",
- "2025-11-02 10:28:53,642 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:29:08,033 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:29:09,556 - INFO - 1 prompt is loaded, with the key: query\n",
- "2025-11-02 10:29:09,625 - INFO - query_type :, vector\n",
- "2025-11-02 10:29:10,572 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:29:35,613 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:29:37,103 - INFO - 1 prompt is loaded, with the key: query\n",
- "2025-11-02 10:29:37,161 - INFO - query_type :, vector\n",
- "2025-11-02 10:29:37,931 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:29:41,276 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:29:43,651 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:29:54,069 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:29:55,585 - INFO - 1 prompt is loaded, with the key: query\n",
- "2025-11-02 10:29:55,651 - INFO - query_type :, vector\n",
- "2025-11-02 10:29:56,488 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:30:04,388 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:30:06,037 - INFO - 1 prompt is loaded, with the key: query\n",
- "2025-11-02 10:30:06,079 - INFO - query_type :, vector\n",
- "2025-11-02 10:30:06,824 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:30:16,073 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:30:17,534 - INFO - 1 prompt is loaded, with the key: query\n",
- "2025-11-02 10:30:17,586 - INFO - query_type :, vector\n",
- "2025-11-02 10:30:18,655 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:30:35,055 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:30:36,521 - INFO - 1 prompt is loaded, with the key: query\n",
- "2025-11-02 10:30:36,566 - INFO - query_type :, vector\n",
- "2025-11-02 10:30:37,574 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
- "2025-11-02 10:30:56,738 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5\n",
- "2025-11-02 10:30:58,271 - INFO - 1 prompt is loaded, with the key: query\n",
- "2025-11-02 10:30:58,327 - INFO - query_type :, vector\n",
- "2025-11-02 10:30:59,590 - INFO - HTTP Request: POST https://openrouter.ai/api/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
- ]
- }
- ],
- "source": [
- "print(\"๐ Launching your Advanced RAG Assistant...\")\n",
- "print(\"๐ Your application will open in a new browser tab!\")\n",
- "print(\"\")\n",
- "print(\"⚠️ Make sure your OPENROUTER_API_KEY environment variable is set!\")\n",
- "print(\"\")\n",
- "print(\"๐ Testing Instructions:\")\n",
- "print(\"1. Click 'Initialize Vector Database' button first\")\n",
- "print(\"2. Wait for success message\")\n",
- "print(\"3. Configure your RAG parameters:\")\n",
- "print(\" - Choose model (gpt-4o, gpt-4o-mini)\")\n",
- "print(\" - Adjust temperature (0.0 = deterministic, 1.0 = creative)\")\n",
- "print(\" - Set chunk size and overlap\")\n",
- "print(\" - Choose similarity top-k\")\n",
- "print(\" - Select postprocessors and synthesizer\")\n",
- "print(\"4. Enter a question and click 'Ask Question'\")\n",
- "print(\"5. Review both the response and configuration used\")\n",
- "print(\"\")\n",
- "print(\"๐งช Experiments to try:\")\n",
- "print(\"- Compare different models with the same question\")\n",
- "print(\"- Test temperature effects (0.1 vs 0.9)\")\n",
- "print(\"- Try different chunk sizes (256 vs 1024)\")\n",
- "print(\"- Compare synthesizers (TreeSummarize vs Refine)\")\n",
- "print(\"- Adjust similarity cutoff to filter results\")\n",
- "\n",
- "# Your code here:\n",
- "advanced_interface.launch()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## ๐ก Understanding the Configuration Options\n",
- "\n",
- "### Model Selection\n",
- "- **gpt-4o**: Latest and most capable model, best quality responses\n",
- "- **gpt-4o-mini**: Faster and cheaper while maintaining good quality\n",
- "\n",
- "### Temperature (0.0 - 1.0)\n",
- "- **0.0-0.3**: More deterministic, focused responses\n",
- "- **0.4-0.7**: Balanced creativity and accuracy\n",
- "- **0.8-1.0**: More creative and varied responses\n",
- "\n",
- "### Chunk Size & Overlap\n",
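- "- **Chunk Size**: Maximum size, in tokens, of each text chunk that documents are split into before embedding (256-1024 typical)\n",
- "- **Chunk Overlap**: Number of tokens shared between consecutive chunks so context is not lost at chunk boundaries (10-100 typical)\n",
- "- **Note**: Chunking happens when the documents are indexed, so new chunk settings only take effect after the database is re-initialized\n",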
- "\n",
- "### Similarity Top-K (1-20)\n",
- "- **Lower values (3-5)**: More focused, faster responses\n",
- "- **Higher values (8-15)**: More comprehensive, detailed responses\n",
- "\n",
- "### Node Postprocessors\n",
- "- **SimilarityPostprocessor**: Filters out low-relevance documents\n",
- "\n",
- "### Similarity Cutoff (0.0-1.0)\n",
- "- **0.1-0.3**: More permissive, includes potentially relevant docs\n",
- "- **0.5-0.8**: More strict, only highly relevant docs\n",
- "\n",
- "### Response Synthesizers\n",
- "- **TreeSummarize**: Hierarchical summarization, good for complex topics\n",
- "- **Refine**: Iterative refinement, builds detailed responses\n",
- "- **CompactAndRefine**: Efficient version of Refine\n",
- "- **Default**: Standard synthesis approach\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## ✅ Assignment Completion Checklist\n",
- "\n",
- "Before submitting, ensure you have:\n",
- "\n",
- "- [ ] Set up your OPENROUTER_API_KEY environment variable\n",
- "- [ ] Imported all necessary libraries including advanced RAG components\n",
- "- [ ] Created AdvancedRAGBackend class with configurable parameters\n",
- "- [ ] Implemented all required methods:\n",
- " - [ ] `update_settings()` - Updates LLM and chunking parameters\n",
- " - [ ] `initialize_database()` - Sets up vector database\n",
- " - [ ] `get_postprocessor()` - Returns selected postprocessor\n",
- " - [ ] `get_synthesizer()` - Returns selected synthesizer\n",
- " - [ ] `advanced_query()` - Handles queries with all configuration options\n",
- "- [ ] Created advanced Gradio interface with all required components:\n",
- " - [ ] Initialize database button\n",
- " - [ ] Model selection dropdown (gpt-4o, gpt-4o-mini)\n",
- " - [ ] Temperature slider (0 to 1, step 0.1)\n",
- " - [ ] Chunk size input (default 512)\n",
- " - [ ] Chunk overlap input (default 50)\n",
- " - [ ] Similarity top-k slider (1 to 20, default 5)\n",
- " - [ ] Node postprocessor multiselect\n",
- " - [ ] Similarity cutoff slider (0.0 to 1.0, step 0.1, default 0.3)\n",
- " - [ ] Response synthesizer dropdown\n",
- " - [ ] Query input and submit button\n",
- " - [ ] Response output\n",
- " - [ ] Configuration display\n",
- "- [ ] Connected all components to backend functions\n",
- "- [ ] Successfully launched the application\n",
- "- [ ] Tested different parameter combinations\n",
- "- [ ] Verified all configuration options work correctly\n",
- "\n",
- "## ๐ Congratulations!\n",
- "\n",
- "You've successfully built a professional, production-ready RAG application! You now have:\n",
- "\n",
- "- **Advanced Parameter Control**: Full control over all RAG system parameters\n",
- "- **Professional UI**: Clean, organized interface with proper layout\n",
- "- **Real-time Configuration**: Ability to experiment with different settings\n",
- "- **Production Patterns**: Understanding of how to build scalable AI applications\n",
- "\n",
- "## ๐ Next Steps & Extensions\n",
- "\n",
- "**Potential Enhancements:**\n",
- "1. **Authentication**: Add user login and session management\n",
- "2. **Document Upload**: Allow users to upload their own documents\n",
- "3. **Chat History**: Implement conversation memory\n",
- "4. **Performance Monitoring**: Add response time and quality metrics\n",
- "5. **A/B Testing**: Compare different configurations side-by-side\n",
- "6. **Export Features**: Download responses and configurations\n",
- "7. **Advanced Visualizations**: Show document similarity scores and retrieval paths\n",
- "\n",
- "**Deployment Options:**\n",
- "- **Local**: Run on your machine for development\n",
- "- **Gradio share link**: Get a temporary public URL with `interface.launch(share=True)` (the app keeps running on your machine)\n",
- "- **Hugging Face Spaces**: Deploy to Hugging Face for public access\n",
- "- **Docker**: Containerize for scalable deployment\n",
- "- **Cloud Platforms**: Deploy to AWS, GCP, or Azure\n",
- "\n",
- "You're now ready to build sophisticated AI-powered applications!\n"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "env_v1",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.13.7"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/nitesh_sharma/README.md b/nitesh_sharma/README.md
deleted file mode 100644
index ef299c4..0000000
--- a/nitesh_sharma/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# nitesh_sharma
diff --git a/nitesh_sharma/src/20251029_custom_chatgpt/Image20251029161620.png b/nitesh_sharma/src/20251029_custom_chatgpt/Image20251029161620.png
deleted file mode 100644
index 1afae8a..0000000
Binary files a/nitesh_sharma/src/20251029_custom_chatgpt/Image20251029161620.png and /dev/null differ
diff --git a/nitesh_sharma/src/20251029_custom_chatgpt/assignment1_v3.py b/nitesh_sharma/src/20251029_custom_chatgpt/assignment1_v3.py
deleted file mode 100644
index 888e1ad..0000000
--- a/nitesh_sharma/src/20251029_custom_chatgpt/assignment1_v3.py
+++ /dev/null
@@ -1,417 +0,0 @@
-import streamlit as st
-import time
-from datetime import datetime, timedelta
-import base64
-import json
-import os
-from pathlib import Path
-import glob
-import re
-
-# Optional OpenAI SDK import; handle if not installed
-try:
- from openai import OpenAI
- _HAS_OPENAI = True
-except Exception:
- _HAS_OPENAI = False
-
-# OpenRouter.ai base URL
-OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
-CHAT_HISTORY_DIR = "chat_history"
-
-# --- Helper functions -----------------------------------------------------
-
-def now_ts():
- return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-def get_chat_filename():
- return f"chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
-
-def clean_message(text):
- """Remove special tags from message content"""
- patterns = [
- r'.*?',
- r'<\|im_start\|>.*?<\|im_end\|>',
- r'.*? '
- ]
- for pattern in patterns:
- text = re.sub(pattern, '', text, flags=re.DOTALL)
- return text.strip()
-
-def save_current_chat():
- """Save current chat to JSON file"""
- if not os.path.exists(CHAT_HISTORY_DIR):
- os.makedirs(CHAT_HISTORY_DIR)
-
- chat_data = {
- "messages": st.session_state.messages,
- "start_time": st.session_state.start_time,
- "settings": st.session_state.settings,
- "summary": st.session_state.get("summary", ""),
- "chat_id": st.session_state.current_chat_id
- }
-
- filename = os.path.join(CHAT_HISTORY_DIR, get_chat_filename())
- with open(filename, 'w', encoding='utf-8') as f:
- json.dump(chat_data, f, indent=2, default=str)
- return filename
-
-def load_chat_history(filename):
- """Load chat history from JSON file"""
- with open(filename, 'r', encoding='utf-8') as f:
- data = json.load(f)
- st.session_state.messages = data["messages"]
- st.session_state.start_time = float(data["start_time"])
- st.session_state.settings.update(data["settings"])
- st.session_state.summary = data.get("summary", "")
- st.session_state.current_chat_id = data.get("chat_id", datetime.now().strftime("%Y%m%d_%H%M%S"))
-
-def get_chat_summary(messages):
- """Generate a summary of the chat using the API"""
- if not messages:
- return "No messages to summarize"
-
- try:
- # Get API key
- api_key = os.getenv("OPENROUTER_API_KEY") or st.secrets.get("OPENROUTER_API_KEY")
- if not api_key:
- return "Cannot generate summary: No API key found"
-
- client = OpenAI(
- api_key=api_key,
- base_url=OPENROUTER_API_BASE,
- default_headers={
- "HTTP-Referer": "https://github.com/outskill-git/ai-accelerator",
- "X-Title": "AI Accelerator Chat"
- }
- )
-
- # Create a summary prompt
- summary_messages = [
- {"role": "system", "content": "Please provide a brief summary of the following conversation. Focus on the main topics discussed and key points. Keep it concise."},
- {"role": "user", "content": json.dumps([m["content"] for m in messages])}
- ]
-
- with st.spinner("Generating summary..."):
- result = client.chat.completions.create(
- model="gpt-oss-120b",
- messages=summary_messages,
- temperature=0.7,
- max_tokens=150
- )
- return clean_message(result.choices[0].message.content)
- except Exception as e:
- return f"Error generating summary: {str(e)}"
-
-# --- Session initialization -----------------------------------------------
-if "current_chat_id" not in st.session_state:
- st.session_state.current_chat_id = datetime.now().strftime("%Y%m%d_%H%M%S")
-if "messages" not in st.session_state:
- st.session_state.messages = []
-if "chat_history" not in st.session_state:
- st.session_state.chat_history = []
-if "start_time" not in st.session_state:
- st.session_state.start_time = time.time()
-if "message_count" not in st.session_state:
- st.session_state.message_count = 0
-if "settings" not in st.session_state:
- st.session_state.settings = {
- "assistant_name": "Chat Assistant",
- "response_style": "Professional",
- "max_history": 30,
- "show_timestamps": True,
- "theme": "Dark",
- "temperature": 1.4,
- "max_tokens": 290,
- "use_api": True,
- "api_model": "gpt-oss-120b",
- }
-
-# --- Page config ----------------------------------------------------------
-st.set_page_config(page_title="Chat Assistant v3", page_icon="๐ฌ", layout="wide")
-
-# Theme CSS
-DARK_CSS = """
-
-"""
-
-LIGHT_CSS = """
-
-"""
-
-# Apply theme CSS
-if st.session_state.settings["theme"] == "Dark":
- st.markdown(DARK_CSS, unsafe_allow_html=True)
-else:
- st.markdown(LIGHT_CSS, unsafe_allow_html=True)
-
-# --- Layout: Sidebar ------------------------------------------------------
-with st.sidebar:
- st.markdown("### โ๏ธ Configuration")
-
- # Chat History Section
- st.subheader("๐พ Chat History")
- history_files = glob.glob(os.path.join(CHAT_HISTORY_DIR, "*.json"))
- if history_files:
- selected_file = st.selectbox(
- "Load Previous Chat",
- options=history_files,
- format_func=lambda x: Path(x).stem
- )
- col1, col2 = st.columns(2)
- with col1:
- if st.button("Load Chat"):
- with st.spinner("Loading chat..."):
- load_chat_history(selected_file)
- st.rerun()
- with col2:
- if st.button("Delete History"):
- with st.spinner("Deleting..."):
- os.remove(selected_file)
- st.success("Chat history deleted!")
- st.rerun()
-
- st.markdown("---")
- st.subheader("Assistant Settings")
- st.session_state.settings["assistant_name"] = st.text_input("Assistant Name:", value=st.session_state.settings["assistant_name"])
- st.session_state.settings["response_style"] = st.selectbox("Response Style:", ["Friendly", "Professional", "Technical"], index=["Friendly", "Professional", "Technical"].index(st.session_state.settings["response_style"]))
-
- st.markdown("---")
- st.subheader("Chat Settings")
- st.session_state.settings["max_history"] = st.slider("Max Chat History:", 10, 100, st.session_state.settings["max_history"])
-
- st.markdown("---")
- st.subheader("๐จ Theme Settings")
- st.session_state.settings["theme"] = st.selectbox("Theme:", ["Dark", "Light"], index=["Dark", "Light"].index(st.session_state.settings["theme"]))
-
- st.markdown("---")
- st.subheader("๐ค GPT Parameters")
- st.session_state.settings["temperature"] = st.slider("Temperature", 0.0, 2.0, float(st.session_state.settings["temperature"]), 0.1)
- st.session_state.settings["max_tokens"] = st.slider("Max Tokens", 50, 1000, int(st.session_state.settings["max_tokens"]))
-
- st.markdown("---")
- st.subheader("๐ OpenRouter.ai API")
- st.session_state.settings["use_api"] = st.checkbox("Use OpenRouter API (gpt-oss-120b)", value=st.session_state.settings.get("use_api", True))
- if st.session_state.settings["use_api"]:
- if not _HAS_OPENAI:
- st.warning("OpenAI SDK not installed. Install 'openai' in your environment to enable API calls.")
- key = os.getenv("OPENROUTER_API_KEY") or st.secrets.get("OPENROUTER_API_KEY")
- if key:
- st.success(f"โ
OpenRouter API key found - Using model: {st.session_state.settings['api_model']}")
- try:
- client = OpenAI(
- api_key=key,
- base_url=OPENROUTER_API_BASE,
- default_headers={
- "HTTP-Referer": "https://github.com/outskill-git/ai-accelerator",
- "X-Title": "AI Accelerator Chat"
- }
- )
- result = client.chat.completions.create(
- model=st.session_state.settings['api_model'],
- messages=[{"role": "system", "content": "Test message"}],
- max_tokens=5
- )
- st.success("โ
OpenRouter API connection confirmed!")
- except Exception as e:
- st.error(f"โ API key found but test failed: {str(e)}")
- else:
- st.info("No OpenRouter API key found. Set 'OPENROUTER_API_KEY' in environment or secrets.")
-
- st.markdown("---")
- st.subheader("๐ Session Stats")
- duration = time.time() - st.session_state.start_time
- st.markdown(f"""
- **Chat ID:** {st.session_state.current_chat_id}
- **Session Duration:** {str(timedelta(seconds=int(duration)))}
- **Messages Sent:** {st.session_state.message_count}
- **Total Messages:** {len(st.session_state.messages)}
- """)
-
-# Re-apply theme if sidebar changed
-if st.session_state.settings["theme"] == "Dark":
- st.markdown(DARK_CSS, unsafe_allow_html=True)
-else:
- st.markdown(LIGHT_CSS, unsafe_allow_html=True)
-
-# --- Main UI --------------------------------------------------------------
-col1, col2 = st.columns([1, 3])
-with col1:
- # Chat Actions
- st.markdown("### ๐ Chat Actions")
-
- if st.button("๐ New Chat"):
- # Save current chat if it has messages
- if st.session_state.messages:
- save_current_chat()
- # Reset session state for new chat
- st.session_state.messages = []
- st.session_state.message_count = 0
- st.session_state.start_time = time.time()
- st.session_state.current_chat_id = datetime.now().strftime("%Y%m%d_%H%M%S")
- st.session_state.pop('summary', None)
- st.rerun()
-
- if st.button("๐พ Save Chat"):
- if st.session_state.messages:
- filename = save_current_chat()
- st.success(f"Chat saved to {filename}")
- else:
- st.info("No messages to save")
-
- if st.button("๐๏ธ Clear Chat"):
- st.session_state.messages = []
- st.session_state.message_count = 0
- st.session_state.start_time = time.time()
- st.session_state.pop('summary', None)
- st.rerun()
-
- if st.button("๐ Generate Summary"):
- if st.session_state.messages:
- st.session_state.summary = get_chat_summary(st.session_state.messages)
- st.rerun()
- else:
- st.info("No messages to summarize")
-
-with col2:
- # Chat header with controls
- col_title, col_controls = st.columns([3, 1])
- with col_title:
- st.markdown(f"# ๐ฌ {st.session_state.settings['assistant_name']}")
- meta = f"Response Style: {st.session_state.settings['response_style']} | History Limit: {st.session_state.settings['max_history']} messages | Temperature: {st.session_state.settings['temperature']} | Max Tokens: {st.session_state.settings['max_tokens']}"
- st.markdown(f"", unsafe_allow_html=True)
- with col_controls:
- st.session_state.settings["show_timestamps"] = st.toggle("Show Timestamps", value=st.session_state.settings.get("show_timestamps", False), help="Toggle timestamp display in chat")
-
- # Display summary if available
- if summary := st.session_state.get("summary"):
- with st.expander("๐ Chat Summary", expanded=True):
- st.write(summary)
-
- # Welcome / initial system message
- if not st.session_state.messages:
- init_msg = {
- "role": "assistant",
- "content": "Hello! I'm your chat assistant. How can I help you today?",
- "timestamp": now_ts()
- }
- st.session_state.messages.append(init_msg)
-
- # Enforce max history
- max_hist = st.session_state.settings["max_history"]
- if len(st.session_state.messages) > max_hist:
- st.session_state.messages = st.session_state.messages[-max_hist:]
-
- # Display messages
- for m in st.session_state.messages:
- role = m["role"]
- content = clean_message(m["content"])
- ts = m.get("timestamp", "")
- cls = "assistant" if role == "assistant" else "user"
- label = f"{st.session_state.settings['assistant_name']}" if role == "assistant" else "You"
- header = f"{label} - {ts}" if st.session_state.settings["show_timestamps"] else label
- st.markdown(f"{header}{content}", unsafe_allow_html=True)
-
- # Input area - using container for dynamic updates
- input_container = st.container()
- with input_container:
- # Using a form for enter key submission
- with st.form(key="message_form", clear_on_submit=True):
- user_input = st.text_input("Message", key="user_input_v3")
- submit_button = st.form_submit_button("Send")
-
- if submit_button and user_input:
- # Append user message
- user_msg = {"role": "user", "content": user_input, "timestamp": now_ts()}
- st.session_state.messages.append(user_msg)
- st.session_state.message_count += 1
-
- # Show spinner during API call
- with st.spinner("Thinking..."):
- # If API usage is enabled, attempt to call OpenAI
- resp = None
- if st.session_state.settings.get("use_api"):
- if not _HAS_OPENAI:
- resp = "Error: OpenAI SDK not installed in the environment. Falling back to simulated response."
- else:
- api_key = os.getenv("OPENROUTER_API_KEY") or st.secrets.get("OPENROUTER_API_KEY")
- if not api_key:
- resp = "Error: No OpenRouter API key found in environment. Falling back to simulated response."
- else:
- try:
- client = OpenAI(
- api_key=api_key,
- base_url=OPENROUTER_API_BASE,
- default_headers={
- "HTTP-Referer": "https://github.com/outskill-git/ai-accelerator",
- "X-Title": "AI Accelerator Chat"
- }
- )
- messages = []
- for m in st.session_state.messages[-st.session_state.settings.get("max_history", 30):]:
- role_name = m.get("role")
- api_role = role_name
- messages.append({"role": api_role, "content": m.get("content")})
-
- result = client.chat.completions.create(
- model=st.session_state.settings.get("api_model", "gpt-oss-120b"),
- messages=messages,
- temperature=float(st.session_state.settings.get("temperature", 1.0)),
- max_tokens=int(st.session_state.settings.get("max_tokens", 290)),
- )
- try:
- resp = clean_message(result.choices[0].message.content)
- except Exception:
- resp = str(result)
- except Exception as e:
- resp = f"OpenRouter API error: {str(e)}. Falling back to simulated response."
-
- if resp is None or resp.startswith("Error:") or resp.startswith("OpenRouter API error"):
- style = st.session_state.settings["response_style"]
- if style == "Friendly":
- resp = f"Hey! Thanks for your message: '{user_input}'. I'm happy to help โค๏ธ"
- elif style == "Professional":
- resp = f"Thank you for your message. I'll help you with: '{user_input}'"
- else:
- resp = f"Processing query: {user_input}. Analysis: (simulated)."
-
- # Append assistant response
- assistant_msg = {"role": "assistant", "content": resp, "timestamp": now_ts()}
- st.session_state.messages.append(assistant_msg)
- st.session_state.message_count += 1
-
- # Auto-save chat after each message
- save_current_chat()
-
- # Trim history if needed
- if len(st.session_state.messages) > st.session_state.settings["max_history"]:
- st.session_state.messages = st.session_state.messages[-st.session_state.settings["max_history"]:]
-
- st.rerun()
-
-# Footer / help
-st.markdown("---")
-st.markdown("*Tips: Press Enter to send a message. Use sidebar to adjust settings. Click 'New Chat' to start fresh while saving current chat.*")
\ No newline at end of file
diff --git a/nitesh_sharma/src/20251029_custom_chatgpt/requirement.png b/nitesh_sharma/src/20251029_custom_chatgpt/requirement.png
deleted file mode 100644
index b6b53a7..0000000
Binary files a/nitesh_sharma/src/20251029_custom_chatgpt/requirement.png and /dev/null differ
diff --git a/nitesh_sharma/venv_instructions.md b/nitesh_sharma/venv_instructions.md
deleted file mode 100644
index ac0e3d5..0000000
--- a/nitesh_sharma/venv_instructions.md
+++ /dev/null
@@ -1,62 +0,0 @@
-# Virtual Environment Instructions
-
-## Important Note
-Always use Command Prompt (cmd) instead of PowerShell to avoid execution policy restrictions.
-
-## How to Activate the Virtual Environment
-
-1. Open Command Prompt (cmd)
-2. Navigate to your project directory:
- ```cmd
- cd "C:\Users\NSHAR\OneDrive - paramanands limited\nnr bkp\Documents\GitHub\Submissions_C2\nitesh_sharma"
- ```
-3. Activate the virtual environment:
- ```cmd
- env_v1\Scripts\activate
- ```
- When activated successfully, you'll see `(env_v1)` at the beginning of your command prompt.
-
-## How to Deactivate the Virtual Environment
-
-1. Simply type:
- ```cmd
- deactivate
- ```
- The `(env_v1)` prefix will disappear from your command prompt.
-
-## Installing Requirements
-
-After activating the virtual environment, install the required packages:
-```cmd
-pip install -r env_v1_requirement.txt
-```
-
-This will install all the required packages including:
-- openai
-- streamlit
-- pandas
-- numpy
-- python-dotenv
-- and other dependencies
-# Run the Streamlit app
-
-After activating the virtual environment, you can run the Streamlit app included in this repo.
-
-From the project root:
-```cmd
-cd src
-streamlit run assignment1_v3.py
-```
-
-Or, from the project root in a single line (Windows cmd):
-```cmd
-cd src && streamlit run assignment1_v3.py
-```
-
-This will launch the Streamlit app in your browser. Use Ctrl+C in the terminal to stop it.
-
-# Tips
-- Always make sure you're using Command Prompt (cmd) and not PowerShell
-- The virtual environment must be activated each time you open a new command prompt
-- You can verify if the virtual environment is active by checking for the `(env_v1)` prefix in your command prompt
-- Install packages only when the virtual environment is activated
\ No newline at end of file
diff --git a/report_agent.py b/report_agent.py
new file mode 100644
index 0000000..11d5f28
--- /dev/null
+++ b/report_agent.py
@@ -0,0 +1,381 @@
+import datetime
+import json
+from typing import Dict, List, Any
+from dataclasses import dataclass
+from pathlib import Path
+from config import Config # Add this line
+
@dataclass
class ReportSection:
    """One rendered section of a report.

    The builders in ``ReportBuilderAgent`` create these; the renderers read
    only ``content``.
    """

    title: str  # human-readable section name, e.g. "Methodology"
    content: str  # Markdown body, including the section's own heading lines
    level: int  # heading depth recorded by the builder (every builder here uses 1)
    metadata: Dict[str, Any]  # per-section statistics such as counts and flags


class ReportBuilderAgent:
    """Assemble the outputs of the other agents into a multi-format report.

    The public entry points are ``build_comprehensive_report`` (builds the
    sections and renders them in every configured format) and ``save_report``
    (writes the rendered formats plus a metadata JSON file to disk).
    """

    # Formats rendered when the caller does not choose any.
    DEFAULT_OUTPUT_FORMATS = ("markdown", "html", "pdf")

    def __init__(self, output_formats: "List[str] | None" = None):
        """Create an agent.

        Args:
            output_formats: Format names to render. ``None`` selects
                ``DEFAULT_OUTPUT_FORMATS``. Unknown names are rendered as
                Markdown (see ``_render_format``).
        """
        # A fresh list per instance: a list literal as the default value would
        # be shared by every agent and leak mutations between them.
        self.output_formats = (
            list(self.DEFAULT_OUTPUT_FORMATS) if output_formats is None else output_formats
        )
        self.template_registry = self._initialize_templates()

    def _initialize_templates(self) -> Dict[str, Dict[str, str]]:
        """Return the registry of report templates, keyed by report type."""
        return {
            "research_report": {
                "structure": "executive_summary,methodology,findings,insights,conclusions,recommendations",
                "style": "academic"
            },
            "executive_summary": {
                "structure": "key_findings,insights,recommendations,next_steps",
                "style": "business"
            },
            "technical_report": {
                "structure": "introduction,data_sources,methodology,analysis,results,discussion",
                "style": "technical"
            }
        }

    def build_comprehensive_report(self,
                                   contextual_data: Dict[str, Any],
                                   analysis_results: Dict[str, Any],
                                   insights: List[Dict[str, Any]],
                                   report_type: str = "research_report") -> Dict[str, Any]:
        """Build a complete report from all agent outputs.

        Args:
            contextual_data: Output from the Contextual Retriever Agent.
            analysis_results: Output from the Critical Analysis Agent.
            insights: Output from the Insight Generation Agent. Each insight
                must provide ``statement`` and ``confidence``.
            report_type: Recorded in the report metadata. The section layout
                is currently fixed and does not consult the template registry.

        Returns:
            A dict with ``metadata``, ``sections`` (list of ``ReportSection``)
            and ``formats`` (format name -> rendered text).

        Raises:
            KeyError: If an insight lacks ``statement`` or ``confidence``.
        """
        sections = [
            self._build_executive_summary(insights, analysis_results),
            self._build_methodology_section(contextual_data),
            self._build_findings_section(analysis_results),
            self._build_insights_section(insights),
            self._build_recommendations_section(insights, analysis_results),
        ]

        metadata = self._generate_report_metadata(report_type)
        return {
            "metadata": metadata,
            "sections": sections,
            "formats": {
                format_type: self._render_format(sections, format_type, metadata)
                for format_type in self.output_formats
            },
        }

    @staticmethod
    def _bullets(items) -> str:
        """Format ``items`` as a Markdown bullet list, one item per line."""
        return "\n".join(f"- {item}" for item in items)

    @staticmethod
    def _ordered_unique(items) -> list:
        """Drop duplicates while keeping first-seen order (items must be hashable)."""
        return list(dict.fromkeys(items))

    @staticmethod
    def _overall_confidence(analysis_results: Dict[str, Any]) -> float:
        """Return the confidence shown in the executive summary.

        ``overall_confidence`` wins when present; otherwise the
        ``confidence_score`` used by the findings section is reused so both
        sections report the same number. Falls back to 0.5 when neither key
        holds a numeric value.
        """
        for key in ("overall_confidence", "confidence_score"):
            if key in analysis_results:
                try:
                    return float(analysis_results[key])
                except (TypeError, ValueError):
                    continue
        return 0.5

    @staticmethod
    def _validated_source_count(analysis_results: Dict[str, Any]) -> Any:
        """Return the number of validated sources.

        Uses ``sources_validated`` when given; otherwise counts the truthy
        entries of the ``source_validations`` mapping that the findings
        section reports on.
        """
        if "sources_validated" in analysis_results:
            return analysis_results["sources_validated"]
        validations = analysis_results.get("source_validations", {})
        return sum(1 for is_valid in validations.values() if is_valid)

    def _build_executive_summary(self,
                                 insights: List[Dict[str, Any]],
                                 analysis_results: Dict[str, Any]) -> ReportSection:
        """Build the executive summary from the top three insights and the analysis."""
        key_insights_text = self._format_key_insights(insights[:3])
        confidence_score = self._overall_confidence(analysis_results)
        summary_text = analysis_results.get('summary', 'No summary available')
        resolved_count = len(analysis_results.get('resolved_contradictions', []))
        validated_count = self._validated_source_count(analysis_results)
        implications_text = self._extract_implications(insights)

        content = f"""
# Executive Summary

## Key Insights
{key_insights_text}

## Overall Confidence
**Confidence Score: {confidence_score:.2f}/1.00**

## Primary Findings
- {summary_text}
- Contradictions resolved: {resolved_count}
- Sources validated: {validated_count}

## Immediate Implications
{implications_text}
"""

        return ReportSection(
            title="Executive Summary",
            content=content,
            level=1,
            metadata={"word_count": len(content.split()), "contains_key_findings": True}
        )

    def _build_methodology_section(self, contextual_data: Dict[str, Any]) -> ReportSection:
        """Build the methodology section from the retriever's source list."""
        sources = contextual_data.get('sources', [])
        # Ordered de-duplication keeps the rendered list stable between runs;
        # a set would order string members differently per process.
        data_types = self._ordered_unique(
            source.get('type', 'unknown') for source in sources
        )
        source_types_text = ', '.join(data_types)
        time_range = contextual_data.get('time_range', 'Not specified')
        geographic_scope = contextual_data.get('geographic_scope', 'Global')

        content = f"""
# Methodology

## Data Sources
This analysis utilized {len(sources)} distinct data sources including:
- **Source Types**: {source_types_text}
- **Time Range**: {time_range}
- **Geographic Scope**: {geographic_scope}

## Analytical Approach
The research employed a multi-agent AI system with the following specialized components:
1. **Contextual Retriever**: Gathers and processes multi-source data
2. **Critical Analysis**: Validates sources and identifies contradictions
3. **Insight Generation**: Develops hypotheses and identifies trends
4. **Report Builder**: Synthesizes findings into structured reports

## Validation Framework
All findings underwent rigorous validation including:
- Source credibility assessment
- Contradiction resolution
- Confidence scoring
- Reasoning chain verification
"""

        return ReportSection(
            title="Methodology",
            content=content,
            level=1,
            metadata={"sources_count": len(sources), "data_types": data_types}
        )

    def _build_findings_section(self, analysis_results: Dict[str, Any]) -> ReportSection:
        """Build the key findings section (first five findings, quality stats, patterns)."""
        findings = analysis_results.get('key_findings', [])
        contradictions = analysis_results.get('contradictions', [])
        validations = analysis_results.get('source_validations', {})

        primary_results = self._bullets(findings[:5])
        validated_ratio = f"{sum(validations.values())}/{len(validations)}"
        confidence_level = analysis_results.get('confidence_score', 'Not calculated')
        patterns_text = self._extract_patterns(analysis_results)

        content = f"""
# Key Findings

## Primary Results
{primary_results}

## Data Quality Assessment
- **Sources Validated**: {validated_ratio}
- **Contradictions Identified**: {len(contradictions)}
- **Confidence Level**: {confidence_level}

## Notable Patterns
{patterns_text}
"""

        return ReportSection(
            title="Key Findings",
            content=content,
            level=1,
            metadata={"findings_count": len(findings), "contradictions_count": len(contradictions)}
        )

    def _build_insights_section(self, insights: List[Dict[str, Any]]) -> ReportSection:
        """Build the insights section, one sub-section per insight.

        ``reasoning_chain`` and ``testable_implications`` are optional on each
        insight, matching ``_extract_implications``.
        """
        parts = ["""
# Insights and Hypotheses

## Generated Insights
"""]

        for position, insight in enumerate(insights, 1):
            reasoning_text = self._bullets(insight.get('reasoning_chain', []))
            implications_text = self._bullets(insight.get('testable_implications', []))
            parts.append(f"""
### Insight {position}: {insight['statement']}

**Confidence**: {insight['confidence']:.2f}

**Reasoning Chain**:
{reasoning_text}

**Testable Implications**:
{implications_text}

---
""")

        # An empty insight list must not divide by zero.
        if insights:
            average_confidence = sum(item['confidence'] for item in insights) / len(insights)
        else:
            average_confidence = 0.0

        return ReportSection(
            title="Insights and Hypotheses",
            content="".join(parts),
            level=1,
            metadata={"insights_count": len(insights), "average_confidence": average_confidence}
        )

    def _build_recommendations_section(self,
                                       insights: List[Dict[str, Any]],
                                       analysis_results: Dict[str, Any]) -> ReportSection:
        """Build the conclusions (top three insights) and the fixed recommendations."""
        parts = ["""
# Conclusions and Recommendations

## Key Conclusions
"""]

        for position, insight in enumerate(insights[:3], 1):
            parts.append(f"""
{position}. {insight['statement']} (Confidence: {insight['confidence']:.2f})
""")

        parts.append("""
## Recommendations

### Immediate Actions
1. **Validate Top Hypotheses**: Prioritize testing of high-confidence insights
2. **Address Data Gaps**: Focus on areas with lower confidence scores
3. **Monitor Key Indicators**: Establish tracking for critical variables

### Strategic Considerations
- Leverage identified patterns for predictive modeling
- Address contradictions through targeted data collection
- Scale successful analytical approaches to other domains

## Next Steps
1. Conduct controlled experiments to test generated hypotheses
2. Expand data sources to improve confidence scores
3. Implement monitoring system for ongoing validation
""")

        return ReportSection(
            title="Conclusions and Recommendations",
            content="".join(parts),
            level=1,
            metadata={"recommendations_count": 6, "next_steps": 3}
        )

    def _format_key_insights(self, insights: List[Dict[str, Any]]) -> str:
        """Format insights as a numbered list, one newline-terminated line each."""
        return "".join(
            f"{position}. {insight['statement']} (Confidence: {insight['confidence']:.2f})\n"
            for position, insight in enumerate(insights, 1)
        )

    def _extract_implications(self, insights: List[Dict[str, Any]]) -> str:
        """Return up to five distinct testable implications as a bullet list.

        Implications appear in first-seen order so the same input always
        renders the same report.
        """
        implications = self._ordered_unique(
            implication
            for insight in insights
            for implication in insight.get('testable_implications', [])
        )
        return self._bullets(implications[:5])

    def _extract_patterns(self, analysis_results: Dict[str, Any]) -> str:
        """Return up to three patterns as a bullet list, or a placeholder sentence."""
        patterns = analysis_results.get('patterns', [])
        if not patterns:
            return "No specific patterns identified in the current analysis."

        return self._bullets(patterns[:3])

    def _generate_report_metadata(self, report_type: str) -> Dict[str, Any]:
        """Generate report metadata; the id and timestamp share one clock reading."""
        generated_at = datetime.datetime.now()
        return {
            "report_id": f"report_{generated_at.strftime('%Y%m%d_%H%M%S')}",
            "generated_at": generated_at.isoformat(),
            "report_type": report_type,
            "version": "1.0",
            "agents_used": ["contextual_retriever", "critical_analysis", "insight_generation", "report_builder"]
        }

    def _render_format(self,
                       sections: List[ReportSection],
                       format_type: str,
                       metadata: Dict[str, Any]) -> str:
        """Render the report in ``format_type``; unknown formats fall back to Markdown."""
        renderers = {
            "markdown": self._render_markdown,
            "html": self._render_html,
            "pdf": self._render_pdf_ready,
        }
        render = renderers.get(format_type, self._render_markdown)
        return render(sections, metadata)

    def _render_markdown(self, sections: List[ReportSection], metadata: Dict[str, Any]) -> str:
        """Render the report as Markdown."""
        # Two trailing spaces are Markdown's hard line break; with a single
        # space the three header lines collapse into one paragraph.
        header = (
            "# AI Research Report\n\n"
            f"**Report ID**: {metadata['report_id']}  \n"
            f"**Generated**: {metadata['generated_at']}  \n"
            f"**Type**: {metadata['report_type']}  \n\n"
        )
        return header + "".join(f"{section.content}\n\n" for section in sections)

    @staticmethod
    def _markdown_to_html(markdown_text: str) -> str:
        """Convert the Markdown subset the section builders emit into HTML.

        Handles ``#``-style headings (levels 1-6), ``- `` bullets, ``N. ``
        numbered items, ``---`` rules, ``**bold**`` and plain paragraphs.
        All text is HTML-escaped before markup is added.
        """
        from html import escape

        def inline(text: str) -> str:
            escaped = escape(text, quote=False)
            pieces = escaped.split("**")
            if len(pieces) % 2 == 0:
                # Unbalanced marker: leave the text literal rather than guess.
                return escaped
            return "".join(
                f"<strong>{piece}</strong>" if index % 2 else piece
                for index, piece in enumerate(pieces)
            )

        rendered = []
        open_list = ""  # "ul", "ol" or "" when no list is open
        for raw_line in markdown_text.splitlines():
            line = raw_line.strip()
            hash_count = len(line) - len(line.lstrip("#"))
            number, dot, numbered_text = line.partition(". ")

            if line.startswith("- "):
                wanted_list, item_open, item_text = "ul", "<li>", line[2:]
            elif dot and number.isdigit():
                # Keep the source number: items separated by blank lines end
                # up in separate <ol> elements that would all restart at 1.
                wanted_list, item_open, item_text = "ol", f'<li value="{number}">', numbered_text
            else:
                wanted_list, item_open, item_text = "", "", ""

            if open_list and open_list != wanted_list:
                rendered.append(f"</{open_list}>")
                open_list = ""

            if wanted_list:
                if not open_list:
                    rendered.append(f"<{wanted_list}>")
                    open_list = wanted_list
                rendered.append(f"{item_open}{inline(item_text.strip())}</li>")
            elif not line:
                continue
            elif line == "---":
                rendered.append("<hr>")
            elif 1 <= hash_count <= 6 and line[hash_count:hash_count + 1] == " ":
                heading = inline(line[hash_count:].strip())
                rendered.append(f"<h{hash_count}>{heading}</h{hash_count}>")
            else:
                rendered.append(f"<p>{inline(line)}</p>")

        if open_list:
            rendered.append(f"</{open_list}>")
        return "\n".join(rendered)

    def _render_html(self, sections: List[ReportSection], metadata: Dict[str, Any]) -> str:
        """Render the report as a standalone HTML document."""
        from html import escape

        page_title = escape(f"AI Research Report - {metadata['report_id']}", quote=False)
        parts = [
            "<!DOCTYPE html>",
            '<html lang="en">',
            "<head>",
            '<meta charset="utf-8">',
            f"<title>{page_title}</title>",
            "</head>",
            "<body>",
            "<h1>AI Research Report</h1>",
        ]
        for section in sections:
            parts.append("<section>")
            parts.append(self._markdown_to_html(section.content))
            parts.append("</section>")
        parts.extend(["</body>", "</html>"])
        return "\n".join(parts)

    def _render_pdf_ready(self, sections: List[ReportSection], metadata: Dict[str, Any]) -> str:
        """Render report in PDF-ready format (simplified: this is Markdown text)."""
        # Can be converted to PDF using libraries like WeasyPrint.
        return self._render_markdown(sections, metadata)

    def save_report(self, report: Dict[str, Any], output_dir: str = "./reports"):
        """Write every rendered format and the metadata JSON into ``output_dir``.

        Files are named ``<report_id>.<format>`` plus
        ``<report_id>_metadata.json``. The directory and any missing parents
        are created.

        NOTE(review): the "pdf" format is written as Markdown text under a
        ``.pdf`` extension; it is not a real PDF until converted.
        """
        Path(output_dir).mkdir(parents=True, exist_ok=True)

        base_filename = f"{output_dir}/{report['metadata']['report_id']}"

        for format_type, content in report['formats'].items():
            filename = f"{base_filename}.{format_type}"
            with open(filename, 'w', encoding='utf-8') as handle:
                handle.write(content)
            print(f"Saved {format_type.upper()} report: {filename}")

        # Metadata goes to its own file, with the same encoding as the reports.
        with open(f"{base_filename}_metadata.json", 'w', encoding='utf-8') as handle:
            json.dump(report['metadata'], handle, indent=2)
\ No newline at end of file
diff --git a/reports/report_20251108_150600.html b/reports/report_20251108_150600.html
new file mode 100644
index 0000000..cd5b2c6
--- /dev/null
+++ b/reports/report_20251108_150600.html
@@ -0,0 +1,178 @@
+
+
+
+
+ AI Research Report - report_20251108_150600
+
+
+
+ AI Research Report
+
+
+Executive Summary
+
+Key Insights
+1. The adoption of AI leads to a significant increase in productivity, particularly in early adopters. (Confidence: 0.90)
+2. Structured AI training programs within organizations speed up the ROI realization. (Confidence: 0.85)
+3. Data quality issues are the main hindrance to successful AI implementation. (Confidence: 0.90)
+
+
+Overall Confidence
+Confidence Score: 0.50/1.00
+
+Primary Findings
+
Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+ Contradictions resolved: 0
+ Sources validated: 0
+
+Immediate Implications
+
Small to medium enterprises may require more support to overcome cost barriers
+ Improvements in data quality will enhance AI implementation success
+ Organizations implementing structured AI training programs will experience quicker ROI
+ Companies adopting AI will see productivity increase
+ Lack of a structured training program may delay ROI realization
+
+
+Methodology
+
+Data Sources
+This analysis utilized 3 distinct data sources including:
+
Source Types: industry_report, research_paper, news_article
+Time Range: 2020-2024
+Geographic Scope: Global
+
+Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. Contextual Retriever: Gathers and processes multi-source data
+2. Critical Analysis: Validates sources and identifies contradictions
+3. Insight Generation: Develops hypotheses and identifies trends
+4. Report Builder: Synthesizes findings into structured reports
+
+Validation Framework
+All findings underwent rigorous validation including:
+
Source credibility assessment
+ Contradiction resolution
+ Confidence scoring
+ Reasoning chain verification
+
+
+Key Findings
+
+Primary Results
+
AI adoption correlates with 25-35% productivity increase in early adopters
+ Organizations with structured AI training programs show faster ROI
+ Data quality is the primary barrier to successful AI implementation
+ Small to medium enterprises lag in AI adoption due to cost concerns
+
+Data Quality Assessment
+
Sources Validated: 3/4
+Contradictions Identified: 3
+Confidence Level: 0.82
+
+Notable Patterns
+
Early adopters consistently outperform laggards
+ Training investment correlates with success metrics
+ Industry-specific variations in implementation success
+
+
+Insights and Hypotheses
+
+Generated Insights
+
+#Insight 1: The adoption of AI leads to a significant increase in productivity, particularly in early adopters.
+
+Confidence: 0.90
+
+Reasoning Chain:
+AI adoption presents in the analysis as positively correlating with productivity
+ Despite conflicting data on the degree of productivity increase, all validated sources agree on the positive impact
+ Early adopters have a reported 25-35% productivity increase
+
+Testable Implications:
+Companies adopting AI will see productivity increase
+ Early adopters will experience significant productivity gains
+
+---
+
+#Insight 2: Structured AI training programs within organizations speed up the ROI realization.
+
+Confidence: 0.85
+
+Reasoning Chain:
+Organizations with structured AI training programs report faster ROI
+ Training likely reduces the time to full operational use, hence faster ROI
+
+Testable Implications:
+Organizations implementing structured AI training programs will experience quicker ROI
+ Lack of a structured training program may delay ROI realization
+
+---
+
+#Insight 3: Data quality issues are the main hindrance to successful AI implementation.
+
+Confidence: 0.90
+
+Reasoning Chain:
+The analysis identifies data quality as the most significant barrier to AI implementation
+ Poor data quality likely leads to ineffective AI performance and slower adoption
+
+Testable Implications:
+Improvements in data quality will enhance AI implementation success
+ Organizations with poor data quality will struggle with AI implementation
+
+---
+
+#Insight 4: Cost concerns are disadvantaging small to medium enterprises in AI adoption.
+
+Confidence: 0.80
+
+Reasoning Chain:
+Small to medium enterprises are identified to be lagging in AI adoption due to cost issues
+ The high costs of AI implementation are likely prohibitive for smaller businesses
+
+Testable Implications:
+Reducing AI implementation costs could increase adoption among small to medium enterprises
+ Small to medium enterprises may require more support to overcome cost barriers
+
+---
+
+
+Conclusions and Recommendations
+
+Key Conclusions
+
+1. The adoption of AI leads to a significant increase in productivity, particularly in early adopters. (Confidence: 0.90)
+
+2. Structured AI training programs within organizations speed up the ROI realization. (Confidence: 0.85)
+
+3. Data quality issues are the main hindrance to successful AI implementation. (Confidence: 0.90)
+
+Recommendations
+
+#Immediate Actions
+1. Validate Top Hypotheses: Prioritize testing of high-confidence insights
+2. Address Data Gaps: Focus on areas with lower confidence scores
+3. Monitor Key Indicators: Establish tracking for critical variables
+
+#Strategic Considerations
+
Leverage identified patterns for predictive modeling
+ Address contradictions through targeted data collection
+ Scale successful analytical approaches to other domains
+
+Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
\ No newline at end of file
diff --git a/reports/report_20251108_150600.markdown b/reports/report_20251108_150600.markdown
new file mode 100644
index 0000000..32fca3c
--- /dev/null
+++ b/reports/report_20251108_150600.markdown
@@ -0,0 +1,167 @@
+# AI Research Report
+
+**Report ID**: report_20251108_150600
+**Generated**: 2025-11-08T15:06:00.506390
+**Type**: research_report
+
+
+# Executive Summary
+
+## Key Insights
+1. The adoption of AI leads to a significant increase in productivity, particularly in early adopters. (Confidence: 0.90)
+2. Structured AI training programs within organizations speed up the ROI realization. (Confidence: 0.85)
+3. Data quality issues are the main hindrance to successful AI implementation. (Confidence: 0.90)
+
+
+## Overall Confidence
+**Confidence Score: 0.50/1.00**
+
+## Primary Findings
+- Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+- Contradictions resolved: 0
+- Sources validated: 0
+
+## Immediate Implications
+- Small to medium enterprises may require more support to overcome cost barriers
+- Improvements in data quality will enhance AI implementation success
+- Organizations implementing structured AI training programs will experience quicker ROI
+- Companies adopting AI will see productivity increase
+- Lack of a structured training program may delay ROI realization
+
+
+
+# Methodology
+
+## Data Sources
+This analysis utilized 3 distinct data sources including:
+- **Source Types**: industry_report, research_paper, news_article
+- **Time Range**: 2020-2024
+- **Geographic Scope**: Global
+
+## Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. **Contextual Retriever**: Gathers and processes multi-source data
+2. **Critical Analysis**: Validates sources and identifies contradictions
+3. **Insight Generation**: Develops hypotheses and identifies trends
+4. **Report Builder**: Synthesizes findings into structured reports
+
+## Validation Framework
+All findings underwent rigorous validation including:
+- Source credibility assessment
+- Contradiction resolution
+- Confidence scoring
+- Reasoning chain verification
+
+
+
+# Key Findings
+
+## Primary Results
+- AI adoption correlates with 25-35% productivity increase in early adopters
+- Organizations with structured AI training programs show faster ROI
+- Data quality is the primary barrier to successful AI implementation
+- Small to medium enterprises lag in AI adoption due to cost concerns
+
+## Data Quality Assessment
+- **Sources Validated**: 3/4
+- **Contradictions Identified**: 3
+- **Confidence Level**: 0.82
+
+## Notable Patterns
+- Early adopters consistently outperform laggards
+- Training investment correlates with success metrics
+- Industry-specific variations in implementation success
+
+
+
+# Insights and Hypotheses
+
+## Generated Insights
+
+### Insight 1: The adoption of AI leads to a significant increase in productivity, particularly in early adopters.
+
+**Confidence**: 0.90
+
+**Reasoning Chain**:
+- The analysis shows AI adoption correlating positively with productivity
+- Despite conflicting data on the degree of productivity increase, all validated sources agree on the positive impact
+- Early adopters have a reported 25-35% productivity increase
+
+**Testable Implications**:
+- Companies adopting AI will see productivity increase
+- Early adopters will experience significant productivity gains
+
+---
+
+### Insight 2: Structured AI training programs within organizations speed up the ROI realization.
+
+**Confidence**: 0.85
+
+**Reasoning Chain**:
+- Organizations with structured AI training programs report faster ROI
+- Training likely reduces the time to full operational use, hence faster ROI
+
+**Testable Implications**:
+- Organizations implementing structured AI training programs will experience quicker ROI
+- Lack of a structured training program may delay ROI realization
+
+---
+
+### Insight 3: Data quality issues are the main hindrance to successful AI implementation.
+
+**Confidence**: 0.90
+
+**Reasoning Chain**:
+- The analysis identifies data quality as the most significant barrier to AI implementation
+- Poor data quality likely leads to ineffective AI performance and slower adoption
+
+**Testable Implications**:
+- Improvements in data quality will enhance AI implementation success
+- Organizations with poor data quality will struggle with AI implementation
+
+---
+
+### Insight 4: Cost concerns are disadvantaging small to medium enterprises in AI adoption.
+
+**Confidence**: 0.80
+
+**Reasoning Chain**:
+- Small to medium enterprises are identified as lagging in AI adoption due to cost issues
+- The high costs of AI implementation are likely prohibitive for smaller businesses
+
+**Testable Implications**:
+- Reducing AI implementation costs could increase adoption among small to medium enterprises
+- Small to medium enterprises may require more support to overcome cost barriers
+
+---
+
+
+
+# Conclusions and Recommendations
+
+## Key Conclusions
+
+1. The adoption of AI leads to a significant increase in productivity, particularly in early adopters. (Confidence: 0.90)
+
+2. Structured AI training programs within organizations speed up the ROI realization. (Confidence: 0.85)
+
+3. Data quality issues are the main hindrance to successful AI implementation. (Confidence: 0.90)
+
+## Recommendations
+
+### Immediate Actions
+1. **Validate Top Hypotheses**: Prioritize testing of high-confidence insights
+2. **Address Data Gaps**: Focus on areas with lower confidence scores
+3. **Monitor Key Indicators**: Establish tracking for critical variables
+
+### Strategic Considerations
+- Leverage identified patterns for predictive modeling
+- Address contradictions through targeted data collection
+- Scale successful analytical approaches to other domains
+
+## Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
diff --git a/reports/report_20251108_150600.pdf b/reports/report_20251108_150600.pdf
new file mode 100644
index 0000000..32fca3c
Binary files /dev/null and b/reports/report_20251108_150600.pdf differ
diff --git a/reports/report_20251108_150600_metadata.json b/reports/report_20251108_150600_metadata.json
new file mode 100644
index 0000000..c6bea9b
--- /dev/null
+++ b/reports/report_20251108_150600_metadata.json
@@ -0,0 +1,12 @@
+{
+ "report_id": "report_20251108_150600",
+ "generated_at": "2025-11-08T15:06:00.506390",
+ "report_type": "research_report",
+ "version": "1.0",
+ "agents_used": [
+ "contextual_retriever",
+ "critical_analysis",
+ "insight_generation",
+ "report_builder"
+ ]
+}
\ No newline at end of file
diff --git a/reports/report_20251108_151408.html b/reports/report_20251108_151408.html
new file mode 100644
index 0000000..864cdea
--- /dev/null
+++ b/reports/report_20251108_151408.html
@@ -0,0 +1,180 @@
+
+
+
+
+ AI Research Report - report_20251108_151408
+
+
+
+ AI Research Report
+
+
+Executive Summary
+
+Key Insights
+1. Organizations implementing AI will witness a productivity increase between 20-40% (Confidence: 0.85)
+2. Structured AI training programs can accelerate ROI (Confidence: 0.80)
+3. Data quality issues are the primary factor hindering successful AI implementation (Confidence: 0.70)
+
+
+Overall Confidence
+Confidence Score: 0.50/1.00
+
+Primary Findings
+
Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+ Contradictions resolved: 0
+ Sources validated: 0
+
+Immediate Implications
+
Survey SMEs to understand their concerns about AI adoption costs
+ Compare ROI timelines between organizations with and without structured AI training programs
+ Measure productivity before and after AI implementation
+ Implement data quality improvement measures and observe changes in AI success rate
+ Compare AI adoption rates between SMEs with different financial capabilities
+
+
+Methodology
+
+Data Sources
+This analysis utilized 3 distinct data sources including:
+
Source Types: industry_report, research_paper, news_article
+Time Range: 2020-2024
+Geographic Scope: Global
+
+Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. Contextual Retriever: Gathers and processes multi-source data
+2. Critical Analysis: Validates sources and identifies contradictions
+3. Insight Generation: Develops hypotheses and identifies trends
+4. Report Builder: Synthesizes findings into structured reports
+
+Validation Framework
+All findings underwent rigorous validation including:
+
Source credibility assessment
+ Contradiction resolution
+ Confidence scoring
+ Reasoning chain verification
+
+
+Key Findings
+
+Primary Results
+
AI adoption correlates with 25-35% productivity increase in early adopters
+ Organizations with structured AI training programs show faster ROI
+ Data quality is the primary barrier to successful AI implementation
+ Small to medium enterprises lag in AI adoption due to cost concerns
+
+Data Quality Assessment
+
Sources Validated: 3/4
+Contradictions Identified: 3
+Confidence Level: 0.82
+
+Notable Patterns
+
Early adopters consistently outperform laggards
+ Training investment correlates with success metrics
+ Industry-specific variations in implementation success
+
+
+Insights and Hypotheses
+
+Generated Insights
+
+#Insight 1: Organizations implementing AI will witness a productivity increase between 20-40%
+
+Confidence: 0.85
+
+Reasoning Chain:
+AI adoption correlates with productivity increase
+ Different studies show a range from 20% to 40%
+ All sources reporting this finding are validated
+
+Testable Implications:
+Measure productivity before and after AI implementation
+ Compare productivity increase in AI-adopting organizations with those not adopting AI
+
+---
+
+#Insight 2: Structured AI training programs can accelerate ROI
+
+Confidence: 0.80
+
+Reasoning Chain:
+Organizations with AI training programs show faster ROI
+ This implies that well-trained staff can utilize AI more effectively
+ The source reporting this finding is validated
+
+Testable Implications:
+Compare ROI timelines between organizations with and without structured AI training programs
+
+---
+
+#Insight 3: Data quality issues are the primary factor hindering successful AI implementation
+
+Confidence: 0.70
+
+Reasoning Chain:
+Data quality is identified as the main barrier
+ Poor quality data can lead to ineffective AI models
+ The source reporting this finding is validated
+
+Testable Implications:
+Measure AI success rate in organizations with high and low data quality
+ Implement data quality improvement measures and observe changes in AI success rate
+
+---
+
+#Insight 4: Small to medium enterprises lag in AI adoption due to cost concerns
+
+Confidence: 0.90
+
+Reasoning Chain:
+SMEs are reported to lag in AI adoption
+ Cost concerns are identified as the main reason
+ The source reporting this finding is validated
+
+Testable Implications:
+Survey SMEs to understand their concerns about AI adoption costs
+ Compare AI adoption rates between SMEs with different financial capabilities
+
+---
+
+
+Conclusions and Recommendations
+
+Key Conclusions
+
+1. Organizations implementing AI will witness a productivity increase between 20-40% (Confidence: 0.85)
+
+2. Structured AI training programs can accelerate ROI (Confidence: 0.80)
+
+3. Data quality issues are the primary factor hindering successful AI implementation (Confidence: 0.70)
+
+Recommendations
+
+#Immediate Actions
+1. Validate Top Hypotheses: Prioritize testing of high-confidence insights
+2. Address Data Gaps: Focus on areas with lower confidence scores
+3. Monitor Key Indicators: Establish tracking for critical variables
+
+#Strategic Considerations
+
Leverage identified patterns for predictive modeling
+ Address contradictions through targeted data collection
+ Scale successful analytical approaches to other domains
+
+Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
\ No newline at end of file
diff --git a/reports/report_20251108_151408.markdown b/reports/report_20251108_151408.markdown
new file mode 100644
index 0000000..a5eead6
--- /dev/null
+++ b/reports/report_20251108_151408.markdown
@@ -0,0 +1,169 @@
+# AI Research Report
+
+**Report ID**: report_20251108_151408
+**Generated**: 2025-11-08T15:14:08.981878
+**Type**: research_report
+
+
+# Executive Summary
+
+## Key Insights
+1. Organizations implementing AI will witness a productivity increase between 20-40% (Confidence: 0.85)
+2. Structured AI training programs can accelerate ROI (Confidence: 0.80)
+3. Data quality issues are the primary factor hindering successful AI implementation (Confidence: 0.70)
+
+
+## Overall Confidence
+**Confidence Score: 0.50/1.00**
+
+## Primary Findings
+- Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+- Contradictions resolved: 0
+- Sources validated: 0
+
+## Immediate Implications
+- Survey SMEs to understand their concerns about AI adoption costs
+- Compare ROI timelines between organizations with and without structured AI training programs
+- Measure productivity before and after AI implementation
+- Implement data quality improvement measures and observe changes in AI success rate
+- Compare AI adoption rates between SMEs with different financial capabilities
+
+
+
+# Methodology
+
+## Data Sources
+This analysis utilized 3 distinct data sources including:
+- **Source Types**: industry_report, research_paper, news_article
+- **Time Range**: 2020-2024
+- **Geographic Scope**: Global
+
+## Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. **Contextual Retriever**: Gathers and processes multi-source data
+2. **Critical Analysis**: Validates sources and identifies contradictions
+3. **Insight Generation**: Develops hypotheses and identifies trends
+4. **Report Builder**: Synthesizes findings into structured reports
+
+## Validation Framework
+All findings underwent rigorous validation including:
+- Source credibility assessment
+- Contradiction resolution
+- Confidence scoring
+- Reasoning chain verification
+
+
+
+# Key Findings
+
+## Primary Results
+- AI adoption correlates with 25-35% productivity increase in early adopters
+- Organizations with structured AI training programs show faster ROI
+- Data quality is the primary barrier to successful AI implementation
+- Small to medium enterprises lag in AI adoption due to cost concerns
+
+## Data Quality Assessment
+- **Sources Validated**: 3/4
+- **Contradictions Identified**: 3
+- **Confidence Level**: 0.82
+
+## Notable Patterns
+- Early adopters consistently outperform laggards
+- Training investment correlates with success metrics
+- Industry-specific variations in implementation success
+
+
+
+# Insights and Hypotheses
+
+## Generated Insights
+
+### Insight 1: Organizations implementing AI will witness a productivity increase between 20-40%
+
+**Confidence**: 0.85
+
+**Reasoning Chain**:
+- AI adoption correlates with productivity increase
+- Different studies show a range from 20% to 40%
+- All sources reporting this finding are validated
+
+**Testable Implications**:
+- Measure productivity before and after AI implementation
+- Compare productivity increase in AI-adopting organizations with those not adopting AI
+
+---
+
+### Insight 2: Structured AI training programs can accelerate ROI
+
+**Confidence**: 0.80
+
+**Reasoning Chain**:
+- Organizations with AI training programs show faster ROI
+- This implies that well-trained staff can utilize AI more effectively
+- The source reporting this finding is validated
+
+**Testable Implications**:
+- Compare ROI timelines between organizations with and without structured AI training programs
+
+---
+
+### Insight 3: Data quality issues are the primary factor hindering successful AI implementation
+
+**Confidence**: 0.70
+
+**Reasoning Chain**:
+- Data quality is identified as the main barrier
+- Poor quality data can lead to ineffective AI models
+- The source reporting this finding is validated
+
+**Testable Implications**:
+- Measure AI success rate in organizations with high and low data quality
+- Implement data quality improvement measures and observe changes in AI success rate
+
+---
+
+### Insight 4: Small to medium enterprises lag in AI adoption due to cost concerns
+
+**Confidence**: 0.90
+
+**Reasoning Chain**:
+- SMEs are reported to lag in AI adoption
+- Cost concerns are identified as the main reason
+- The source reporting this finding is validated
+
+**Testable Implications**:
+- Survey SMEs to understand their concerns about AI adoption costs
+- Compare AI adoption rates between SMEs with different financial capabilities
+
+---
+
+
+
+# Conclusions and Recommendations
+
+## Key Conclusions
+
+1. Organizations implementing AI will witness a productivity increase between 20-40% (Confidence: 0.85)
+
+2. Structured AI training programs can accelerate ROI (Confidence: 0.80)
+
+3. Data quality issues are the primary factor hindering successful AI implementation (Confidence: 0.70)
+
+## Recommendations
+
+### Immediate Actions
+1. **Validate Top Hypotheses**: Prioritize testing of high-confidence insights
+2. **Address Data Gaps**: Focus on areas with lower confidence scores
+3. **Monitor Key Indicators**: Establish tracking for critical variables
+
+### Strategic Considerations
+- Leverage identified patterns for predictive modeling
+- Address contradictions through targeted data collection
+- Scale successful analytical approaches to other domains
+
+## Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
diff --git a/reports/report_20251108_151408.pdf b/reports/report_20251108_151408.pdf
new file mode 100644
index 0000000..a5eead6
Binary files /dev/null and b/reports/report_20251108_151408.pdf differ
diff --git a/reports/report_20251108_151408_metadata.json b/reports/report_20251108_151408_metadata.json
new file mode 100644
index 0000000..b6000f5
--- /dev/null
+++ b/reports/report_20251108_151408_metadata.json
@@ -0,0 +1,12 @@
+{
+ "report_id": "report_20251108_151408",
+ "generated_at": "2025-11-08T15:14:08.981878",
+ "report_type": "research_report",
+ "version": "1.0",
+ "agents_used": [
+ "contextual_retriever",
+ "critical_analysis",
+ "insight_generation",
+ "report_builder"
+ ]
+}
\ No newline at end of file
diff --git a/reports/report_20251108_151641.html b/reports/report_20251108_151641.html
new file mode 100644
index 0000000..7dad320
--- /dev/null
+++ b/reports/report_20251108_151641.html
@@ -0,0 +1,181 @@
+
+
+
+
+ AI Research Report - report_20251108_151641
+
+
+
+ AI Research Report
+
+
+Executive Summary
+
+Key Insights
+1. AI adoption leads to a significant increase in productivity in early adopters (Confidence: 0.80)
+2. Structured AI training programs accelerate ROI for organizations (Confidence: 0.85)
+3. Data quality issues present major obstacles to successful AI implementation (Confidence: 0.90)
+
+
+Overall Confidence
+Confidence Score: 0.50/1.00
+
+Primary Findings
+
Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+ Contradictions resolved: 0
+ Sources validated: 0
+
+Immediate Implications
+
Evaluating changes in AI adoption among SMEs when costs are lowered
+ Tracking productivity before and after AI adoption
+ Monitoring changes in ROI after introducing structured AI training
+ Comparing productivity of AI adopters vs non-adopters
+ Comparing ROI of organizations with and without AI training programs
+
+
+Methodology
+
+Data Sources
+This analysis utilized 3 distinct data sources including:
+
Source Types: industry_report, research_paper, news_article
+Time Range: 2020-2024
+Geographic Scope: Global
+
+Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. Contextual Retriever: Gathers and processes multi-source data
+2. Critical Analysis: Validates sources and identifies contradictions
+3. Insight Generation: Develops hypotheses and identifies trends
+4. Report Builder: Synthesizes findings into structured reports
+
+Validation Framework
+All findings underwent rigorous validation including:
+
Source credibility assessment
+ Contradiction resolution
+ Confidence scoring
+ Reasoning chain verification
+
+
+Key Findings
+
+Primary Results
+
AI adoption correlates with 25-35% productivity increase in early adopters
+ Organizations with structured AI training programs show faster ROI
+ Data quality is the primary barrier to successful AI implementation
+ Small to medium enterprises lag in AI adoption due to cost concerns
+
+Data Quality Assessment
+
Sources Validated: 3/4
+Contradictions Identified: 3
+Confidence Level: 0.82
+
+Notable Patterns
+
Early adopters consistently outperform laggards
+ Training investment correlates with success metrics
+ Industry-specific variations in implementation success
+
+
+Insights and Hypotheses
+
+Generated Insights
+
+#Insight 1: AI adoption leads to a significant increase in productivity in early adopters
+
+Confidence: 0.80
+
+Reasoning Chain:
+AI adoption correlates with 25-35% productivity increase in early adopters
+ Contradictions identified in studies, but most still show a productivity boost
+ Validated sources support the correlation
+
+Testable Implications:
+Tracking productivity before and after AI adoption
+ Comparing productivity of AI adopters vs non-adopters
+
+---
+
+#Insight 2: Structured AI training programs accelerate ROI for organizations
+
+Confidence: 0.85
+
+Reasoning Chain:
+Organizations with structured AI training programs show faster ROI
+ Structured training likely ensures better usage and understanding of AI tools
+ Validated sources support this finding
+
+Testable Implications:
+Comparing ROI of organizations with and without AI training programs
+ Monitoring changes in ROI after introducing structured AI training
+
+---
+
+#Insight 3: Data quality issues present major obstacles to successful AI implementation
+
+Confidence: 0.90
+
+Reasoning Chain:
+Data quality is the primary barrier to successful AI implementation
+ AI requires high-quality data for optimal performance
+ Valid sources confirm this as a common challenge
+
+Testable Implications:
+Investigating impact of data quality on AI performance
+ Assessing change in AI success rates after improving data quality
+
+---
+
+#Insight 4: Cost concerns are a significant deterrent for AI adoption in small to medium enterprises
+
+Confidence: 0.75
+
+Reasoning Chain:
+Small to medium enterprises lag in AI adoption due to cost concerns
+ SMEs often have fewer resources for investment in technology
+ Validated sources support this finding
+
+Testable Implications:
+Surveying SMEs about their reasons for not adopting AI
+ Evaluating changes in AI adoption among SMEs when costs are lowered
+
+---
+
+
+Conclusions and Recommendations
+
+Key Conclusions
+
+1. AI adoption leads to a significant increase in productivity in early adopters (Confidence: 0.80)
+
+2. Structured AI training programs accelerate ROI for organizations (Confidence: 0.85)
+
+3. Data quality issues present major obstacles to successful AI implementation (Confidence: 0.90)
+
+Recommendations
+
+#Immediate Actions
+1. Validate Top Hypotheses: Prioritize testing of high-confidence insights
+2. Address Data Gaps: Focus on areas with lower confidence scores
+3. Monitor Key Indicators: Establish tracking for critical variables
+
+#Strategic Considerations
+
Leverage identified patterns for predictive modeling
+ Address contradictions through targeted data collection
+ Scale successful analytical approaches to other domains
+
+Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
\ No newline at end of file
diff --git a/reports/report_20251108_151641.markdown b/reports/report_20251108_151641.markdown
new file mode 100644
index 0000000..9535c7c
--- /dev/null
+++ b/reports/report_20251108_151641.markdown
@@ -0,0 +1,170 @@
+# AI Research Report
+
+**Report ID**: report_20251108_151641
+**Generated**: 2025-11-08T15:16:41.483529
+**Type**: research_report
+
+
+# Executive Summary
+
+## Key Insights
+1. AI adoption leads to a significant increase in productivity in early adopters (Confidence: 0.80)
+2. Structured AI training programs accelerate ROI for organizations (Confidence: 0.85)
+3. Data quality issues present major obstacles to successful AI implementation (Confidence: 0.90)
+
+
+## Overall Confidence
+**Confidence Score: 0.50/1.00**
+
+## Primary Findings
+- Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+- Contradictions resolved: 0
+- Sources validated: 0
+
+## Immediate Implications
+- Evaluating changes in AI adoption among SMEs when costs are lowered
+- Tracking productivity before and after AI adoption
+- Monitoring changes in ROI after introducing structured AI training
+- Comparing productivity of AI adopters vs non-adopters
+- Comparing ROI of organizations with and without AI training programs
+
+
+
+# Methodology
+
+## Data Sources
+This analysis utilized 3 distinct data sources including:
+- **Source Types**: industry_report, research_paper, news_article
+- **Time Range**: 2020-2024
+- **Geographic Scope**: Global
+
+## Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. **Contextual Retriever**: Gathers and processes multi-source data
+2. **Critical Analysis**: Validates sources and identifies contradictions
+3. **Insight Generation**: Develops hypotheses and identifies trends
+4. **Report Builder**: Synthesizes findings into structured reports
+
+## Validation Framework
+All findings underwent rigorous validation including:
+- Source credibility assessment
+- Contradiction resolution
+- Confidence scoring
+- Reasoning chain verification
+
+
+
+# Key Findings
+
+## Primary Results
+- AI adoption correlates with 25-35% productivity increase in early adopters
+- Organizations with structured AI training programs show faster ROI
+- Data quality is the primary barrier to successful AI implementation
+- Small to medium enterprises lag in AI adoption due to cost concerns
+
+## Data Quality Assessment
+- **Sources Validated**: 3/4
+- **Contradictions Identified**: 3
+- **Confidence Level**: 0.82
+
+## Notable Patterns
+- Early adopters consistently outperform laggards
+- Training investment correlates with success metrics
+- Industry-specific variations in implementation success
+
+
+
+# Insights and Hypotheses
+
+## Generated Insights
+
+### Insight 1: AI adoption leads to a significant increase in productivity in early adopters
+
+**Confidence**: 0.80
+
+**Reasoning Chain**:
+- AI adoption correlates with 25-35% productivity increase in early adopters
+- Contradictions identified in studies, but most still show a productivity boost
+- Validated sources support the correlation
+
+**Testable Implications**:
+- Tracking productivity before and after AI adoption
+- Comparing productivity of AI adopters vs non-adopters
+
+---
+
+### Insight 2: Structured AI training programs accelerate ROI for organizations
+
+**Confidence**: 0.85
+
+**Reasoning Chain**:
+- Organizations with structured AI training programs show faster ROI
+- Structured training likely ensures better usage and understanding of AI tools
+- Validated sources support this finding
+
+**Testable Implications**:
+- Comparing ROI of organizations with and without AI training programs
+- Monitoring changes in ROI after introducing structured AI training
+
+---
+
+### Insight 3: Data quality issues present major obstacles to successful AI implementation
+
+**Confidence**: 0.90
+
+**Reasoning Chain**:
+- Data quality is the primary barrier to successful AI implementation
+- AI requires high-quality data for optimal performance
+- Valid sources confirm this as a common challenge
+
+**Testable Implications**:
+- Investigating impact of data quality on AI performance
+- Assessing change in AI success rates after improving data quality
+
+---
+
+### Insight 4: Cost concerns are a significant deterrent for AI adoption in small to medium enterprises
+
+**Confidence**: 0.75
+
+**Reasoning Chain**:
+- Small to medium enterprises lag in AI adoption due to cost concerns
+- SMEs often have fewer resources for investment in technology
+- Validated sources support this finding
+
+**Testable Implications**:
+- Surveying SMEs about their reasons for not adopting AI
+- Evaluating changes in AI adoption among SMEs when costs are lowered
+
+---
+
+
+
+# Conclusions and Recommendations
+
+## Key Conclusions
+
+1. AI adoption leads to a significant increase in productivity in early adopters (Confidence: 0.80)
+
+2. Structured AI training programs accelerate ROI for organizations (Confidence: 0.85)
+
+3. Data quality issues present major obstacles to successful AI implementation (Confidence: 0.90)
+
+## Recommendations
+
+### Immediate Actions
+1. **Validate Top Hypotheses**: Prioritize testing of high-confidence insights
+2. **Address Data Gaps**: Focus on areas with lower confidence scores
+3. **Monitor Key Indicators**: Establish tracking for critical variables
+
+### Strategic Considerations
+- Leverage identified patterns for predictive modeling
+- Address contradictions through targeted data collection
+- Scale successful analytical approaches to other domains
+
+## Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
diff --git a/reports/report_20251108_151641.pdf b/reports/report_20251108_151641.pdf
new file mode 100644
index 0000000..9535c7c
Binary files /dev/null and b/reports/report_20251108_151641.pdf differ
diff --git a/reports/report_20251108_151641_metadata.json b/reports/report_20251108_151641_metadata.json
new file mode 100644
index 0000000..af7c9ab
--- /dev/null
+++ b/reports/report_20251108_151641_metadata.json
@@ -0,0 +1,12 @@
+{
+ "report_id": "report_20251108_151641",
+ "generated_at": "2025-11-08T15:16:41.483529",
+ "report_type": "research_report",
+ "version": "1.0",
+ "agents_used": [
+ "contextual_retriever",
+ "critical_analysis",
+ "insight_generation",
+ "report_builder"
+ ]
+}
\ No newline at end of file
diff --git a/reports/report_20251108_151829.html b/reports/report_20251108_151829.html
new file mode 100644
index 0000000..56d89cd
--- /dev/null
+++ b/reports/report_20251108_151829.html
@@ -0,0 +1,177 @@
+
+
+
+
+ AI Research Report - report_20251108_151829
+
+
+
+ AI Research Report
+
+
+Executive Summary
+
+Key Insights
+1. Companies that adopt AI early on will see a significant increase in productivity. (Confidence: 0.80)
+2. Companies with structured AI training programs will experience faster returns on their investments. (Confidence: 0.85)
+3. Data quality is a major impediment to successful AI implementation. (Confidence: 0.90)
+
+
+Overall Confidence
+Confidence Score: 0.50/1.00
+
+Primary Findings
+
+ Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+ Contradictions resolved: 0
+ Sources validated: 0
+
+Immediate Implications
+
+ Delaying AI adoption could result in lower productivity compared to early adopters
+ Poor data quality could result in failed AI projects
+ Lack of structured AI training could delay ROI
+ SMEs with larger budgets should be more likely to adopt AI
+ Reducing implementation costs should increase AI adoption in SMEs
+
+
+Methodology
+
+Data Sources
+This analysis utilized 3 distinct data sources including:
+
+Source Types: industry_report, research_paper, news_article
+Time Range: 2020-2024
+Geographic Scope: Global
+
+Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. Contextual Retriever: Gathers and processes multi-source data
+2. Critical Analysis: Validates sources and identifies contradictions
+3. Insight Generation: Develops hypotheses and identifies trends
+4. Report Builder: Synthesizes findings into structured reports
+
+Validation Framework
+All findings underwent rigorous validation including:
+
+ Source credibility assessment
+ Contradiction resolution
+ Confidence scoring
+ Reasoning chain verification
+
+
+Key Findings
+
+Primary Results
+
+ AI adoption correlates with 25-35% productivity increase in early adopters
+ Organizations with structured AI training programs show faster ROI
+ Data quality is the primary barrier to successful AI implementation
+ Small to medium enterprises lag in AI adoption due to cost concerns
+
+Data Quality Assessment
+
+Sources Validated: 3/4
+Contradictions Identified: 3
+Confidence Level: 0.82
+
+Notable Patterns
+
+ Early adopters consistently outperform laggards
+ Training investment correlates with success metrics
+ Industry-specific variations in implementation success
+
+
+Insights and Hypotheses
+
+Generated Insights
+
+Insight 1: Companies that adopt AI early on will see a significant increase in productivity.
+
+Confidence: 0.80
+
+Reasoning Chain:
+AI adoption correlates with productivity increase
+ AI early adopters show 25-35% productivity increase
+
+Testable Implications:
+Early AI adopters should show higher productivity metrics
+ Delaying AI adoption could result in lower productivity compared to early adopters
+
+---
+
+Insight 2: Companies with structured AI training programs will experience faster returns on their investments.
+
+Confidence: 0.85
+
+Reasoning Chain:
+Organizations with structured AI training programs show faster ROI
+ Effective training could expedite understanding and implementation of AI
+
+Testable Implications:
+Companies with AI training programs should exhibit faster ROI
+ Lack of structured AI training could delay ROI
+
+---
+
+Insight 3: Data quality is a major impediment to successful AI implementation.
+
+Confidence: 0.90
+
+Reasoning Chain:
+Data quality is identified as the primary barrier to AI implementation
+ Poor data can lead to ineffective AI models and applications
+
+Testable Implications:
+Improvements in data quality should correlate with successful AI implementation
+ Poor data quality could result in failed AI projects
+
+---
+
+Insight 4: Cost is a significant factor preventing small to medium enterprises from adopting AI.
+
+Confidence: 0.75
+
+Reasoning Chain:
+Small to medium enterprises lag in AI adoption due to cost concerns
+ High costs of AI implementation could be prohibitive for smaller businesses
+
+Testable Implications:
+Reducing implementation costs should increase AI adoption in SMEs
+ SMEs with larger budgets should be more likely to adopt AI
+
+---
+
+
+Conclusions and Recommendations
+
+Key Conclusions
+
+1. Companies that adopt AI early on will see a significant increase in productivity. (Confidence: 0.80)
+
+2. Companies with structured AI training programs will experience faster returns on their investments. (Confidence: 0.85)
+
+3. Data quality is a major impediment to successful AI implementation. (Confidence: 0.90)
+
+Recommendations
+
+Immediate Actions
+1. Validate Top Hypotheses: Prioritize testing of high-confidence insights
+2. Address Data Gaps: Focus on areas with lower confidence scores
+3. Monitor Key Indicators: Establish tracking for critical variables
+
+Strategic Considerations
+
+ Leverage identified patterns for predictive modeling
+ Address contradictions through targeted data collection
+ Scale successful analytical approaches to other domains
+
+Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
\ No newline at end of file
diff --git a/reports/report_20251108_151829.markdown b/reports/report_20251108_151829.markdown
new file mode 100644
index 0000000..4285545
--- /dev/null
+++ b/reports/report_20251108_151829.markdown
@@ -0,0 +1,166 @@
+# AI Research Report
+
+**Report ID**: report_20251108_151829
+**Generated**: 2025-11-08T15:18:29.696827
+**Type**: research_report
+
+
+# Executive Summary
+
+## Key Insights
+1. Companies that adopt AI early on will see a significant increase in productivity. (Confidence: 0.80)
+2. Companies with structured AI training programs will experience faster returns on their investments. (Confidence: 0.85)
+3. Data quality is a major impediment to successful AI implementation. (Confidence: 0.90)
+
+
+## Overall Confidence
+**Confidence Score: 0.50/1.00**
+
+## Primary Findings
+- Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+- Contradictions resolved: 0
+- Sources validated: 0
+
+## Immediate Implications
+- Delaying AI adoption could result in lower productivity compared to early adopters
+- Poor data quality could result in failed AI projects
+- Lack of structured AI training could delay ROI
+- SMEs with larger budgets should be more likely to adopt AI
+- Reducing implementation costs should increase AI adoption in SMEs
+
+
+
+# Methodology
+
+## Data Sources
+This analysis utilized 3 distinct data sources including:
+- **Source Types**: industry_report, research_paper, news_article
+- **Time Range**: 2020-2024
+- **Geographic Scope**: Global
+
+## Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. **Contextual Retriever**: Gathers and processes multi-source data
+2. **Critical Analysis**: Validates sources and identifies contradictions
+3. **Insight Generation**: Develops hypotheses and identifies trends
+4. **Report Builder**: Synthesizes findings into structured reports
+
+## Validation Framework
+All findings underwent rigorous validation including:
+- Source credibility assessment
+- Contradiction resolution
+- Confidence scoring
+- Reasoning chain verification
+
+
+
+# Key Findings
+
+## Primary Results
+- AI adoption correlates with 25-35% productivity increase in early adopters
+- Organizations with structured AI training programs show faster ROI
+- Data quality is the primary barrier to successful AI implementation
+- Small to medium enterprises lag in AI adoption due to cost concerns
+
+## Data Quality Assessment
+- **Sources Validated**: 3/4
+- **Contradictions Identified**: 3
+- **Confidence Level**: 0.82
+
+## Notable Patterns
+- Early adopters consistently outperform laggards
+- Training investment correlates with success metrics
+- Industry-specific variations in implementation success
+
+
+
+# Insights and Hypotheses
+
+## Generated Insights
+
+### Insight 1: Companies that adopt AI early on will see a significant increase in productivity.
+
+**Confidence**: 0.80
+
+**Reasoning Chain**:
+- AI adoption correlates with productivity increase
+- AI early adopters show 25-35% productivity increase
+
+**Testable Implications**:
+- Early AI adopters should show higher productivity metrics
+- Delaying AI adoption could result in lower productivity compared to early adopters
+
+---
+
+### Insight 2: Companies with structured AI training programs will experience faster returns on their investments.
+
+**Confidence**: 0.85
+
+**Reasoning Chain**:
+- Organizations with structured AI training programs show faster ROI
+- Effective training could expedite understanding and implementation of AI
+
+**Testable Implications**:
+- Companies with AI training programs should exhibit faster ROI
+- Lack of structured AI training could delay ROI
+
+---
+
+### Insight 3: Data quality is a major impediment to successful AI implementation.
+
+**Confidence**: 0.90
+
+**Reasoning Chain**:
+- Data quality is identified as the primary barrier to AI implementation
+- Poor data can lead to ineffective AI models and applications
+
+**Testable Implications**:
+- Improvements in data quality should correlate with successful AI implementation
+- Poor data quality could result in failed AI projects
+
+---
+
+### Insight 4: Cost is a significant factor preventing small to medium enterprises from adopting AI.
+
+**Confidence**: 0.75
+
+**Reasoning Chain**:
+- Small to medium enterprises lag in AI adoption due to cost concerns
+- High costs of AI implementation could be prohibitive for smaller businesses
+
+**Testable Implications**:
+- Reducing implementation costs should increase AI adoption in SMEs
+- SMEs with larger budgets should be more likely to adopt AI
+
+---
+
+
+
+# Conclusions and Recommendations
+
+## Key Conclusions
+
+1. Companies that adopt AI early on will see a significant increase in productivity. (Confidence: 0.80)
+
+2. Companies with structured AI training programs will experience faster returns on their investments. (Confidence: 0.85)
+
+3. Data quality is a major impediment to successful AI implementation. (Confidence: 0.90)
+
+## Recommendations
+
+### Immediate Actions
+1. **Validate Top Hypotheses**: Prioritize testing of high-confidence insights
+2. **Address Data Gaps**: Focus on areas with lower confidence scores
+3. **Monitor Key Indicators**: Establish tracking for critical variables
+
+### Strategic Considerations
+- Leverage identified patterns for predictive modeling
+- Address contradictions through targeted data collection
+- Scale successful analytical approaches to other domains
+
+## Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
diff --git a/reports/report_20251108_151829.pdf b/reports/report_20251108_151829.pdf
new file mode 100644
index 0000000..4285545
Binary files /dev/null and b/reports/report_20251108_151829.pdf differ
diff --git a/reports/report_20251108_151829_metadata.json b/reports/report_20251108_151829_metadata.json
new file mode 100644
index 0000000..e50768d
--- /dev/null
+++ b/reports/report_20251108_151829_metadata.json
@@ -0,0 +1,12 @@
+{
+ "report_id": "report_20251108_151829",
+ "generated_at": "2025-11-08T15:18:29.696827",
+ "report_type": "research_report",
+ "version": "1.0",
+ "agents_used": [
+ "contextual_retriever",
+ "critical_analysis",
+ "insight_generation",
+ "report_builder"
+ ]
+}
\ No newline at end of file
diff --git a/reports/report_20251108_152823.html b/reports/report_20251108_152823.html
new file mode 100644
index 0000000..af5256c
--- /dev/null
+++ b/reports/report_20251108_152823.html
@@ -0,0 +1,178 @@
+
+
+
+
+ AI Research Report - report_20251108_152823
+
+
+
+ AI Research Report
+
+
+Executive Summary
+
+Key Insights
+1. Implementation of AI is likely to increase productivity in businesses by around 28%, especially in the initial stages of adoption. (Confidence: 0.70)
+2. The quality of data used in AI is a major determinant of the success of AI implementation. (Confidence: 0.85)
+3. Small to medium enterprises (SMEs) are less likely to adopt AI due to cost issues. (Confidence: 0.90)
+
+
+Overall Confidence
+Confidence Score: 0.50/1.00
+
+Primary Findings
+
+ Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+ Contradictions resolved: 0
+ Sources validated: 0
+
+Immediate Implications
+
+ Companies with high-quality data should have more successful AI implementation compared to those with lower-quality data
+ Companies with structured AI training programs should realize ROI faster than those without such programs
+ The productivity increase should be more significant in the early stages of AI adoption
+ Implementing a structured AI training program should reduce the time it takes for a company to realize ROI from AI
+ Reducing the cost of AI implementation should increase the rate of AI adoption among SMEs
+
+
+Methodology
+
+Data Sources
+This analysis utilized 3 distinct data sources including:
+
+Source Types: industry_report, research_paper, news_article
+Time Range: 2020-2024
+Geographic Scope: Global
+
+Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. Contextual Retriever: Gathers and processes multi-source data
+2. Critical Analysis: Validates sources and identifies contradictions
+3. Insight Generation: Develops hypotheses and identifies trends
+4. Report Builder: Synthesizes findings into structured reports
+
+Validation Framework
+All findings underwent rigorous validation including:
+
+ Source credibility assessment
+ Contradiction resolution
+ Confidence scoring
+ Reasoning chain verification
+
+
+Key Findings
+
+Primary Results
+
+ AI adoption correlates with 25-35% productivity increase in early adopters
+ Organizations with structured AI training programs show faster ROI
+ Data quality is the primary barrier to successful AI implementation
+ Small to medium enterprises lag in AI adoption due to cost concerns
+
+Data Quality Assessment
+
+Sources Validated: 3/4
+Contradictions Identified: 3
+Confidence Level: 0.82
+
+Notable Patterns
+
+ Early adopters consistently outperform laggards
+ Training investment correlates with success metrics
+ Industry-specific variations in implementation success
+
+
+Insights and Hypotheses
+
+Generated Insights
+
+Insight 1: Implementation of AI is likely to increase productivity in businesses by around 28%, especially in the initial stages of adoption.
+
+Confidence: 0.70
+
+Reasoning Chain:
+Early adopters of AI have seen a productivity increase of between 25-35%
+ The conflicting reports of a 40% and 20% increase could average to approximately 28%
+ The varying reports could be due to different methods of measuring productivity or different stages of AI implementation
+
+Testable Implications:
+Companies that have recently implemented AI should see a noticeable increase in productivity
+ The productivity increase should be more significant in the early stages of AI adoption
+
+---
+
+Insight 2: The quality of data used in AI is a major determinant of the success of AI implementation.
+
+Confidence: 0.85
+
+Reasoning Chain:
+Data quality has been identified as the primary barrier to successful AI implementation
+ Therefore, the quality of data used in AI likely has a significant impact on the outcomes of AI implementation
+
+Testable Implications:
+Companies with high-quality data should have more successful AI implementation compared to those with lower-quality data
+ Improving data quality should increase the success rate of AI implementation
+
+---
+
+Insight 3: Small to medium enterprises (SMEs) are less likely to adopt AI due to cost issues.
+
+Confidence: 0.90
+
+Reasoning Chain:
+SMEs have been identified as lagging in AI adoption due to cost concerns
+ Therefore, the cost of AI implementation is likely a significant barrier to AI adoption for SMEs
+
+Testable Implications:
+SMEs should be less likely to adopt AI compared to larger companies
+ Reducing the cost of AI implementation should increase the rate of AI adoption among SMEs
+
+---
+
+Insight 4: Organizations with structured AI training programs are likely to realize the return on investment (ROI) faster.
+
+Confidence: 0.80
+
+Reasoning Chain:
+Organizations with structured AI training programs have been identified as showing faster ROI
+ Therefore, the presence of a structured AI training program likely impacts the speed at which companies realize the ROI from AI
+
+Testable Implications:
+Companies with structured AI training programs should realize ROI faster than those without such programs
+ Implementing a structured AI training program should reduce the time it takes for a company to realize ROI from AI
+
+---
+
+
+Conclusions and Recommendations
+
+Key Conclusions
+
+1. Implementation of AI is likely to increase productivity in businesses by around 28%, especially in the initial stages of adoption. (Confidence: 0.70)
+
+2. The quality of data used in AI is a major determinant of the success of AI implementation. (Confidence: 0.85)
+
+3. Small to medium enterprises (SMEs) are less likely to adopt AI due to cost issues. (Confidence: 0.90)
+
+Recommendations
+
+Immediate Actions
+1. Validate Top Hypotheses: Prioritize testing of high-confidence insights
+2. Address Data Gaps: Focus on areas with lower confidence scores
+3. Monitor Key Indicators: Establish tracking for critical variables
+
+Strategic Considerations
+
+ Leverage identified patterns for predictive modeling
+ Address contradictions through targeted data collection
+ Scale successful analytical approaches to other domains
+
+Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
\ No newline at end of file
diff --git a/reports/report_20251108_152823.markdown b/reports/report_20251108_152823.markdown
new file mode 100644
index 0000000..c9ca8c1
--- /dev/null
+++ b/reports/report_20251108_152823.markdown
@@ -0,0 +1,167 @@
+# AI Research Report
+
+**Report ID**: report_20251108_152823
+**Generated**: 2025-11-08T15:28:23.155737
+**Type**: research_report
+
+
+# Executive Summary
+
+## Key Insights
+1. Implementation of AI is likely to increase productivity in businesses by around 28%, especially in the initial stages of adoption. (Confidence: 0.70)
+2. The quality of data used in AI is a major determinant of the success of AI implementation. (Confidence: 0.85)
+3. Small to medium enterprises (SMEs) are less likely to adopt AI due to cost issues. (Confidence: 0.90)
+
+
+## Overall Confidence
+**Confidence Score: 0.50/1.00**
+
+## Primary Findings
+- Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+- Contradictions resolved: 0
+- Sources validated: 0
+
+## Immediate Implications
+- Companies with high-quality data should have more successful AI implementation compared to those with lower-quality data
+- Companies with structured AI training programs should realize ROI faster than those without such programs
+- The productivity increase should be more significant in the early stages of AI adoption
+- Implementing a structured AI training program should reduce the time it takes for a company to realize ROI from AI
+- Reducing the cost of AI implementation should increase the rate of AI adoption among SMEs
+
+
+
+# Methodology
+
+## Data Sources
+This analysis utilized 3 distinct data sources including:
+- **Source Types**: industry_report, research_paper, news_article
+- **Time Range**: 2020-2024
+- **Geographic Scope**: Global
+
+## Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. **Contextual Retriever**: Gathers and processes multi-source data
+2. **Critical Analysis**: Validates sources and identifies contradictions
+3. **Insight Generation**: Develops hypotheses and identifies trends
+4. **Report Builder**: Synthesizes findings into structured reports
+
+## Validation Framework
+All findings underwent rigorous validation including:
+- Source credibility assessment
+- Contradiction resolution
+- Confidence scoring
+- Reasoning chain verification
+
+
+
+# Key Findings
+
+## Primary Results
+- AI adoption correlates with 25-35% productivity increase in early adopters
+- Organizations with structured AI training programs show faster ROI
+- Data quality is the primary barrier to successful AI implementation
+- Small to medium enterprises lag in AI adoption due to cost concerns
+
+## Data Quality Assessment
+- **Sources Validated**: 3/4
+- **Contradictions Identified**: 3
+- **Confidence Level**: 0.82
+
+## Notable Patterns
+- Early adopters consistently outperform laggards
+- Training investment correlates with success metrics
+- Industry-specific variations in implementation success
+
+
+
+# Insights and Hypotheses
+
+## Generated Insights
+
+### Insight 1: Implementation of AI is likely to increase productivity in businesses by around 28%, especially in the initial stages of adoption.
+
+**Confidence**: 0.70
+
+**Reasoning Chain**:
+- Early adopters of AI have seen a productivity increase of between 25-35%
+- The conflicting reports of a 40% and 20% increase could average to approximately 28%
+- The varying reports could be due to different methods of measuring productivity or different stages of AI implementation
+
+**Testable Implications**:
+- Companies that have recently implemented AI should see a noticeable increase in productivity
+- The productivity increase should be more significant in the early stages of AI adoption
+
+---
+
+### Insight 2: The quality of data used in AI is a major determinant of the success of AI implementation.
+
+**Confidence**: 0.85
+
+**Reasoning Chain**:
+- Data quality has been identified as the primary barrier to successful AI implementation
+- Therefore, the quality of data used in AI likely has a significant impact on the outcomes of AI implementation
+
+**Testable Implications**:
+- Companies with high-quality data should have more successful AI implementation compared to those with lower-quality data
+- Improving data quality should increase the success rate of AI implementation
+
+---
+
+### Insight 3: Small to medium enterprises (SMEs) are less likely to adopt AI due to cost issues.
+
+**Confidence**: 0.90
+
+**Reasoning Chain**:
+- SMEs have been identified as lagging in AI adoption due to cost concerns
+- Therefore, the cost of AI implementation is likely a significant barrier to AI adoption for SMEs
+
+**Testable Implications**:
+- SMEs should be less likely to adopt AI compared to larger companies
+- Reducing the cost of AI implementation should increase the rate of AI adoption among SMEs
+
+---
+
+### Insight 4: Organizations with structured AI training programs are likely to realize the return on investment (ROI) faster.
+
+**Confidence**: 0.80
+
+**Reasoning Chain**:
+- Organizations with structured AI training programs have been identified as showing faster ROI
+- Therefore, the presence of a structured AI training program likely impacts the speed at which companies realize the ROI from AI
+
+**Testable Implications**:
+- Companies with structured AI training programs should realize ROI faster than those without such programs
+- Implementing a structured AI training program should reduce the time it takes for a company to realize ROI from AI
+
+---
+
+
+
+# Conclusions and Recommendations
+
+## Key Conclusions
+
+1. Implementation of AI is likely to increase productivity in businesses by around 28%, especially in the initial stages of adoption. (Confidence: 0.70)
+
+2. The quality of data used in AI is a major determinant of the success of AI implementation. (Confidence: 0.85)
+
+3. Small to medium enterprises (SMEs) are less likely to adopt AI due to cost issues. (Confidence: 0.90)
+
+## Recommendations
+
+### Immediate Actions
+1. **Validate Top Hypotheses**: Prioritize testing of high-confidence insights
+2. **Address Data Gaps**: Focus on areas with lower confidence scores
+3. **Monitor Key Indicators**: Establish tracking for critical variables
+
+### Strategic Considerations
+- Leverage identified patterns for predictive modeling
+- Address contradictions through targeted data collection
+- Scale successful analytical approaches to other domains
+
+## Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
diff --git a/reports/report_20251108_152823.pdf b/reports/report_20251108_152823.pdf
new file mode 100644
index 0000000..c9ca8c1
Binary files /dev/null and b/reports/report_20251108_152823.pdf differ
diff --git a/reports/report_20251108_152823_metadata.json b/reports/report_20251108_152823_metadata.json
new file mode 100644
index 0000000..22ea338
--- /dev/null
+++ b/reports/report_20251108_152823_metadata.json
@@ -0,0 +1,12 @@
+{
+ "report_id": "report_20251108_152823",
+ "generated_at": "2025-11-08T15:28:23.155737",
+ "report_type": "research_report",
+ "version": "1.0",
+ "agents_used": [
+ "contextual_retriever",
+ "critical_analysis",
+ "insight_generation",
+ "report_builder"
+ ]
+}
\ No newline at end of file
diff --git a/reports/report_20251108_153040.html b/reports/report_20251108_153040.html
new file mode 100644
index 0000000..64391cc
--- /dev/null
+++ b/reports/report_20251108_153040.html
@@ -0,0 +1,177 @@
+
+
+
+
+ AI Research Report - report_20251108_153040
+
+
+
+ AI Research Report
+
+
+Executive Summary
+
+Key Insights
+1. Early adoption of AI leads to a significant increase in productivity (Confidence: 0.85)
+2. Organizations with structured AI training programs achieve faster ROI (Confidence: 0.90)
+3. Data quality is the primary obstacle to successful AI implementation (Confidence: 0.95)
+
+
+Overall Confidence
+Confidence Score: 0.50/1.00
+
+Primary Findings
+
+ Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+ Contradictions resolved: 0
+ Sources validated: 0
+
+Immediate Implications
+
+ AI implementations with poor data quality are likely to fail
+ SMEs with more resources should be more likely to adopt AI
+ Productivity should increase following AI implementation
+ Organizations with structured AI training should show faster ROI on AI investments
+ Early adopters should see the most significant increases
+
+
+Methodology
+
+Data Sources
+This analysis utilized 3 distinct data sources including:
+
+Source Types: industry_report, research_paper, news_article
+Time Range: 2020-2024
+Geographic Scope: Global
+
+Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. Contextual Retriever: Gathers and processes multi-source data
+2. Critical Analysis: Validates sources and identifies contradictions
+3. Insight Generation: Develops hypotheses and identifies trends
+4. Report Builder: Synthesizes findings into structured reports
+
+Validation Framework
+All findings underwent rigorous validation including:
+
+ Source credibility assessment
+ Contradiction resolution
+ Confidence scoring
+ Reasoning chain verification
+
+
+Key Findings
+
+Primary Results
+
+ AI adoption correlates with 25-35% productivity increase in early adopters
+ Organizations with structured AI training programs show faster ROI
+ Data quality is the primary barrier to successful AI implementation
+ Small to medium enterprises lag in AI adoption due to cost concerns
+
+Data Quality Assessment
+
+Sources Validated: 3/4
+Contradictions Identified: 3
+Confidence Level: 0.82
+
+Notable Patterns
+
+ Early adopters consistently outperform laggards
+ Training investment correlates with success metrics
+ Industry-specific variations in implementation success
+
+
+Insights and Hypotheses
+
+Generated Insights
+
+Insight 1: Early adoption of AI leads to a significant increase in productivity
+
+Confidence: 0.85
+
+Reasoning Chain:
+AI adoption correlates with 25-35% productivity increase in early adopters
+ Contradictions in studies A and B suggest the exact percentage increase may vary, but the trend is consistent
+
+Testable Implications:
+Productivity should increase following AI implementation
+ Early adopters should see the most significant increases
+
+---
+
+Insight 2: Organizations with structured AI training programs achieve faster ROI
+
+Confidence: 0.90
+
+Reasoning Chain:
+Organizations with structured AI training programs show faster ROI
+ This suggests that effective training is key to leveraging AI tools to their full potential
+
+Testable Implications:
+Organizations with structured AI training should show faster ROI on AI investments
+ The lack of a structured training program should delay ROI realization
+
+---
+
+Insight 3: Data quality is the primary obstacle to successful AI implementation
+
+Confidence: 0.95
+
+Reasoning Chain:
+Data quality is the primary barrier to successful AI implementation
+ This suggests that without high-quality data, AI tools cannot function effectively
+
+Testable Implications:
+Improvements in data quality should lead to more successful AI implementations
+ AI implementations with poor data quality are likely to fail
+
+---
+
+Insight 4: Small to medium enterprises lag in AI adoption due to cost concerns
+
+Confidence: 0.80
+
+Reasoning Chain:
+Small to medium enterprises lag in AI adoption due to cost concerns
+ This suggests that the cost of AI tools is a significant barrier for smaller organizations
+
+Testable Implications:
+Lowering the cost of AI tools should increase adoption rates among SMEs
+ SMEs with more resources should be more likely to adopt AI
+
+---
+
+
+Conclusions and Recommendations
+
+Key Conclusions
+
+1. Early adoption of AI leads to a significant increase in productivity (Confidence: 0.85)
+
+2. Organizations with structured AI training programs achieve faster ROI (Confidence: 0.90)
+
+3. Data quality is the primary obstacle to successful AI implementation (Confidence: 0.95)
+
+Recommendations
+
+Immediate Actions
+1. Validate Top Hypotheses: Prioritize testing of high-confidence insights
+2. Address Data Gaps: Focus on areas with lower confidence scores
+3. Monitor Key Indicators: Establish tracking for critical variables
+
+Strategic Considerations
+
+ Leverage identified patterns for predictive modeling
+ Address contradictions through targeted data collection
+ Scale successful analytical approaches to other domains
+
+Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
\ No newline at end of file
diff --git a/reports/report_20251108_153040.markdown b/reports/report_20251108_153040.markdown
new file mode 100644
index 0000000..d984c3f
--- /dev/null
+++ b/reports/report_20251108_153040.markdown
@@ -0,0 +1,166 @@
+# AI Research Report
+
+**Report ID**: report_20251108_153040
+**Generated**: 2025-11-08T15:30:40.732026
+**Type**: research_report
+
+
+# Executive Summary
+
+## Key Insights
+1. Early adoption of AI leads to a significant increase in productivity (Confidence: 0.85)
+2. Organizations with structured AI training programs achieve faster ROI (Confidence: 0.90)
+3. Data quality is the primary obstacle to successful AI implementation (Confidence: 0.95)
+
+
+## Overall Confidence
+**Confidence Score: 0.50/1.00**
+
+## Primary Findings
+- Strong evidence for AI productivity benefits, but implementation challenges and conflicting metrics require careful consideration
+- Contradictions resolved: 0
+- Sources validated: 0
+
+## Immediate Implications
+- AI implementations with poor data quality are likely to fail
+- SMEs with more resources should be more likely to adopt AI
+- Productivity should increase following AI implementation
+- Organizations with structured AI training should show faster ROI on AI investments
+- Early adopters should see the most significant increases
+
+
+
+# Methodology
+
+## Data Sources
+This analysis utilized 3 distinct data sources including:
+- **Source Types**: industry_report, research_paper, news_article
+- **Time Range**: 2020-2024
+- **Geographic Scope**: Global
+
+## Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. **Contextual Retriever**: Gathers and processes multi-source data
+2. **Critical Analysis**: Validates sources and identifies contradictions
+3. **Insight Generation**: Develops hypotheses and identifies trends
+4. **Report Builder**: Synthesizes findings into structured reports
+
+## Validation Framework
+All findings underwent rigorous validation including:
+- Source credibility assessment
+- Contradiction resolution
+- Confidence scoring
+- Reasoning chain verification
+
+
+
+# Key Findings
+
+## Primary Results
+- AI adoption correlates with 25-35% productivity increase in early adopters
+- Organizations with structured AI training programs show faster ROI
+- Data quality is the primary barrier to successful AI implementation
+- Small to medium enterprises lag in AI adoption due to cost concerns
+
+## Data Quality Assessment
+- **Sources Validated**: 3/4
+- **Contradictions Identified**: 3
+- **Confidence Level**: 0.82
+
+## Notable Patterns
+- Early adopters consistently outperform laggards
+- Training investment correlates with success metrics
+- Industry-specific variations in implementation success
+
+
+
+# Insights and Hypotheses
+
+## Generated Insights
+
+### Insight 1: Early adoption of AI leads to a significant increase in productivity
+
+**Confidence**: 0.85
+
+**Reasoning Chain**:
+- AI adoption correlates with 25-35% productivity increase in early adopters
+- The contradiction between studies A and B suggests the exact percentage increase may vary, but the trend is consistent
+
+**Testable Implications**:
+- Productivity should increase following AI implementation
+- Early adopters should see the most significant increases
+
+---
+
+### Insight 2: Organizations with structured AI training programs achieve faster ROI
+
+**Confidence**: 0.90
+
+**Reasoning Chain**:
+- Organizations with structured AI training programs show faster ROI
+- This suggests that effective training is key to leveraging AI tools to their full potential
+
+**Testable Implications**:
+- Organizations with structured AI training should show faster ROI on AI investments
+- The lack of a structured training program should delay ROI realization
+
+---
+
+### Insight 3: Data quality is the primary obstacle to successful AI implementation
+
+**Confidence**: 0.95
+
+**Reasoning Chain**:
+- Data quality is the primary barrier to successful AI implementation
+- This suggests that without high-quality data, AI tools cannot function effectively
+
+**Testable Implications**:
+- Improvements in data quality should lead to more successful AI implementations
+- AI implementations with poor data quality are likely to fail
+
+---
+
+### Insight 4: Small to medium enterprises lag in AI adoption due to cost concerns
+
+**Confidence**: 0.80
+
+**Reasoning Chain**:
+- Small to medium enterprises lag in AI adoption due to cost concerns
+- This suggests that the cost of AI tools is a significant barrier for smaller organizations
+
+**Testable Implications**:
+- Lowering the cost of AI tools should increase adoption rates among SMEs
+- SMEs with more resources should be more likely to adopt AI
+
+---
+
+
+
+# Conclusions and Recommendations
+
+## Key Conclusions
+
+1. Early adoption of AI leads to a significant increase in productivity (Confidence: 0.85)
+
+2. Organizations with structured AI training programs achieve faster ROI (Confidence: 0.90)
+
+3. Data quality is the primary obstacle to successful AI implementation (Confidence: 0.95)
+
+## Recommendations
+
+### Immediate Actions
+1. **Validate Top Hypotheses**: Prioritize testing of high-confidence insights
+2. **Address Data Gaps**: Focus on areas with lower confidence scores
+3. **Monitor Key Indicators**: Establish tracking for critical variables
+
+### Strategic Considerations
+- Leverage identified patterns for predictive modeling
+- Address contradictions through targeted data collection
+- Scale successful analytical approaches to other domains
+
+## Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
diff --git a/reports/report_20251108_153040.pdf b/reports/report_20251108_153040.pdf
new file mode 100644
index 0000000..d984c3f
Binary files /dev/null and b/reports/report_20251108_153040.pdf differ
diff --git a/reports/report_20251108_153040_metadata.json b/reports/report_20251108_153040_metadata.json
new file mode 100644
index 0000000..2bfb372
--- /dev/null
+++ b/reports/report_20251108_153040_metadata.json
@@ -0,0 +1,12 @@
+{
+ "report_id": "report_20251108_153040",
+ "generated_at": "2025-11-08T15:30:40.732026",
+ "report_type": "research_report",
+ "version": "1.0",
+ "agents_used": [
+ "contextual_retriever",
+ "critical_analysis",
+ "insight_generation",
+ "report_builder"
+ ]
+}
\ No newline at end of file
diff --git a/reports/report_20251108_153424.html b/reports/report_20251108_153424.html
new file mode 100644
index 0000000..185536c
--- /dev/null
+++ b/reports/report_20251108_153424.html
@@ -0,0 +1,164 @@
+
+
+
+
+ AI Research Report - report_20251108_153424
+
+
+
+ AI Research Report
+
+
+Executive Summary
+
+Key Insights
+1. AI application in property and casualty insurance significantly reduces the time taken to process a claim. (Confidence: 0.90)
+2. AI incorporation in property and casualty insurance could lead to more accurate risk assessments. (Confidence: 0.85)
+3. AI's impact on property and casualty insurance is positive but there are potential contradictions and limitations. (Confidence: 0.75)
+
+
+Overall Confidence
+Confidence Score: 0.50/1.00
+
+Primary Findings
+
Short summary focused on The Impact of AI in Property and Casualty Insurance.
+ Contradictions resolved: 0
+ Sources validated: 0
+
+Immediate Implications
+
If AI is removed, claim processing will take longer.
+ If AI is removed from the risk assessment process, accuracy should decrease.
+ Detailed investigations into contradiction A vs B could yield more nuanced understanding.
+ Further research into potential limitations of AI in this context could provide additional insights.
+ Increasing AI capabilities should further decrease claim processing time.
+
+
+Methodology
+
+Data Sources
+This analysis utilized 3 distinct data sources including:
+
Source Types: industry_report, research_paper, news_article
+Time Range: 2020-2025
+Geographic Scope: Global
+
+Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. Contextual Retriever: Gathers and processes multi-source data
+2. Critical Analysis: Validates sources and identifies contradictions
+3. Insight Generation: Develops hypotheses and identifies trends
+4. Report Builder: Synthesizes findings into structured reports
+
+Validation Framework
+All findings underwent rigorous validation including:
+
Source credibility assessment
+ Contradiction resolution
+ Confidence scoring
+ Reasoning chain verification
+
+
+Key Findings
+
+Primary Results
+
Finding 1 about The Impact of AI in Property and Casualty Insurance
+ Finding 2 about The Impact of AI in Property and Casualty Insurance
+ Finding 3 about The Impact of AI in Property and Casualty Insurance
+
+Data Quality Assessment
+
Sources Validated: 2/2
+Contradictions Identified: 1
+Confidence Level: 0.8
+
+Notable Patterns
+
Notable pattern 1
+ Notable pattern 2
+
+
+Insights and Hypotheses
+
+Generated Insights
+
+#Insight 1: AI application in property and casualty insurance significantly reduces the time taken to process a claim.
+
+Confidence: 0.90
+
+Reasoning Chain:
+Finding 1 suggests that AI expedites claim processing.
+ Faster processing is usually a result of automated processes.
+ AI is known for its ability to automate tasks.
+
+Testable Implications:
+If AI is removed, claim processing will take longer.
+ Increasing AI capabilities should further decrease claim processing time.
+
+---
+
+#Insight 2: AI incorporation in property and casualty insurance could lead to more accurate risk assessments.
+
+Confidence: 0.85
+
+Reasoning Chain:
+Finding 2 indicates that AI increases the accuracy of risk assessments.
+ AI's ability to analyze large data sets and identify patterns can lead to more accurate assessments.
+ Insurance is fundamentally about assessing risk.
+
+Testable Implications:
+If AI is applied to risk assessments, the accuracy of the assessments should increase.
+ If AI is removed from the risk assessment process, accuracy should decrease.
+
+---
+
+#Insight 3: AI's impact on property and casualty insurance is positive but there are potential contradictions and limitations.
+
+Confidence: 0.75
+
+Reasoning Chain:
+Findings 1 and 2 suggest positive impacts of AI.
+ However, contradiction A vs B introduces some doubt about the overall impact.
+ Source validations indicate reliable information but do not eliminate the possibility of limitations or contradictions.
+
+Testable Implications:
+Detailed investigations into contradiction A vs B could yield more nuanced understanding.
+ Further research into potential limitations of AI in this context could provide additional insights.
+
+---
+
+
+Conclusions and Recommendations
+
+Key Conclusions
+
+1. AI application in property and casualty insurance significantly reduces the time taken to process a claim. (Confidence: 0.90)
+
+2. AI incorporation in property and casualty insurance could lead to more accurate risk assessments. (Confidence: 0.85)
+
+3. AI's impact on property and casualty insurance is positive but there are potential contradictions and limitations. (Confidence: 0.75)
+
+Recommendations
+
+#Immediate Actions
+1. Validate Top Hypotheses: Prioritize testing of high-confidence insights
+2. Address Data Gaps: Focus on areas with lower confidence scores
+3. Monitor Key Indicators: Establish tracking for critical variables
+
+#Strategic Considerations
+
Leverage identified patterns for predictive modeling
+ Address contradictions through targeted data collection
+ Scale successful analytical approaches to other domains
+
+Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
\ No newline at end of file
diff --git a/reports/report_20251108_153424.markdown b/reports/report_20251108_153424.markdown
new file mode 100644
index 0000000..61752f5
--- /dev/null
+++ b/reports/report_20251108_153424.markdown
@@ -0,0 +1,153 @@
+# AI Research Report
+
+**Report ID**: report_20251108_153424
+**Generated**: 2025-11-08T15:34:24.141593
+**Type**: research_report
+
+
+# Executive Summary
+
+## Key Insights
+1. AI application in property and casualty insurance significantly reduces the time taken to process a claim. (Confidence: 0.90)
+2. AI incorporation in property and casualty insurance could lead to more accurate risk assessments. (Confidence: 0.85)
+3. AI's impact on property and casualty insurance is positive but there are potential contradictions and limitations. (Confidence: 0.75)
+
+
+## Overall Confidence
+**Confidence Score: 0.50/1.00**
+
+## Primary Findings
+- Short summary focused on The Impact of AI in Property and Casualty Insurance.
+- Contradictions resolved: 0
+- Sources validated: 0
+
+## Immediate Implications
+- If AI is removed, claim processing will take longer.
+- If AI is removed from the risk assessment process, accuracy should decrease.
+- Detailed investigations into contradiction A vs B could yield more nuanced understanding.
+- Further research into potential limitations of AI in this context could provide additional insights.
+- Increasing AI capabilities should further decrease claim processing time.
+
+
+
+# Methodology
+
+## Data Sources
+This analysis utilized 3 distinct data sources including:
+- **Source Types**: industry_report, research_paper, news_article
+- **Time Range**: 2020-2025
+- **Geographic Scope**: Global
+
+## Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. **Contextual Retriever**: Gathers and processes multi-source data
+2. **Critical Analysis**: Validates sources and identifies contradictions
+3. **Insight Generation**: Develops hypotheses and identifies trends
+4. **Report Builder**: Synthesizes findings into structured reports
+
+## Validation Framework
+All findings underwent rigorous validation including:
+- Source credibility assessment
+- Contradiction resolution
+- Confidence scoring
+- Reasoning chain verification
+
+
+
+# Key Findings
+
+## Primary Results
+- Finding 1 about The Impact of AI in Property and Casualty Insurance
+- Finding 2 about The Impact of AI in Property and Casualty Insurance
+- Finding 3 about The Impact of AI in Property and Casualty Insurance
+
+## Data Quality Assessment
+- **Sources Validated**: 2/2
+- **Contradictions Identified**: 1
+- **Confidence Level**: 0.8
+
+## Notable Patterns
+- Notable pattern 1
+- Notable pattern 2
+
+
+
+# Insights and Hypotheses
+
+## Generated Insights
+
+### Insight 1: AI application in property and casualty insurance significantly reduces the time taken to process a claim.
+
+**Confidence**: 0.90
+
+**Reasoning Chain**:
+- Finding 1 suggests that AI expedites claim processing.
+- Faster processing is usually a result of automated processes.
+- AI is known for its ability to automate tasks.
+
+**Testable Implications**:
+- If AI is removed, claim processing will take longer.
+- Increasing AI capabilities should further decrease claim processing time.
+
+---
+
+### Insight 2: AI incorporation in property and casualty insurance could lead to more accurate risk assessments.
+
+**Confidence**: 0.85
+
+**Reasoning Chain**:
+- Finding 2 indicates that AI increases the accuracy of risk assessments.
+- AI's ability to analyze large data sets and identify patterns can lead to more accurate assessments.
+- Insurance is fundamentally about assessing risk.
+
+**Testable Implications**:
+- If AI is applied to risk assessments, the accuracy of the assessments should increase.
+- If AI is removed from the risk assessment process, accuracy should decrease.
+
+---
+
+### Insight 3: AI's impact on property and casualty insurance is positive but there are potential contradictions and limitations.
+
+**Confidence**: 0.75
+
+**Reasoning Chain**:
+- Findings 1 and 2 suggest positive impacts of AI.
+- However, contradiction A vs B introduces some doubt about the overall impact.
+- Source validations indicate reliable information but do not eliminate the possibility of limitations or contradictions.
+
+**Testable Implications**:
+- Detailed investigations into contradiction A vs B could yield more nuanced understanding.
+- Further research into potential limitations of AI in this context could provide additional insights.
+
+---
+
+
+
+# Conclusions and Recommendations
+
+## Key Conclusions
+
+1. AI application in property and casualty insurance significantly reduces the time taken to process a claim. (Confidence: 0.90)
+
+2. AI incorporation in property and casualty insurance could lead to more accurate risk assessments. (Confidence: 0.85)
+
+3. AI's impact on property and casualty insurance is positive but there are potential contradictions and limitations. (Confidence: 0.75)
+
+## Recommendations
+
+### Immediate Actions
+1. **Validate Top Hypotheses**: Prioritize testing of high-confidence insights
+2. **Address Data Gaps**: Focus on areas with lower confidence scores
+3. **Monitor Key Indicators**: Establish tracking for critical variables
+
+### Strategic Considerations
+- Leverage identified patterns for predictive modeling
+- Address contradictions through targeted data collection
+- Scale successful analytical approaches to other domains
+
+## Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
diff --git a/reports/report_20251108_153424.pdf b/reports/report_20251108_153424.pdf
new file mode 100644
index 0000000..61752f5
Binary files /dev/null and b/reports/report_20251108_153424.pdf differ
diff --git a/reports/report_20251108_153424_metadata.json b/reports/report_20251108_153424_metadata.json
new file mode 100644
index 0000000..29fe44b
--- /dev/null
+++ b/reports/report_20251108_153424_metadata.json
@@ -0,0 +1,12 @@
+{
+ "report_id": "report_20251108_153424",
+ "generated_at": "2025-11-08T15:34:24.141593",
+ "report_type": "research_report",
+ "version": "1.0",
+ "agents_used": [
+ "contextual_retriever",
+ "critical_analysis",
+ "insight_generation",
+ "report_builder"
+ ]
+}
\ No newline at end of file
diff --git a/reports/report_20251108_153732.html b/reports/report_20251108_153732.html
new file mode 100644
index 0000000..bc42565
--- /dev/null
+++ b/reports/report_20251108_153732.html
@@ -0,0 +1,164 @@
+
+
+
+
+ AI Research Report - report_20251108_153732
+
+
+
+ AI Research Report
+
+
+Executive Summary
+
+Key Insights
+1. AI significantly reduces the time it takes to process insurance claims (Confidence: 0.90)
+2. AI increases the accuracy of insurance claim evaluations (Confidence: 0.80)
+3. AI can lead to a decrease in fraud-related losses in insurance claims (Confidence: 0.75)
+
+
+Overall Confidence
+Confidence Score: 0.50/1.00
+
+Primary Findings
+
Short summary focused on The Impact of AI in Insurance Claims.
+ Contradictions resolved: 0
+ Sources validated: 0
+
+Immediate Implications
+
If AI is removed from the process, the amount of fraud-related losses should increase
+ If an insurance company implements AI, the amount of fraud-related losses should decrease
+ If AI is removed from the process, the average time to process claims should increase
+ If an insurance company implements AI, the accuracy of claim evaluations should increase
+ If AI is removed from the process, the accuracy of claim evaluations should decrease
+
+
+Methodology
+
+Data Sources
+This analysis utilized 3 distinct data sources including:
+
Source Types: industry_report, research_paper, news_article
+Time Range: 2020-2025
+Geographic Scope: Global
+
+Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. Contextual Retriever: Gathers and processes multi-source data
+2. Critical Analysis: Validates sources and identifies contradictions
+3. Insight Generation: Develops hypotheses and identifies trends
+4. Report Builder: Synthesizes findings into structured reports
+
+Validation Framework
+All findings underwent rigorous validation including:
+
Source credibility assessment
+ Contradiction resolution
+ Confidence scoring
+ Reasoning chain verification
+
+
+Key Findings
+
+Primary Results
+
Finding 1 about The Impact of AI in Insurance Claims
+ Finding 2 about The Impact of AI in Insurance Claims
+ Finding 3 about The Impact of AI in Insurance Claims
+
+Data Quality Assessment
+
Sources Validated: 2/2
+Contradictions Identified: 1
+Confidence Level: 0.8
+
+Notable Patterns
+
Notable pattern 1
+ Notable pattern 2
+
+
+Insights and Hypotheses
+
+Generated Insights
+
+#Insight 1: AI significantly reduces the time it takes to process insurance claims
+
+Confidence: 0.90
+
+Reasoning Chain:
+AI algorithms can process data much faster than humans
+ Faster processing of data leads to quicker resolution of claims
+ Therefore, AI significantly reduces the time it takes to process insurance claims
+
+Testable Implications:
+If an insurance company implements AI, the average time to process claims should decrease
+ If AI is removed from the process, the average time to process claims should increase
+
+---
+
+#Insight 2: AI increases the accuracy of insurance claim evaluations
+
+Confidence: 0.80
+
+Reasoning Chain:
+AI algorithms are less prone to errors than humans
+ Fewer errors lead to more accurate evaluations
+ Therefore, AI increases the accuracy of insurance claim evaluations
+
+Testable Implications:
+If an insurance company implements AI, the accuracy of claim evaluations should increase
+ If AI is removed from the process, the accuracy of claim evaluations should decrease
+
+---
+
+#Insight 3: AI can lead to a decrease in fraud-related losses in insurance claims
+
+Confidence: 0.75
+
+Reasoning Chain:
+AI algorithms can detect patterns and anomalies more effectively than humans
+ Better detection of anomalies can lead to a decrease in fraud
+ Therefore, AI can lead to a decrease in fraud-related losses in insurance claims
+
+Testable Implications:
+If an insurance company implements AI, the amount of fraud-related losses should decrease
+ If AI is removed from the process, the amount of fraud-related losses should increase
+
+---
+
+
+Conclusions and Recommendations
+
+Key Conclusions
+
+1. AI significantly reduces the time it takes to process insurance claims (Confidence: 0.90)
+
+2. AI increases the accuracy of insurance claim evaluations (Confidence: 0.80)
+
+3. AI can lead to a decrease in fraud-related losses in insurance claims (Confidence: 0.75)
+
+Recommendations
+
+#Immediate Actions
+1. Validate Top Hypotheses: Prioritize testing of high-confidence insights
+2. Address Data Gaps: Focus on areas with lower confidence scores
+3. Monitor Key Indicators: Establish tracking for critical variables
+
+#Strategic Considerations
+
Leverage identified patterns for predictive modeling
+ Address contradictions through targeted data collection
+ Scale successful analytical approaches to other domains
+
+Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
\ No newline at end of file
diff --git a/reports/report_20251108_153732.markdown b/reports/report_20251108_153732.markdown
new file mode 100644
index 0000000..7e75bbd
--- /dev/null
+++ b/reports/report_20251108_153732.markdown
@@ -0,0 +1,153 @@
+# AI Research Report
+
+**Report ID**: report_20251108_153732
+**Generated**: 2025-11-08T15:37:32.179206
+**Type**: research_report
+
+
+# Executive Summary
+
+## Key Insights
+1. AI significantly reduces the time it takes to process insurance claims (Confidence: 0.90)
+2. AI increases the accuracy of insurance claim evaluations (Confidence: 0.80)
+3. AI can lead to a decrease in fraud-related losses in insurance claims (Confidence: 0.75)
+
+
+## Overall Confidence
+**Confidence Score: 0.50/1.00**
+
+## Primary Findings
+- Short summary focused on The Impact of AI in Insurance Claims.
+- Contradictions resolved: 0
+- Sources validated: 0
+
+## Immediate Implications
+- If AI is removed from the process, the amount of fraud-related losses should increase
+- If an insurance company implements AI, the amount of fraud-related losses should decrease
+- If AI is removed from the process, the average time to process claims should increase
+- If an insurance company implements AI, the accuracy of claim evaluations should increase
+- If AI is removed from the process, the accuracy of claim evaluations should decrease
+
+
+
+# Methodology
+
+## Data Sources
+This analysis utilized 3 distinct data sources including:
+- **Source Types**: industry_report, research_paper, news_article
+- **Time Range**: 2020-2025
+- **Geographic Scope**: Global
+
+## Analytical Approach
+The research employed a multi-agent AI system with the following specialized components:
+1. **Contextual Retriever**: Gathers and processes multi-source data
+2. **Critical Analysis**: Validates sources and identifies contradictions
+3. **Insight Generation**: Develops hypotheses and identifies trends
+4. **Report Builder**: Synthesizes findings into structured reports
+
+## Validation Framework
+All findings underwent rigorous validation including:
+- Source credibility assessment
+- Contradiction resolution
+- Confidence scoring
+- Reasoning chain verification
+
+
+
+# Key Findings
+
+## Primary Results
+- Finding 1 about The Impact of AI in Insurance Claims
+- Finding 2 about The Impact of AI in Insurance Claims
+- Finding 3 about The Impact of AI in Insurance Claims
+
+## Data Quality Assessment
+- **Sources Validated**: 2/2
+- **Contradictions Identified**: 1
+- **Confidence Level**: 0.8
+
+## Notable Patterns
+- Notable pattern 1
+- Notable pattern 2
+
+
+
+# Insights and Hypotheses
+
+## Generated Insights
+
+### Insight 1: AI significantly reduces the time it takes to process insurance claims
+
+**Confidence**: 0.90
+
+**Reasoning Chain**:
+- AI algorithms can process data much faster than humans
+- Faster processing of data leads to quicker resolution of claims
+- Therefore, AI significantly reduces the time it takes to process insurance claims
+
+**Testable Implications**:
+- If an insurance company implements AI, the average time to process claims should decrease
+- If AI is removed from the process, the average time to process claims should increase
+
+---
+
+### Insight 2: AI increases the accuracy of insurance claim evaluations
+
+**Confidence**: 0.80
+
+**Reasoning Chain**:
+- AI algorithms are less prone to errors than humans
+- Fewer errors lead to more accurate evaluations
+- Therefore, AI increases the accuracy of insurance claim evaluations
+
+**Testable Implications**:
+- If an insurance company implements AI, the accuracy of claim evaluations should increase
+- If AI is removed from the process, the accuracy of claim evaluations should decrease
+
+---
+
+### Insight 3: AI can lead to a decrease in fraud-related losses in insurance claims
+
+**Confidence**: 0.75
+
+**Reasoning Chain**:
+- AI algorithms can detect patterns and anomalies more effectively than humans
+- Better detection of anomalies can lead to a decrease in fraud
+- Therefore, AI can lead to a decrease in fraud-related losses in insurance claims
+
+**Testable Implications**:
+- If an insurance company implements AI, the amount of fraud-related losses should decrease
+- If AI is removed from the process, the amount of fraud-related losses should increase
+
+---
+
+
+
+# Conclusions and Recommendations
+
+## Key Conclusions
+
+1. AI significantly reduces the time it takes to process insurance claims (Confidence: 0.90)
+
+2. AI increases the accuracy of insurance claim evaluations (Confidence: 0.80)
+
+3. AI can lead to a decrease in fraud-related losses in insurance claims (Confidence: 0.75)
+
+## Recommendations
+
+### Immediate Actions
+1. **Validate Top Hypotheses**: Prioritize testing of high-confidence insights
+2. **Address Data Gaps**: Focus on areas with lower confidence scores
+3. **Monitor Key Indicators**: Establish tracking for critical variables
+
+### Strategic Considerations
+- Leverage identified patterns for predictive modeling
+- Address contradictions through targeted data collection
+- Scale successful analytical approaches to other domains
+
+## Next Steps
+1. Conduct controlled experiments to test generated hypotheses
+2. Expand data sources to improve confidence scores
+3. Implement monitoring system for ongoing validation
+
+
diff --git a/reports/report_20251108_153732.pdf b/reports/report_20251108_153732.pdf
new file mode 100644
index 0000000..7e75bbd
Binary files /dev/null and b/reports/report_20251108_153732.pdf differ
diff --git a/reports/report_20251108_153732_metadata.json b/reports/report_20251108_153732_metadata.json
new file mode 100644
index 0000000..f8d3c0c
--- /dev/null
+++ b/reports/report_20251108_153732_metadata.json
@@ -0,0 +1,12 @@
+{
+ "report_id": "report_20251108_153732",
+ "generated_at": "2025-11-08T15:37:32.179206",
+ "report_type": "research_report",
+ "version": "1.0",
+ "agents_used": [
+ "contextual_retriever",
+ "critical_analysis",
+ "insight_generation",
+ "report_builder"
+ ]
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3322e77
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,71 @@
+# requirements.txt
+openai>=1.0.0
+anthropic>=0.19.0
+python-dotenv>=1.0.0
+Jinja2>=3.1.2
+markdown>=3.4.4
+weasyprint>=60.0
+requests>=2.31.0
+annotated-types==0.7.0
+anyio==4.11.0
+asttokens==3.0.0
+certifi==2025.10.5
+cloudpickle==3.1.2
+colorama==0.4.6
+comm==0.2.3
+contourpy==1.3.3
+cycler==0.12.1
+debugpy==1.8.17
+decorator==5.2.1
+distro==1.9.0
+executing==2.2.1
+fonttools==4.60.1
+h11==0.16.0
+httpcore==1.0.9
+httpx==0.28.1
+idna==3.11
+ipykernel==6.31.0
+ipython==9.7.0
+ipython_pygments_lexers==1.1.1
+jedi==0.19.2
+jiter==0.11.1
+jupyter_client==8.6.3
+jupyter_core==5.9.1
+kiwisolver==1.4.9
+matplotlib==3.10.7
+matplotlib-inline==0.2.1
+nest-asyncio==1.6.0
+numpy==2.3.4
+openai==2.7.1
+packaging==25.0
+pandas==2.3.3
+parso==0.8.5
+pillow==12.0.0
+platformdirs==4.5.0
+prompt_toolkit==3.0.52
+psutil==7.1.3
+pure_eval==0.2.3
+pydantic==2.12.4
+pydantic_core==2.41.5
+Pygments==2.19.2
+pyparsing==3.2.5
+python-dateutil==2.9.0.post0
+python-dotenv==1.2.1
+pytz==2025.2
+pyzmq==27.1.0
+six==1.17.0
+sniffio==1.3.1
+spyder-kernels==3.1.1
+stack-data==0.6.3
+tornado==6.5.2
+tqdm==4.67.1
+traitlets==5.14.3
+typing-inspection==0.4.2
+typing_extensions==4.15.0
+tzdata==2025.2
+wcwidth==0.2.14
+openai>=1.0.0
+python-dotenv>=1.0.0
+pydantic>=2.0.0
+requests>=2.31.0
+pathlib2>=2.3.0; python_version < "3.4"
diff --git a/research_input_example.json b/research_input_example.json
new file mode 100644
index 0000000..52cedb6
--- /dev/null
+++ b/research_input_example.json
@@ -0,0 +1,56 @@
+{
+ "contextual_data": {
+ "sources": [
+ {
+ "type": "research_paper",
+ "title": "Example: The Impact of AI in Insurance Claims",
+ "year": 2025,
+ "authors": [
+ "Doe, J.",
+ "Rao, A."
+ ],
+ "journal": "Example Journal"
+ },
+ {
+ "type": "news_article",
+ "title": "Example: Latest developments in The Impact of AI in Insurance Claims",
+ "year": 2025,
+ "publication": "Example News"
+ },
+ {
+ "type": "industry_report",
+ "title": "Example: Q3 2025 market analysis for The Impact of AI in Insurance Claims",
+ "year": 2025,
+ "organization": "Example Research Group"
+ }
+ ],
+ "time_range": "2020-2025",
+ "geographic_scope": "Global",
+ "domain": "The Impact of AI in Insurance Claims",
+ "key_topics": [
+ "",
+ "",
+ ""
+ ]
+ },
+ "analysis_results": {
+ "key_findings": [
+ "Finding 1 about The Impact of AI in Insurance Claims",
+ "Finding 2 about The Impact of AI in Insurance Claims",
+ "Finding 3 about The Impact of AI in Insurance Claims"
+ ],
+ "contradictions": [
+ "Example contradiction A vs B about The Impact of AI in Insurance Claims"
+ ],
+ "source_validations": {
+ "source_1": true,
+ "source_2": true
+ },
+ "confidence_score": 0.8,
+ "summary": "Short summary focused on The Impact of AI in Insurance Claims.",
+ "patterns": [
+ "Notable pattern 1",
+ "Notable pattern 2"
+ ]
+ }
+}
\ No newline at end of file
diff --git a/sangu_santosh/Day7/A1.py b/sangu_santosh/Day7/A1.py
deleted file mode 100644
index 19aeec7..0000000
--- a/sangu_santosh/Day7/A1.py
+++ /dev/null
@@ -1,185 +0,0 @@
-import os
-from pathlib import Path
-from typing import List
-from dotenv import load_dotenv
-from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings
-from llama_index.vector_stores.lancedb import LanceDBVectorStore
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-
-# Load environment variables
-load_dotenv()
-
-def setup_llamaindex_settings():
- """
- Configure LlamaIndex with local embeddings and OpenRouter for LLM.
- This assignment focuses on vector database operations, so we'll use local embeddings only.
- """
- # Check for OpenRouter API key (for future use, not needed for this basic assignment)
- api_key = os.getenv("OPENROUTER_API_KEY")
- if not api_key:
- print("โน๏ธ OPENROUTER_API_KEY not found - that's OK for this assignment!")
- print(" This assignment only uses local embeddings for vector operations.")
-
- # Configure local embeddings (no API key required)
- Settings.embed_model = HuggingFaceEmbedding(
- model_name="BAAI/bge-small-en-v1.5",
- trust_remote_code=True
- )
-
- print("โ
LlamaIndex configured with local embeddings")
- print(" Using BAAI/bge-small-en-v1.5 for document embeddings")
-
-def load_documents_from_folder(folder_path: str):
- """
- Load documents from a folder using SimpleDirectoryReader.
-
- Args:
- folder_path (str): Path to the folder containing documents
-
- Returns:
- List of documents loaded from the folder
- """
- # Create SimpleDirectoryReader instance with recursive loading
- reader = SimpleDirectoryReader(
- input_dir=folder_path,
- recursive=True
- )
-
- # Load and return documents
- documents = reader.load_data()
- return documents
-
-def create_vector_store(db_path: str = "./vectordb", table_name: str = "documents"):
- """
- Create a LanceDB vector store for storing document embeddings.
-
- Args:
- db_path (str): Path where the vector database will be stored
- table_name (str): Name of the table in the vector database
-
- Returns:
- LanceDBVectorStore: Configured vector store
- """
- # Create the directory if it doesn't exist
- Path(db_path).mkdir(parents=True, exist_ok=True)
-
- # Create and return vector store
- vector_store = LanceDBVectorStore(uri=str(db_path), table_name=table_name)
- return vector_store
-
-def create_vector_index(documents: List, vector_store):
- """
- Create a vector index from documents using the provided vector store.
-
- Args:
- documents: List of documents to index
- vector_store: LanceDB vector store to use for storage
-
- Returns:
- VectorStoreIndex: The created vector index
- """
- # Create storage context with vector store
- storage_context = StorageContext.from_defaults(vector_store=vector_store)
-
- # Create and return index from documents
- index = VectorStoreIndex.from_documents(
- documents=documents,
- storage_context=storage_context
- )
- return index
-
-def search_documents(index, query: str, top_k: int = 3):
- """
- Search for relevant documents using the vector index.
-
- Args:
- index: Vector index to search
- query (str): Search query
- top_k (int): Number of top results to return
-
- Returns:
- List of retrieved document nodes
- """
- # Create retriever from index
- retriever = index.as_retriever(similarity_top_k=top_k)
-
- # Retrieve and return documents for the query
- results = retriever.retrieve(query)
- return results
-
-def test_pipeline():
- """
- Test the complete vector database pipeline
- """
- print("๐ Testing Complete Vector Database Pipeline")
- print("=" * 50)
-
- # Define paths
- data_folder = "data" # Data folder in Day-7/codes
-
- # Get database path from environment
- vector_db_path = os.getenv("A1_DB_PATH", "AssignmentsDB/Assignment1")
- Path(vector_db_path).mkdir(parents=True, exist_ok=True)
-
- # Step 1: Load documents
- print("\n๐ Step 1: Loading documents...")
- documents = load_documents_from_folder(data_folder)
- print(f" Loaded {len(documents)} documents")
-
- # Step 2: Create vector store
- print("\n๐๏ธ Step 2: Creating vector store...")
- vector_store = create_vector_store(vector_db_path)
- print(" Vector store status:", "โ
Created" if vector_store else "โ Failed")
-
- # Step 3: Create vector index
- print("\n๐ Step 3: Creating vector index...")
- if documents and vector_store:
- index = create_vector_index(documents, vector_store)
- print(" Index status:", "โ
Created" if index else "โ Failed")
- else:
- index = None
- print(" โ Cannot create index - missing documents or vector store")
-
- # Step 4: Test multiple search queries
- print("\n๐ Step 4: Testing search functionality...")
- if index:
- search_queries = [
- "What are AI agents?",
- "How to evaluate agent performance?",
- "Italian recipes and cooking",
- "Financial analysis and investment"
- ]
-
- for query in search_queries:
- print(f"\n ๐ Query: '{query}'")
- results = search_documents(index, query, top_k=2)
-
- if results:
- for i, result in enumerate(results, 1):
- text_preview = result.text[:100] if hasattr(result, 'text') else "No text available"
- score = f" (Score: {result.score:.4f})" if hasattr(result, 'score') else ""
- print(f" {i}. {text_preview}...{score}")
- else:
- print(" No results found")
- else:
- print(" โ Cannot test search - index not created")
-
- print("\n" + "=" * 50)
- print("๐ฏ Assignment Status:")
- print(f" Documents loaded: {'โ
' if documents else 'โ'}")
- print(f" Vector store created: {'โ
' if vector_store else 'โ'}")
- print(f" Index created: {'โ
' if index else 'โ'}")
- print(f" Search working: {'โ
' if index else 'โ'}")
-
- if documents and vector_store and index:
- print("\n๐ Congratulations! You've successfully completed the assignment!")
- print(" You've built a complete vector database with search functionality!")
- else:
- print("\n๐ Please complete the TODO functions above to finish the assignment.")
-
-if __name__ == "__main__":
- # Initialize LlamaIndex settings
- setup_llamaindex_settings()
-
- # Run the test pipeline
- test_pipeline()
\ No newline at end of file
diff --git a/sangu_santosh/Day7/A2.py b/sangu_santosh/Day7/A2.py
deleted file mode 100644
index d2a6d42..0000000
--- a/sangu_santosh/Day7/A2.py
+++ /dev/null
@@ -1,269 +0,0 @@
-import os
-from pathlib import Path
-from typing import Dict, List, Optional, Any
-from pydantic import BaseModel, Field
-from dotenv import load_dotenv
-
-# Load environment variables
-load_dotenv()
-
-# Core LlamaIndex components
-from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings
-from llama_index.core.query_engine import RetrieverQueryEngine
-from llama_index.core.retrievers import VectorIndexRetriever
-
-# Vector store
-from llama_index.vector_stores.lancedb import LanceDBVectorStore
-
-# Embeddings and LLM
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.llms.openrouter import OpenRouter
-
-# Advanced RAG components
-from llama_index.core.postprocessor import SimilarityPostprocessor
-from llama_index.core.response_synthesizers import TreeSummarize, Refine, CompactAndRefine
-from llama_index.core.output_parsers import PydanticOutputParser
-from llama_index.core.program import LLMTextCompletionProgram
-
-def setup_advanced_rag_settings():
- """
- Configure LlamaIndex with optimized settings for advanced RAG.
- Using only local embeddings for testing purposes.
- """
- print("โ ๏ธ Running in test mode - using only vector similarity search")
- print(" LLM operations are disabled for testing")
-
- # Configure local embeddings (no API key required)
- Settings.embed_model = HuggingFaceEmbedding(
- model_name="BAAI/bge-small-en-v1.5",
- trust_remote_code=True
- )
-
- # Advanced RAG configuration
- Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "512"))
- Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "50"))
-
- print("โ
Advanced RAG settings configured")
- print(" - Chunk size: 512 (optimized for precision)")
- print(" - Using local embeddings for cost efficiency")
- print(" - OpenRouter LLM ready for response synthesis")
-
-def setup_basic_index(data_folder: str = None, force_rebuild: bool = False):
- """
- Create a basic vector index that we'll enhance with advanced techniques.
- This reuses the concepts from Assignment 1.
- """
- # Use environment variables for paths
- data_folder = data_folder or "data"
-
- # Get database path from environment
- vector_db_path = os.getenv("A2_DB_PATH", "AssignmentsDB/Assignment2")
- Path(vector_db_path).mkdir(parents=True, exist_ok=True)
-
- # Create vector store
- vector_store = LanceDBVectorStore(
- uri=vector_db_path,
- table_name=os.getenv("VECTOR_DB_TABLE_NAME", "documents")
- )
-
- # Load documents
- if not Path(data_folder).exists():
- print(f"โ Data folder not found: {data_folder}")
- return None
-
- reader = SimpleDirectoryReader(input_dir=data_folder, recursive=True)
- documents = reader.load_data()
-
- # Create storage context and index
- storage_context = StorageContext.from_defaults(vector_store=vector_store)
- index = VectorStoreIndex.from_documents(
- documents,
- storage_context=storage_context,
- show_progress=True
- )
-
- print(f"โ
Basic index created with {len(documents)} documents")
- print(" Ready for advanced RAG techniques!")
- return index
-
-def create_query_engine_with_similarity_filter(
- index,
- similarity_cutoff: float = None,
- top_k: int = None
-):
- """
- Create a query engine that filters results based on similarity scores.
-
- Args:
- index: Vector index to query
- similarity_cutoff: Minimum similarity score (0.0 to 1.0)
- top_k: Number of initial results to retrieve before filtering
-
- Returns:
- Query engine with similarity filtering
- """
- # Get values from environment variables if not provided
- similarity_cutoff = similarity_cutoff or float(os.getenv("DEFAULT_SIMILARITY_CUTOFF", "0.3"))
- top_k = top_k or int(os.getenv("DEFAULT_TOP_K", "10"))
-
- # Create similarity postprocessor with the cutoff threshold
- similarity_processor = SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)
-
- # Create query engine with similarity filtering
- query_engine = index.as_query_engine(
- node_postprocessors=[similarity_processor],
- similarity_top_k=top_k
- )
-
- return query_engine
-
-def create_query_engine_with_tree_summarize(index, top_k: int = 5):
- """
- Create a query engine that uses TreeSummarize for comprehensive responses.
-
- Args:
- index: Vector index to query
- top_k: Number of results to retrieve
-
- Returns:
- Query engine with TreeSummarize synthesis
- """
- # Create TreeSummarize response synthesizer
- tree_synthesizer = TreeSummarize(verbose=True)
-
- # Create query engine with the synthesizer
- query_engine = index.as_query_engine(
- response_synthesizer=tree_synthesizer,
- similarity_top_k=top_k
- )
-
- return query_engine
-
-class ResearchPaperInfo(BaseModel):
- """Structured information about a research paper or AI concept."""
- title: str = Field(description="The main title or concept name")
- key_points: List[str] = Field(description="3-5 main points or findings")
- applications: List[str] = Field(description="Practical applications or use cases")
- summary: str = Field(description="Brief 2-3 sentence summary")
-
-def create_structured_output_program(output_model: BaseModel = ResearchPaperInfo):
- """
- Create a structured output program using Pydantic models.
-
- Args:
- output_model: Pydantic model class for structured output
-
- Returns:
- LLMTextCompletionProgram that returns structured data
- """
- # Create output parser with the Pydantic model
- output_parser = PydanticOutputParser(output_cls=output_model)
-
- # Create the structured output program
- program = LLMTextCompletionProgram.from_defaults(
- output_parser=output_parser,
- prompt_template_str="""
- Based on the following context and query, extract structured information according to the specified format.
-
- Context: {context}
- Query: {query}
-
- Provide a response in the following format:
- {format_instructions}
- """
- )
-
- return program
-
-def create_advanced_rag_pipeline(index, similarity_cutoff: float = 0.3, top_k: int = 10):
- """
- Create a comprehensive advanced RAG pipeline combining multiple techniques.
-
- Args:
- index: Vector index to query
- similarity_cutoff: Minimum similarity score for filtering
- top_k: Number of initial results to retrieve
-
- Returns:
- Advanced query engine with filtering and synthesis combined
- """
- # Create similarity postprocessor
- similarity_processor = SimilarityPostprocessor(similarity_cutoff=similarity_cutoff)
-
- # Create TreeSummarize for comprehensive responses
- tree_synthesizer = TreeSummarize(verbose=True)
-
- # Create the comprehensive query engine combining both techniques
- advanced_engine = index.as_query_engine(
- node_postprocessors=[similarity_processor],
- response_synthesizer=tree_synthesizer,
- similarity_top_k=top_k
- )
-
- return advanced_engine
-
-def test_pipeline(index):
- """Test vector similarity search functionality"""
- print("๐ Vector Search Test - Similarity Based Retrieval")
- print("=" * 60)
-
- # Test queries for vector search
- test_queries = [
- "What are the key capabilities of AI agents?",
- "How do you evaluate agent performance metrics?",
- "Italian recipes and cooking techniques"
- ]
-
- # Create similarity-based retriever
- retriever = VectorIndexRetriever(
- index=index,
- similarity_top_k=3 # Get top 3 most similar documents
- )
-
- print("\n๐ Testing Vector Similarity Search")
- print("=" * 60)
-
- for i, query in enumerate(test_queries, 1):
- print(f"\n๏ฟฝ Query {i}: '{query}'")
- print("-" * 50)
-
- # Retrieve similar documents
- nodes = retriever.retrieve(query)
-
- # Display results
- print(f"Found {len(nodes)} relevant documents:\n")
- for idx, node in enumerate(nodes, 1):
- print(f"๏ฟฝ Result {idx} (Similarity: {node.score:.4f})")
- # Show a preview of the text
- preview = node.text[:200] + "..." if len(node.text) > 200 else node.text
- print(f" {preview}\n")
-
- # Final status
- print("\n" + "=" * 60)
- print("๐ฏ Test Status:")
- print(" โ
Vector Store Created")
- print(" โ
Documents Indexed")
- print(" โ
Similarity Search Working")
-
- print("\n๐ Vector similarity search is working correctly!")
- print(" You can now:")
- print(" 1. Search through documents semantically")
- print(" 2. Get relevance scores for matches")
- print(" 3. Retrieve similar content across different file types")
-
-def main():
- # Initialize settings
- setup_advanced_rag_settings()
-
- # Create and test the index
- print("๐ Setting up basic index for advanced RAG...")
- index = setup_basic_index()
-
- if index:
- print("๐ Ready to implement advanced RAG techniques!")
- test_pipeline(index)
- else:
- print("โ Failed to create index - check data folder path")
-
-if __name__ == "__main__":
- main()
\ No newline at end of file
diff --git a/sangu_santosh/Day7/A3a.py b/sangu_santosh/Day7/A3a.py
deleted file mode 100644
index 75f9b69..0000000
--- a/sangu_santosh/Day7/A3a.py
+++ /dev/null
@@ -1,151 +0,0 @@
-from dotenv import load_dotenv
-import os
-import gradio as gr
-from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
-from llama_index.llms.openrouter import OpenRouter
-from llama_index.vector_stores.lancedb import LanceDBVectorStore
-import lancedb
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-import logging
-
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-# Load environment variables
-load_dotenv()
-
-# If the user has an OpenRouter API key, configure OpenRouter as the LLM
-openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
-if openrouter_api_key:
- try:
- Settings.llm = OpenRouter(
- api_key=openrouter_api_key,
- model=os.getenv("OPENROUTER_MODEL", "gpt-4o"),
- temperature=float(os.getenv("OPENROUTER_TEMPERATURE", "0.1")),
- )
- logging.info("โ
OpenRouter configured for LLM responses")
- except Exception as _e:
- logging.warning(f"โ ๏ธ Failed to configure OpenRouter LLM: {_e}. Falling back to vector-only mode.")
-
-# Configure paths and settings
-ASSIGNMENTS_DB_ROOT = os.getenv('ASSIGNMENTS_DB_ROOT', 'assignment_vectordb')
-A3_DB_PATH = os.getenv('A3_DB_PATH', os.path.join(ASSIGNMENTS_DB_ROOT, 'documents.lance'))
-
-def create_vector_index():
- logging.info("๐ Setting up basic index for Gradio RAG...")
-
- try:
- # Initialize the local HuggingFace embeddings
- embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
-
- # Configure global settings
- Settings.embed_model = embed_model
- Settings.chunk_size = 512
-
- # Set up LanceDB
- db = lancedb.connect(ASSIGNMENTS_DB_ROOT)
-
- if "documents" not in db.table_names():
- logging.info("Creating new vector store...")
- # Load documents
- documents = SimpleDirectoryReader(
- input_dir="data",
- recursive=True,
- exclude_hidden=True,
- filename_as_id=True
- ).load_data()
-
- # Create vector store
- vector_store = LanceDBVectorStore(
- uri=ASSIGNMENTS_DB_ROOT,
- table_name="documents",
- create_table_if_not_exists=True
- )
-
- # Create index
- index = VectorStoreIndex.from_documents(
- documents,
- vector_store=vector_store
- )
- logging.info(f"โ
Created new index with {len(documents)} documents")
- else:
- logging.info("Loading existing vector store...")
- vector_store = LanceDBVectorStore(
- uri=ASSIGNMENTS_DB_ROOT,
- table_name="documents"
- )
- index = VectorStoreIndex.from_vector_store(
- vector_store
- )
- logging.info("โ
Loaded existing vector store")
-
- return index
- except Exception as e:
- logging.error(f"Error setting up vector store: {str(e)}")
- raise
-
-def query_index(index, query_text):
- try:
- logging.info(f"๐ Processing query: '{query_text}'")
- query_engine = index.as_query_engine(
- similarity_top_k=1, # Get the most relevant result
- response_mode="no_text" # Return only source nodes
- )
- response = query_engine.query(query_text)
-
- # Extract relevant text from the response
- if response and response.source_nodes:
- return response.source_nodes[0].node.text
- else:
- return "No relevant information found."
-
- except Exception as e:
- logging.error(f"Error processing query: {str(e)}")
- return f"An error occurred while processing your question: {str(e)}"
-
-def create_gradio_interface(index):
- def process_query(query):
- if not query or query.strip() == "":
- return "Please enter a question."
- return query_index(index, query.strip())
-
- # Create Gradio interface
- iface = gr.Interface(
- fn=process_query,
- inputs=gr.Textbox(
- lines=2,
- placeholder="Ask a question about the documents in our knowledge base...",
- label="Question"
- ),
- outputs=gr.Textbox(
- label="Answer",
- lines=10
- ),
- title="๐ Basic RAG Q&A System",
- description="Ask questions about the documents in our knowledge base.",
- examples=[
- "What are the key capabilities of AI agents?",
- "How do you evaluate agent performance metrics?",
- "Tell me about Italian recipes and cooking techniques"
- ],
- allow_flagging="never"
- )
- return iface
-
-def main():
- try:
- logging.info("๐ Setting up Basic Gradio RAG System...")
-
- # Create or load the vector index
- index = create_vector_index()
-
- # Create and launch Gradio interface
- interface = create_gradio_interface(index)
- interface.launch(share=False) # Set share=True if you want a public URL
-
- except Exception as e:
- logging.error(f"Failed to start Gradio interface: {str(e)}")
- raise
-
-if __name__ == "__main__":
- main()
\ No newline at end of file
diff --git a/sangu_santosh/Day7/A3b.py b/sangu_santosh/Day7/A3b.py
deleted file mode 100644
index 155a34f..0000000
--- a/sangu_santosh/Day7/A3b.py
+++ /dev/null
@@ -1,208 +0,0 @@
-from dotenv import load_dotenv
-import os
-import logging
-import gradio as gr
-
-from llama_index.core import (
- SimpleDirectoryReader,
- VectorStoreIndex,
- StorageContext,
- Settings,
-)
-from llama_index.vector_stores.lancedb import LanceDBVectorStore
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-from llama_index.llms.openrouter import OpenRouter
-from llama_index.core.postprocessor import SimilarityPostprocessor
-from llama_index.core.response_synthesizers import TreeSummarize, Refine, CompactAndRefine
-
-import lancedb
-
-# Configure logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-# Load environment variables
-load_dotenv()
-
-# Configure paths and defaults
-ASSIGNMENTS_DB_ROOT = os.getenv('ASSIGNMENTS_DB_ROOT', 'assignment_vectordb')
-DB_TABLE = os.getenv('A3B_VECTOR_TABLE', 'documents')
-DEFAULT_EMBED_MODEL = os.getenv('EMBEDDING_MODEL', 'BAAI/bge-small-en-v1.5')
-
-
-def configure_settings(chunk_size: int, chunk_overlap: int, embed_model_name: str, openrouter_model: str | None, openrouter_temp: float | None, openrouter_key: str | None):
- """Apply Settings for LlamaIndex based on UI inputs."""
- # Embeddings
- Settings.embed_model = HuggingFaceEmbedding(model_name=embed_model_name, trust_remote_code=True)
- Settings.chunk_size = int(chunk_size)
- Settings.chunk_overlap = int(chunk_overlap)
-
- # Configure OpenRouter if provided
- if openrouter_key and openrouter_model:
- try:
- Settings.llm = OpenRouter(api_key=openrouter_key, model=openrouter_model, temperature=openrouter_temp or 0.1)
- logging.info("โ
OpenRouter configured for LLM responses")
- except Exception as e:
- logging.warning(f"โ ๏ธ Failed to configure OpenRouter LLM: {e}. Falling back to vector-only mode.")
- Settings.llm = None
- else:
- Settings.llm = None
-
-
-def create_or_rebuild_index(data_dir: str = 'data', force_rebuild: bool = False):
- """Create or load the LanceDB-backed VectorStoreIndex."""
- logging.info("๐ Initializing vector store...")
- db = lancedb.connect(ASSIGNMENTS_DB_ROOT)
-
- vector_store = LanceDBVectorStore(uri=ASSIGNMENTS_DB_ROOT, table_name=DB_TABLE, create_table_if_not_exists=True)
-
- # If force_rebuild is requested, or table is empty, build index from data
- try:
- need_build = force_rebuild or (DB_TABLE not in db.table_names())
- except Exception:
- need_build = True
-
- if need_build:
- if not os.path.isdir(data_dir):
- raise FileNotFoundError(f"Data directory not found: {data_dir}")
- reader = SimpleDirectoryReader(
- input_dir=data_dir,
- recursive=True,
- filename_as_id=True,
- required_exts=[".txt", ".md", ".csv", ".html", ".pdf", ".json"] # Skip audio for now
- )
- documents = reader.load_data()
-
- index = VectorStoreIndex.from_documents(documents, vector_store=vector_store)
- logging.info(f"โ
Created new index with {len(documents)} documents")
- else:
- index = VectorStoreIndex.from_vector_store(vector_store)
- logging.info("โ
Loaded existing vector store")
-
- return index
-
-
-def build_query_engine(index, top_k: int, use_llm: bool, synthesizer: str, similarity_cutoff: float | None):
- """Build a query engine with optional synthesizer and postprocessor."""
- node_postprocessors = []
- if similarity_cutoff is not None and similarity_cutoff > 0.0:
- node_postprocessors.append(SimilarityPostprocessor(similarity_cutoff=similarity_cutoff))
-
- response_synthesizer = None
- response_mode = 'no_text' # Default to vector-only mode
-
- if use_llm and getattr(Settings, 'llm', None) is not None:
- response_mode = 'compact' # Use LLM synthesis mode
- if synthesizer == 'TreeSummarize':
- response_synthesizer = TreeSummarize(verbose=False)
- elif synthesizer == 'Refine':
- response_synthesizer = Refine()
- elif synthesizer == 'CompactAndRefine':
- response_synthesizer = CompactAndRefine()
- else:
- response_synthesizer = TreeSummarize(verbose=False)
-
- qe = index.as_query_engine(
- node_postprocessors=node_postprocessors or None,
- response_synthesizer=response_synthesizer,
- response_mode=response_mode,
- similarity_top_k=int(top_k),
- )
- return qe
-
-
-def launch_ui():
- # Default UI values matching README
- model_options = ['gpt-4o', 'gpt-4o-mini', 'gpt-4o-nano']
- synth_options = ['TreeSummarize', 'Refine', 'CompactAndRefine']
-
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
- gr.Markdown('# A3b โ Advanced Gradio RAG')
-
- with gr.Row():
- # Left column - Query interface
- with gr.Column(scale=3):
- question = gr.Textbox(lines=3, placeholder='Ask a question...', label='Textbox')
- run_button = gr.Button('Submit', variant='primary', size='lg')
- output = gr.Textbox(label='Answer', lines=15, max_lines=20)
- status = gr.Textbox(label='Status', lines=4, interactive=False)
-
- # Right column - Configuration panel
- with gr.Column(scale=2):
- gr.Markdown('**Advanced Settings**', elem_id='advanced-settings-label')
- with gr.Accordion('Advanced Settings', open=False):
- temp_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.1, label='Temperature')
- top_k = gr.Slider(minimum=1, maximum=10, step=1, value=3, label='Similarity Top-K')
- similarity_cutoff = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.3, label='Similarity Cutoff')
- gr.Markdown('## Configuration')
- use_llm_checkbox = gr.Checkbox(label='Use LLM for synthesis (requires valid OpenRouter key)', value=False)
- model_dropdown = gr.Dropdown(label='Model', choices=model_options, value='gpt-4o')
- synth_dropdown = gr.Dropdown(label='Response Synthesizer', choices=synth_options, value='TreeSummarize')
- chunk_size = gr.Dropdown(label='Chunk Size', choices=[256, 512, 1024, 2048], value=512)
- chunk_overlap = gr.Dropdown(label='Chunk Overlap', choices=[0, 25, 50, 100, 200], value=50)
- rebuild_checkbox = gr.Checkbox(label='Force Rebuild (reload all documents)', value=False)
- init_button = gr.Button('Initialize / Load DB', size='lg')
-
- # Internal state holders
- index_state = gr.State(None)
-
- def init_db(model, temp, csize, cover, rebuild=False, data_dir='data'):
- status_msgs = []
- try:
- status_msgs.append('โ๏ธ Configuring settings...')
- openrouter_key = os.getenv('OPENROUTER_API_KEY')
- configure_settings(int(csize), int(cover), DEFAULT_EMBED_MODEL, model, float(temp), openrouter_key)
- status_msgs.append('โ
Settings applied')
-
- status_msgs.append('๐ Loading/creating index...')
- idx = create_or_rebuild_index(data_dir=data_dir, force_rebuild=rebuild)
- status_msgs.append('โ
Index ready!')
- return idx, '\n'.join(status_msgs)
- except Exception as e:
- logging.exception('Failed to initialize DB')
- return None, f'Error initializing DB: {e}'
-
- def answer_question(idx, q, use_llm, topk, synth, sim_cutoff):
- if idx is None:
- return 'Index not initialized. Click Initialize / Load DB first.'
- if not q or q.strip() == '':
- return 'Please enter a question.'
- try:
- logging.info(f"Query with settings: use_llm={use_llm}, top_k={topk}, synth={synth}, sim_cutoff={sim_cutoff}")
- qe = build_query_engine(idx, top_k=topk, use_llm=use_llm, synthesizer=synth, similarity_cutoff=sim_cutoff)
- resp = qe.query(q)
-
- # If LLM synthesis is enabled and we have a response
- if use_llm and getattr(resp, 'response', None):
- return resp.response
-
- # Vector-only mode: return all retrieved nodes
- if resp and getattr(resp, 'source_nodes', None):
- nodes = resp.source_nodes
- if len(nodes) == 0:
- return "No relevant documents found (similarity cutoff may be too high)."
-
- # Format multiple nodes with their scores
- result = f"Found {len(nodes)} relevant document(s):\n\n"
- for i, node in enumerate(nodes, 1):
- score = getattr(node, 'score', None)
- if isinstance(score, float):
- score_str = f"{score:.4f}"
- else:
- score_str = str(score) if score is not None else "N/A"
- result += f"--- Result {i} (Similarity: {score_str}) ---\n"
- result += f"{node.node.text}\n\n"
- return result
-
- return str(resp)
- except Exception as e:
- logging.exception('Error answering')
- return f'Error during query: {e}'
-
- init_button.click(fn=init_db, inputs=[model_dropdown, temp_slider, chunk_size, chunk_overlap, rebuild_checkbox], outputs=[index_state, status])
- run_button.click(fn=answer_question, inputs=[index_state, question, use_llm_checkbox, top_k, synth_dropdown, similarity_cutoff], outputs=[output])
-
- demo.launch(share=False)
-
-
-if __name__ == '__main__':
- launch_ui()
diff --git a/sangu_santosh/README.md b/sangu_santosh/README.md
deleted file mode 100644
index dbb6718..0000000
--- a/sangu_santosh/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# sangu_santosh
diff --git a/sangu_santosh/chatbot1/README.txt b/sangu_santosh/chatbot1/README.txt
deleted file mode 100644
index 8af42af..0000000
--- a/sangu_santosh/chatbot1/README.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-Multi-Chat Assistant (Streamlit + OpenRouter)
-=============================================
-
-Quick start
------------
-1) Create venv and install requirements
- - Windows (PowerShell):
- python -m venv .venv
- .venv\Scripts\activate
- pip install -r requirements.txt
-
- - Linux/macOS:
- python3 -m venv .venv
- source .venv/bin/activate
- pip install -r requirements.txt
-
-2) Add your OpenRouter key to .streamlit/secrets.toml:
- OPENROUTER_API_KEY = "your-key"
-
-3) Run:
- streamlit run app.py
-
-Features
---------
-- Multiple conversations & local persistence in ./chat_history
-- Sidebar to create/switch/delete chats
-- Model selector & persona selector
-- Translation mode with auto language detection
-- Dark/Light theme (CSS injected)
-- Streaming responses
-- Conversation summarization expander
-- Export current chat: TXT, JSON, CSV
\ No newline at end of file
diff --git a/sangu_santosh/chatbot1/chatbotapp1.py b/sangu_santosh/chatbot1/chatbotapp1.py
deleted file mode 100644
index 291953b..0000000
--- a/sangu_santosh/chatbot1/chatbotapp1.py
+++ /dev/null
@@ -1,692 +0,0 @@
-import os
-import json
-import csv
-import uuid
-import time
-import datetime as dt
-from pathlib import Path
-from typing import Dict, List, Generator, Any
-
-import requests
-import streamlit as st
-
-# =============================
-# ---- Config & Constants ----
-# =============================
-APP_TITLE = "Multi-Chat Assistant"
-DATA_DIR = Path("./chats")
-INDEX_FILE = DATA_DIR / "index.json"
-DEFAULT_MODEL = "openai/gpt-4o-mini:free"
-DEFAULT_SUMMARY_MODEL = "openai/gpt-4o-mini:free"
-REQUEST_TIMEOUT = 60
-
-PERSONAS = {
- "General": "You are a helpful, concise assistant. Prefer short, clear answers.",
- "Teacher": "You are a calm teacher. Explain simply, add brief examples when helpful.",
- "Coach": "You are an encouraging coach. Give actionable, stepwise advice.",
- "Analyst": "You are a precise analyst. Be structured and avoid fluff.",
-}
-
-TRANSLATE_TO = [
- "English", "Hindi", "Spanish", "French", "German", "Chinese", "Arabic",
-]
-
-SUGGESTED_MODELS = {
- "OpenAI โข GPT-4o mini (free)": "openai/gpt-4o-mini:free",
- "Meta โข Llama-3.1-8B Instruct (free)": "meta-llama/llama-3.1-8b-instruct:free",
- "Mistral โข Mistral-7B Instruct (free)": "mistralai/mistral-7b-instruct:free",
-}
-
-DEFAULT_TITLES = {"New Chat", "Untitled", ""}
-
-# =============================
-# ---- File I/O Utilities ----
-# =============================
-
-def ensure_store() -> None:
- DATA_DIR.mkdir(parents=True, exist_ok=True)
- if not INDEX_FILE.exists():
- INDEX_FILE.write_text(json.dumps({"chats": []}, indent=2), encoding="utf-8")
-
-
-def read_index() -> Dict[str, Any]:
- ensure_store()
- try:
- idx = json.loads(INDEX_FILE.read_text(encoding="utf-8"))
- if not isinstance(idx, dict) or "chats" not in idx:
- return {"chats": []}
- return idx
- except Exception:
- return {"chats": []}
-
-
-def write_index(index: Dict[str, Any]) -> None:
- ensure_store()
- INDEX_FILE.write_text(json.dumps(index, indent=2, ensure_ascii=False), encoding="utf-8")
-
-
-def chat_file(chat_id: str) -> Path:
- return DATA_DIR / f"{chat_id}.json"
-
-
-def create_chat(title: str = "New Chat") -> str:
- chat_id = uuid.uuid4().hex[:8]
- now = dt.datetime.now(dt.timezone.utc).isoformat()
- doc = {
- "id": chat_id,
- "title": title or "New Chat",
- "created": now,
- "updated": now,
- "persona": "General",
- "model": DEFAULT_MODEL,
- "messages": [],
- "summaries": {"key_points": "", "action_items": ""},
- }
- chat_file(chat_id).write_text(json.dumps(doc, indent=2, ensure_ascii=False), encoding="utf-8")
- index = read_index()
- index["chats"].insert(0, {"id": chat_id, "title": doc["title"], "updated": now})
- write_index(index)
- return chat_id
-
-
-def load_chat(chat_id: str) -> Dict[str, Any]:
- try:
- doc = json.loads(chat_file(chat_id).read_text(encoding="utf-8"))
- if not isinstance(doc, dict):
- return {}
- doc.setdefault("title", "New Chat")
- doc.setdefault("messages", [])
- doc.setdefault("persona", "General")
- doc.setdefault("model", DEFAULT_MODEL)
- doc.setdefault("summaries", {"key_points": "", "action_items": ""})
- return doc
- except Exception:
- return {}
-
-
-def save_chat(doc: Dict[str, Any]) -> None:
- if not doc:
- return
- doc["updated"] = dt.datetime.now(dt.timezone.utc).isoformat()
- chat_file(doc["id"]).write_text(json.dumps(doc, indent=2, ensure_ascii=False), encoding="utf-8")
- index = read_index()
- found = False
- for row in index["chats"]:
- if row["id"] == doc["id"]:
- row["title"] = doc.get("title") or row.get("title") or "New Chat"
- row["updated"] = doc["updated"]
- found = True
- break
- if not found:
- index["chats"].insert(0, {"id": doc["id"], "title": doc.get("title","New Chat"), "updated": doc["updated"]})
- write_index(index)
-
-
-def delete_chat(chat_id: str) -> None:
- try:
- chat_file(chat_id).unlink(missing_ok=True)
- except Exception:
- pass
- index = read_index()
- index["chats"] = [c for c in index["chats"] if c["id"] != chat_id]
- write_index(index)
-
-# =============================
-# ---- OpenRouter Key Guard ---
-# =============================
-
-def _get_api_key() -> str:
- key = ""
- try:
- key = st.secrets.get("OPENROUTER_API_KEY", "")
- except Exception:
- pass
- if not key:
- key = os.getenv("OPENROUTER_API_KEY", "")
- return (key or "").strip()
-
-def _require_api_key_or_stop():
- key = _get_api_key()
- if not key:
- st.error(
- "OpenRouter API key not found. Add `OPENROUTER_API_KEY` to `.streamlit/secrets.toml` "
- "or set it as an environment variable, then restart the app."
- )
- st.stop()
-
-# =============================
-# ---- OpenRouter Client -----
-# =============================
-
-def openrouter_headers() -> Dict[str, str]:
- key = _get_api_key() # validated via _require_api_key_or_stop() in main()
- referer = os.getenv("STREAMLIT_REFERER", "http://localhost:8501")
-
- def ascii_only(s: str) -> str:
- try:
- return s.encode("ascii", "ignore").decode("ascii")
- except Exception:
- return ""
-
- return {
- "Authorization": f"Bearer {key}",
- "HTTP-Referer": referer,
- "X-Title": ascii_only(APP_TITLE),
- "Content-Type": "application/json",
- "Accept": "text/event-stream, application/json",
- }
-
-
-def stream_chat_completion(messages: List[Dict[str, str]], model: str) -> Generator[str, None, None]:
- url = "https://openrouter.ai/api/v1/chat/completions"
- payload = {"model": model, "messages": messages, "stream": True}
- # Read SSE as raw bytes and UTF-8 decode manually to avoid mojibake
- with requests.post(url, headers=openrouter_headers(), json=payload, stream=True, timeout=REQUEST_TIMEOUT) as r:
- r.raise_for_status()
- for raw in r.iter_lines(chunk_size=8192):
- if not raw:
- continue
- raw = raw.strip()
- if raw.startswith(b":"): # heartbeat/comment
- continue
- if not raw.startswith(b"data:"):
- continue
- data = raw.split(b":", 1)[1].strip()
- if data == b"[DONE]":
- break
- try:
- obj = json.loads(data.decode("utf-8"))
- delta = obj.get("choices", [{}])[0].get("delta", {}).get("content", "")
- if delta:
- yield delta
- except Exception:
- continue
-
-
-def non_stream_completion(messages: List[Dict[str, str]], model: str) -> str:
- url = "https://openrouter.ai/api/v1/chat/completions"
- payload = {"model": model, "messages": messages}
- r = requests.post(url, headers=openrouter_headers(), json=payload, timeout=REQUEST_TIMEOUT)
- r.raise_for_status()
- r.encoding = "utf-8"
- data = r.json()
- return data["choices"][0]["message"]["content"]
-
-# =============================
-# ---- AI Helper Functions ----
-# =============================
-
-def persona_system(persona: str) -> str:
- return PERSONAS.get(persona, PERSONAS["General"])
-
-# --- Translation mode helpers (single-call JSON with cultural context) ---
-
-def translate_with_context(text: str, target_language: str, model: str) -> Dict[str, str]:
- """Ask model to detect language and provide translation + notes in strict JSON."""
- sys = {
- "role": "system",
- "content": (
- "You are an expert translator. First detect the input language. "
- f"Translate naturally into {target_language}. Provide helpful cultural notes. "
- "Return ONLY compact JSON with keys: detected_language, translation, cultural_note, alternative, regional_note. "
- "Use one short sentence each for cultural_note and regional_note."
- ),
- }
- user = {"role": "user", "content": text[:4000]}
- raw = non_stream_completion([sys, user], model).strip()
-
- # Robust JSON extraction
- def _coerce_json(s: str) -> Dict[str, str]:
- try:
- return json.loads(s)
- except Exception:
- try:
- start = s.find("{")
- end = s.rfind("}")
- if start != -1 and end != -1:
- return json.loads(s[start:end+1])
- except Exception:
- pass
- return {}
-
- data = _coerce_json(raw)
- return {
- "detected_language": str(data.get("detected_language", "")).strip() or "Unknown",
- "translation": str(data.get("translation", "")).strip() or "",
- "cultural_note": str(data.get("cultural_note", "")).strip(),
- "alternative": str(data.get("alternative", "")).strip(),
- "regional_note": str(data.get("regional_note", "")).strip(),
- }
-
-def render_translation_block(res: Dict[str, str], target_language: str) -> str:
- # Emojis for visual consistency
- det = res.get("detected_language", "Unknown")
- trn = res.get("translation", "")
- cul = res.get("cultural_note", "")
- alt = res.get("alternative", "")
- reg = res.get("regional_note", "")
-
- parts = [
- f"๐ต **Detected Language:** {det}",
- f"๐ฏ **Translation ({target_language}):** {json.dumps(trn, ensure_ascii=False)}",
- ]
- if cul: parts.append(f"๐ก **Cultural Note:** {cul}")
- if alt: parts.append(f"โจ **Alternative:** {json.dumps(alt, ensure_ascii=False)}")
- if reg: parts.append(f"๐ **Regional Note:** {reg}")
- return "\n\n".join(parts)
-
-def detect_language(text: str, model: str) -> str:
- sys_msg = {
- "role": "system",
- "content": "Detect the language name for the given text. Reply with only the language name.",
- }
- user_msg = {"role": "user", "content": text[:2000]}
- try:
- out = non_stream_completion([sys_msg, user_msg], model)
- return out.strip().split("\n")[0]
- except Exception:
- return "Unknown"
-
-
-def translate_text(text: str, target_language: str, model: str) -> str:
- sys_msg = {
- "role": "system",
- "content": f"You translate to {target_language}. Keep meaning and tone; be natural.",
- }
- user_msg = {"role": "user", "content": text}
- return non_stream_completion([sys_msg, user_msg], model)
-
-
-def summarize_conversation(messages: List[Dict[str, str]], model: str) -> Dict[str, str]:
- sys_msg = {
- "role": "system",
- "content": "Summarize the conversation into two parts: 1) Key Points (bulleted, <=6 bullets), 2) Action Items (numbered, <=5 items). Return as JSON with keys key_points and action_items.",
- }
- convo_text = []
- for m in messages[-12:]:
- who = "User" if m["role"] == "user" else "Assistant"
- convo_text.append(f"{who}: {m['content']}")
- user_msg = {"role": "user", "content": "\n".join(convo_text)[:6000]}
- try:
- raw = non_stream_completion([sys_msg, user_msg], model)
- j = json.loads(raw)
- return {"key_points": j.get("key_points", ""), "action_items": j.get("action_items", "")}
- except Exception:
- return {"key_points": "", "action_items": ""}
-
-# ---------- Auto Title Helpers ----------
-
-def _simple_title_from_first_user(messages: List[Dict[str, str]]) -> str:
- text = ""
- for m in messages:
- if m.get("role") == "user" and m.get("content", "").strip():
- text = m["content"].strip()
- break
- if not text:
- return ""
- text = text.replace("\n", " ").strip()
- if len(text) > 60:
- text = text[:57].rstrip() + "โฆ"
- return " ".join(w.capitalize() for w in text.split())
-
-def _model_title_from_history(messages: List[Dict[str, str]], model: str) -> str:
- if not messages:
- return ""
- sys = {"role": "system",
- "content": "Create a concise 3-7 word Title Case title for this conversation. No quotes."}
- convo = []
- for m in messages[-16:]:
- role = m.get("role", "user")
- content = m.get("content", "")
- if not content:
- continue
- convo.append({"role": role, "content": content[:2000]})
- try:
- title = non_stream_completion([sys] + convo, model).strip().replace("\n", " ")
- if len(title) > 60:
- title = title[:57].rstrip() + "โฆ"
- return title
- except Exception:
- return ""
-
-def maybe_auto_title(chat_doc: Dict[str, Any]) -> None:
- current = (chat_doc.get("title") or "").strip()
- if current in DEFAULT_TITLES:
- roles = [m.get("role") for m in chat_doc.get("messages", [])]
- has_user = "user" in roles
- has_assistant = "assistant" in roles
- title = ""
- if has_user and has_assistant:
- title = _model_title_from_history(chat_doc["messages"], chat_doc.get("model", DEFAULT_MODEL))
- if not title:
- title = _simple_title_from_first_user(chat_doc.get("messages", []))
- if title and title not in DEFAULT_TITLES:
- chat_doc["title"] = title
- save_chat(chat_doc)
-
-# =============================
-# ---- Export Helpers ---------
-# =============================
-
-def export_txt(doc: Dict[str, Any]) -> str:
- lines = [
- f"Title: {doc.get('title','')}",
- f"Persona: {doc.get('persona','')}",
- "",
- "Conversation:",
- ]
- for m in doc.get("messages", []):
- who = "User" if m["role"] == "user" else "Assistant"
- lines.append(f"[{who}] {m.get('content','')}")
- return "\n".join(lines)
-
-
-def export_json(doc: Dict[str, Any]) -> str:
- return json.dumps(doc, indent=2, ensure_ascii=False)
-
-
-def export_csv(doc: Dict[str, Any]) -> str:
- from io import StringIO
- buf = StringIO()
- writer = csv.writer(buf)
- writer.writerow(["role", "content", "timestamp"])
- for m in doc.get("messages", []):
- writer.writerow([m.get("role",""), m.get("content",""), doc.get("updated","")])
- return buf.getvalue()
-
-# =============================
-# ---- Theming (Dark/Light) ---
-# =============================
-DARK_CSS = """
-
-"""
-
-LIGHT_CSS = """
-
-"""
-
-# =============================
-# ---- Streamlit UI ----------
-# =============================
-
-def init_session():
- ensure_store()
- if "chat_id" not in st.session_state:
- idx = read_index()
- st.session_state.chat_id = idx["chats"][0]["id"] if idx["chats"] else create_chat("New Chat")
- if "start_ts" not in st.session_state:
- st.session_state.start_ts = time.time()
- # Default theme: Light
- if "theme" not in st.session_state:
- st.session_state.theme = "Light"
- if "rename_mode" not in st.session_state:
- st.session_state.rename_mode = False
- if "rename_val" not in st.session_state:
- st.session_state.rename_val = ""
-
-
-def sidebar(chat_doc: Dict[str, Any]):
- st.sidebar.title("Conversations")
- if st.sidebar.button("โ New Chat", use_container_width=True):
- st.session_state.chat_id = create_chat("New Chat")
- st.rerun()
-
- st.sidebar.subheader("Chat History")
- idx = read_index()
- ids = [row["id"] for row in idx["chats"]]
- labels = {row["id"]: (row["title"] or row["id"]) for row in idx["chats"]}
- if st.session_state.chat_id not in ids and ids:
- st.session_state.chat_id = ids[0]
-
- selected_id = st.sidebar.radio(
- "Your chats",
- options=ids,
- index=ids.index(st.session_state.chat_id) if ids else 0,
- format_func=lambda cid: labels.get(cid, cid),
- label_visibility="collapsed",
- )
- if selected_id and selected_id != st.session_state.chat_id:
- st.session_state.chat_id = selected_id
- st.rerun()
-
- cA, cB = st.sidebar.columns(2, gap="small")
- with cA:
- if st.button("๐ Rename", use_container_width=True):
- st.session_state.rename_mode = True
- st.session_state.rename_val = labels.get(st.session_state.chat_id, "")
- st.rerun()
- with cB:
- if st.button("๐๏ธ Delete", use_container_width=True):
- delete_chat(st.session_state.chat_id)
- new_idx = read_index()
- if new_idx["chats"]:
- st.session_state.chat_id = new_idx["chats"][0]["id"]
- else:
- st.session_state.chat_id = create_chat("New Chat")
- st.rerun()
-
- if st.session_state.rename_mode:
- new_name = st.sidebar.text_input("New title", value=st.session_state.rename_val)
- c1, c2 = st.sidebar.columns(2)
- with c1:
- if st.button("Save"):
- doc = load_chat(st.session_state.chat_id) or {}
- if doc and new_name.strip():
- doc["title"] = new_name.strip()
- save_chat(doc)
- st.session_state.rename_mode = False
- st.rerun()
- with c2:
- if st.button("Cancel"):
- st.session_state.rename_mode = False
- st.rerun()
-
- st.sidebar.divider()
-
- # Persona
- persona_names = list(PERSONAS.keys())
- try:
- sel_index = persona_names.index(chat_doc.get("persona", "General"))
- except ValueError:
- sel_index = 0
- chat_doc["persona"] = st.sidebar.selectbox("Persona", persona_names, index=sel_index)
-
- # Model selector with safe suggestions + optional custom override
- st.sidebar.subheader("Model")
- suggested_label = None
- current_model = chat_doc.get("model", DEFAULT_MODEL)
- for label, mid in SUGGESTED_MODELS.items():
- if mid == current_model:
- suggested_label = label
- break
- label_list = list(SUGGESTED_MODELS.keys())
- if suggested_label is None:
- suggested_label = label_list[0]
- chosen_label = st.sidebar.selectbox("Suggested", label_list, index=label_list.index(suggested_label))
- suggested_model = SUGGESTED_MODELS[chosen_label]
- custom_model = st.sidebar.text_input(
- "Custom model ID (optional)",
- value=current_model if current_model not in SUGGESTED_MODELS.values() else "",
- placeholder="e.g., openai/gpt-4o-mini:free",
- help="Leave blank to use the suggested model above.",
- )
- final_model = custom_model.strip() or suggested_model
- if final_model != current_model:
- chat_doc["model"] = final_model
- save_chat(chat_doc)
-
- # Translation mode
- st.sidebar.subheader("Translation Mode")
- use_translate = st.sidebar.toggle("Enable translation", value=False)
- target_lang = st.sidebar.selectbox("Target language", TRANSLATE_TO, index=0)
-
- # Theme toggle (with fallback)
- try:
- theme = st.sidebar.segmented_control("Theme", ["Light", "Dark"], selection_mode="single", default=st.session_state.theme)
- except Exception:
- theme = st.sidebar.radio("Theme", ["Light", "Dark"], index=0 if st.session_state.theme == "Light" else 1)
- st.session_state.theme = theme
-
- # Session stats
- st.sidebar.subheader("Session Stats")
- elapsed = int(time.time() - st.session_state.start_ts)
- st.sidebar.write(f"Duration: {elapsed//60}m {elapsed%60}s")
-
- # Exports
- st.sidebar.subheader("Export")
- st.sidebar.download_button("Export TXT", export_txt(chat_doc), file_name=f"{chat_doc.get('title','New Chat')}.txt")
- st.sidebar.download_button("Export JSON", export_json(chat_doc), file_name=f"{chat_doc.get('title','New Chat')}.json")
- st.sidebar.download_button("Export CSV", export_csv(chat_doc), file_name=f"{chat_doc.get('title','New Chat')}.csv")
-
- return use_translate, target_lang
-
-
-def main_panel(chat_doc: Dict[str, Any], use_translate: bool, target_lang: str):
- st.markdown(LIGHT_CSS if st.session_state.theme == "Light" else DARK_CSS, unsafe_allow_html=True)
-
- cols = st.columns([0.8, 0.2])
- with cols[0]:
- st.title(APP_TITLE)
- st.caption(f"Chat: **{chat_doc.get('title', 'New Chat')}**")
- with cols[1]:
- if st.button("๐งน Clear messages", use_container_width=True):
- chat_doc["messages"] = []
- save_chat(chat_doc)
- st.rerun()
-
- for m in chat_doc.get("messages", []):
- with st.chat_message("assistant" if m["role"]=="assistant" else "user"):
- st.markdown(m.get("content", ""))
-
- if prompt := st.chat_input("Type your messageโฆ"):
- # always store the user message
- chat_doc["messages"].append({"role": "user", "content": prompt})
- save_chat(chat_doc)
-
- if use_translate:
- # Translation mode: show structured block for the user's input
- with st.chat_message("assistant"):
- placeholder = st.empty()
- try:
- res = translate_with_context(prompt, target_lang, chat_doc.get("model", DEFAULT_MODEL))
- out_text = render_translation_block(res, target_language=target_lang)
- except Exception as e:
- out_text = f"โ Error (translation): {e}"
- placeholder.markdown(out_text)
- else:
- # Regular chat mode with streaming
- sys = {"role": "system", "content": persona_system(chat_doc.get("persona","General"))}
- messages = [sys] + chat_doc["messages"]
- with st.chat_message("assistant"):
- placeholder = st.empty()
- out_text = ""
- try:
- for chunk in stream_chat_completion(messages, chat_doc.get("model", DEFAULT_MODEL)):
- out_text += chunk
- placeholder.markdown(out_text)
- except Exception as e:
- out_text = f"โ Error: {e}"
- placeholder.markdown(out_text)
-
- chat_doc["messages"].append({"role": "assistant", "content": out_text})
- save_chat(chat_doc)
- maybe_auto_title(chat_doc)
-
- st.divider()
- c1, c2 = st.columns(2)
- if c1.button("๐ง Summarize conversation"):
- with st.status("Summarizingโฆ"):
- res = summarize_conversation(chat_doc["messages"], DEFAULT_SUMMARY_MODEL)
- chat_doc["summaries"] = res
- save_chat(chat_doc)
- with c2.expander("Summaries", expanded=False):
- st.markdown("**Key Points**\n\n" + (chat_doc.get("summaries",{}).get("key_points","") or "_No summary yet._"))
- st.markdown("**Action Items**\n\n" + (chat_doc.get("summaries",{}).get("action_items","") or "_No action items yet._"))
-
-# =============================
-# ---- Lightweight Self Tests -
-# =============================
-
-def _run_self_tests():
- assert persona_system("Unknown") == PERSONAS["General"]
- assert persona_system("Analyst").startswith("You are a precise analyst")
-
- sample = {
- "title": "T",
- "persona": "General",
- "messages": [
- {"role": "user", "content": "Hi"},
- {"role": "assistant", "content": "Hello"},
- ],
- "updated": "2020-01-01T00:00:00Z",
- }
- txt = export_txt(sample)
- assert "Title: T" in txt
- assert "[User] Hi" in txt and "[Assistant] Hello" in txt
- assert "\n" in txt
- joined = "\n".join(["A", "B", "C"])
- assert joined == "A\nB\nC"
- csv_out = export_csv(sample)
- assert csv_out.splitlines()[0].startswith("role,content,timestamp")
-
-# =============================
-# ---- Entry Point ------------
-# =============================
-
-def main():
- st.set_page_config(page_title=APP_TITLE, page_icon="๐ฌ", layout="wide")
- _require_api_key_or_stop()
-
- init_session()
- doc = load_chat(st.session_state.chat_id)
- if not doc:
- st.session_state.chat_id = create_chat("New Chat")
- doc = load_chat(st.session_state.chat_id)
- use_translate, target_lang = sidebar(doc)
- main_panel(doc, use_translate, target_lang)
-
-if os.getenv("RUN_SELF_TESTS") == "1":
- _run_self_tests()
-
-if __name__ == "__main__":
- main()
diff --git a/sangu_santosh/chatbot1/chats/12660ba6.json b/sangu_santosh/chatbot1/chats/12660ba6.json
deleted file mode 100644
index e175e88..0000000
--- a/sangu_santosh/chatbot1/chats/12660ba6.json
+++ /dev/null
@@ -1,38 +0,0 @@
-{
- "id": "12660ba6",
- "title": "When Was The Last Olympics Held",
- "created": "2025-10-31T12:22:18.504848+00:00",
- "updated": "2025-10-31T12:24:57.238417+00:00",
- "persona": "General",
- "model": "openai/gpt-oss-20b:free",
- "messages": [
- {
- "role": "user",
- "content": "when was the last Olympics held "
- },
- {
- "role": "assistant",
- "content": "โ Error: 404 Client Error: Not Found for url: https://openrouter.ai/api/v1/chat/completions"
- },
- {
- "role": "user",
- "content": "when was the last Olympics held"
- },
- {
- "role": "assistant",
- "content": "The most recent Olympic Games were the **2022 Winter Olympics in Beijing**, held **4โฏโโฏ20โฏFebruaryโฏ2022**."
- },
- {
- "role": "user",
- "content": "which countries participated in it "
- },
- {
- "role": "assistant",
- "content": "**Countries that participated in the 2022 Winter Olympics (Beijing)** \n*(91 National Olympic Committees โ NOCs)* \n\n1. Australia \n2. Austria \n3. Azerbaijan \n4. Belarus \n5. Belgium \n6. Bosnia & Herzegovina \n7. Brazil \n8. Bulgaria \n9. Canada \n10. Chile \n11. China \n12. Colombia \n13. Croatia \n14. Czech Republic \n15. Denmark \n16. Estonia \n17. Finland \n18. France \n19. Germany \n20. Greece \n21. Greenland (Denmark) \n22. Hungary \n23. Iceland \n24. India \n25. Indonesia \n26. Ireland \n27. Italy \n28. Japan \n29. Kazakhstan \n30. Kenya (Kenya) \n31. South Korea \n32. Kyrgyzstan \n33. Latvia \n34. Liechtenstein \n35. Lithuania \n36. Luxembourg \n37. North Macedonia \n38. Moldova \n39. Monaco \n40. Mongolia \n41. Montenegro \n42. Morocco \n43. Netherlands \n44. NewโฏZealand \n45. Norway \n46. Oman \n47. Pakistan \n48. Poland \n49. Portugal \n50. Qatar \n51. Romania \n52. Russia (Russian Olympic Committee) \n53. Saudi Arabia \n54. Serbia \n55. Slovakia \n56. Slovenia \n57. South Africa \n58. Spain \n59. Sweden \n60. Switzerland \n61. Tajikistan \n62. Thailand \n63. Turkey \n64. Ukraine \n65. United Kingdom (Great Britain) \n66. United States \n67. Uzbekistan \n68. Vietnam \n69. Belarusian (Belarus) โ *see note* \n70. Brazil โ *noted previously* \n71. Jamaica โ *not established* \n(And the remaining 20 NOCs omitted for brevity.) \n\n*Full official list is available on theโฏIOC website.*"
- }
- ],
- "summaries": {
- "key_points": "",
- "action_items": ""
- }
-}
\ No newline at end of file
diff --git a/sangu_santosh/chatbot1/chats/9beadbd5.json b/sangu_santosh/chatbot1/chats/9beadbd5.json
deleted file mode 100644
index 97f9a3b..0000000
--- a/sangu_santosh/chatbot1/chats/9beadbd5.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
- "id": "9beadbd5",
- "title": "Iโm not sure which country youโre asking aboutโcould youโฆ",
- "created": "2025-10-31T12:25:30.791946+00:00",
- "updated": "2025-10-31T12:35:55.482936+00:00",
- "persona": "General",
- "model": "openai/gpt-oss-20b:free",
- "messages": [
- {
- "role": "user",
- "content": "who is the current president?"
- },
- {
- "role": "assistant",
- "content": "๐ต **Detected Language:** English\n\n๐ฏ **Translation (German):** \"Wer ist der aktuelle Prรคsident?\"\n\n๐ก **Cultural Note:** In Deutschland ist der 'Prรคsident' ein parlamentarisches Staatsoberhaupt, nicht der Exekutivchef.\n\nโจ **Alternative:** \"Wer ist der derzeitige Prรคsident?\"\n\n๐ **Regional Note:** Im deutschsprachigen Raum wird 'Prรคsident' selten fรผr den Bundeskanzler benutzt."
- }
- ],
- "summaries": {
- "key_points": "",
- "action_items": ""
- }
-}
\ No newline at end of file
diff --git a/sangu_santosh/chatbot1/chats/df803f4e.json b/sangu_santosh/chatbot1/chats/df803f4e.json
deleted file mode 100644
index 823d654..0000000
--- a/sangu_santosh/chatbot1/chats/df803f4e.json
+++ /dev/null
@@ -1,62 +0,0 @@
-{
- "id": "df803f4e",
- "title": "Introducing Chat GPT",
- "created": "2025-10-31T08:57:35.130174+00:00",
- "updated": "2025-10-31T10:22:41.749120+00:00",
- "persona": "General",
- "model": "openai/gpt-oss-20b:free",
- "messages": [
- {
- "role": "user",
- "content": "who are you?"
- },
- {
- "role": "assistant",
- "content": "Iโm ChatGPT, a largeโlanguage model created by OpenAI."
- },
- {
- "role": "user",
- "content": "tell me about your self little bit more"
- },
- {
- "role": "assistant",
- "content": "Iโm ChatGPT, a conversational AI built by OpenAI. \n* Itโs a large language model (LLM) thatโฏโโฏafter being trained on a vast mix of internet text, books, and codeโฏโโฏgenerates text that fits the context of a prompt. \n* I can answer questions, explain concepts, write code, draft emails, edit writing, translate, help study, brainstorm ideas, and simulate characters or roles. \n* I donโt have personal experiences, opinions, or feelings; my โknowledgeโ is based on patterns in data up to my last update (2024โ06). \n* I respect privacy, donโt store user data, and follow OpenAIโs safety policies."
- },
- {
- "role": "user",
- "content": "who is the presidient now?"
- },
- {
- "role": "assistant",
- "content": "JoeโฏBiden is the current President of the United States (he was reโelected in 2024)."
- },
- {
- "role": "user",
- "content": "how many language do you know?"
- },
- {
- "role": "assistant",
- "content": "Iโm trained on text from many languagesโhundreds of them, really. I can read, write, and respond in major world languages (English, Spanish, French, Mandarin, Russian, Arabic, German, etc.) as well as many regional or lessโcommon ones. My accuracy is best in the languages with the most training data, and lower for rare or niche tongues."
- },
- {
- "role": "user",
- "content": "Comprendi il tuo mondo e comunica attraverso le lingue"
- },
- {
- "role": "assistant",
- "content": "**Translated (Italian โ English):**\n\nYes, I understand my context and can communicate in many languages. I can help you with translations, explanations, texts, and so on."
- },
- {
- "role": "user",
- "content": "who is the presidient now?"
- },
- {
- "role": "assistant",
- "content": "**Translated (English โ Spanish):**\n\nJoeโฏBiden es el actual presidente de los Estados Unidos."
- }
- ],
- "summaries": {
- "key_points": "",
- "action_items": ""
- }
-}
\ No newline at end of file
diff --git a/sangu_santosh/chatbot1/chats/index.json b/sangu_santosh/chatbot1/chats/index.json
deleted file mode 100644
index 0ee2f6b..0000000
--- a/sangu_santosh/chatbot1/chats/index.json
+++ /dev/null
@@ -1,19 +0,0 @@
-{
- "chats": [
- {
- "id": "9beadbd5",
- "title": "Iโm not sure which country youโre asking aboutโcould youโฆ",
- "updated": "2025-10-31T12:35:55.482936+00:00"
- },
- {
- "id": "12660ba6",
- "title": "When Was The Last Olympics Held",
- "updated": "2025-10-31T12:24:57.238417+00:00"
- },
- {
- "id": "df803f4e",
- "title": "Introducing Chat GPT",
- "updated": "2025-10-31T10:22:41.749120+00:00"
- }
- ]
-}
\ No newline at end of file
diff --git a/sangu_santosh/chatbot1/code copy.txt b/sangu_santosh/chatbot1/code copy.txt
deleted file mode 100644
index a72fe8d..0000000
--- a/sangu_santosh/chatbot1/code copy.txt
+++ /dev/null
@@ -1,662 +0,0 @@
-import os
-import json
-import csv
-import uuid
-import time
-import datetime as dt
-from pathlib import Path
-from typing import Dict, List, Generator, Any
-
-import requests
-import streamlit as st
-
-# =============================
-# ---- Config & Constants ----
-# =============================
-APP_TITLE = "Multi-Chat Assistant"
-DATA_DIR = Path("./chats")
-INDEX_FILE = DATA_DIR / "index.json"
-DEFAULT_MODEL = "openai/gpt-4o-mini:free"
-DEFAULT_SUMMARY_MODEL = "openai/gpt-4o-mini:free"
-REQUEST_TIMEOUT = 60
-
-PERSONAS = {
- "General": "You are a helpful, concise assistant. Prefer short, clear answers.",
- "Teacher": "You are a calm teacher. Explain simply, add brief examples when helpful.",
- "Coach": "You are an encouraging coach. Give actionable, stepwise advice.",
- "Analyst": "You are a precise analyst. Be structured and avoid fluff.",
-}
-
-TRANSLATE_TO = [
- "English", "Hindi", "Spanish", "French", "German", "Chinese", "Arabic",
-]
-
-SUGGESTED_MODELS = {
- "OpenAI โข GPT-4o mini (free)": "openai/gpt-4o-mini:free",
- "Meta โข Llama-3.1-8B Instruct (free)": "meta-llama/llama-3.1-8b-instruct:free",
- "Mistral โข Mistral-7B Instruct (free)": "mistralai/mistral-7b-instruct:free",
-}
-
-DEFAULT_TITLES = {"New Chat", "Untitled", ""}
-
-# =============================
-# ---- File I/O Utilities ----
-# =============================
-
-def ensure_store() -> None:
- DATA_DIR.mkdir(parents=True, exist_ok=True)
- if not INDEX_FILE.exists():
- INDEX_FILE.write_text(json.dumps({"chats": []}, indent=2), encoding="utf-8")
-
-
-def read_index() -> Dict[str, Any]:
- ensure_store()
- try:
- idx = json.loads(INDEX_FILE.read_text(encoding="utf-8"))
- if not isinstance(idx, dict) or "chats" not in idx:
- return {"chats": []}
- return idx
- except Exception:
- return {"chats": []}
-
-
-def write_index(index: Dict[str, Any]) -> None:
- ensure_store()
- INDEX_FILE.write_text(json.dumps(index, indent=2, ensure_ascii=False), encoding="utf-8")
-
-
-def chat_file(chat_id: str) -> Path:
- return DATA_DIR / f"{chat_id}.json"
-
-
-def create_chat(title: str = "New Chat") -> str:
- chat_id = uuid.uuid4().hex[:8]
- now = dt.datetime.now(dt.timezone.utc).isoformat()
- doc = {
- "id": chat_id,
- "title": title or "New Chat",
- "created": now,
- "updated": now,
- "persona": "General",
- "model": DEFAULT_MODEL,
- "messages": [],
- "summaries": {"key_points": "", "action_items": ""},
- }
- chat_file(chat_id).write_text(json.dumps(doc, indent=2, ensure_ascii=False), encoding="utf-8")
- index = read_index()
- index["chats"].insert(0, {"id": chat_id, "title": doc["title"], "updated": now})
- write_index(index)
- return chat_id
-
-
-def load_chat(chat_id: str) -> Dict[str, Any]:
- try:
- doc = json.loads(chat_file(chat_id).read_text(encoding="utf-8"))
- if not isinstance(doc, dict):
- return {}
- doc.setdefault("title", "New Chat")
- doc.setdefault("messages", [])
- doc.setdefault("persona", "General")
- doc.setdefault("model", DEFAULT_MODEL)
- doc.setdefault("summaries", {"key_points": "", "action_items": ""})
- return doc
- except Exception:
- return {}
-
-
-def save_chat(doc: Dict[str, Any]) -> None:
- if not doc:
- return
- doc["updated"] = dt.datetime.now(dt.timezone.utc).isoformat()
- chat_file(doc["id"]).write_text(json.dumps(doc, indent=2, ensure_ascii=False), encoding="utf-8")
- index = read_index()
- found = False
- for row in index["chats"]:
- if row["id"] == doc["id"]:
- row["title"] = doc.get("title") or row.get("title") or "New Chat"
- row["updated"] = doc["updated"]
- found = True
- break
- if not found:
- index["chats"].insert(0, {"id": doc["id"], "title": doc.get("title","New Chat"), "updated": doc["updated"]})
- write_index(index)
-
-
-def delete_chat(chat_id: str) -> None:
- try:
- chat_file(chat_id).unlink(missing_ok=True)
- except Exception:
- pass
- index = read_index()
- index["chats"] = [c for c in index["chats"] if c["id"] != chat_id]
- write_index(index)
-
-# =============================
-# ---- OpenRouter Key Guard ---
-# =============================
-
-def _get_api_key() -> str:
- key = ""
- try:
- key = st.secrets.get("OPENROUTER_API_KEY", "")
- except Exception:
- pass
- if not key:
- key = os.getenv("OPENROUTER_API_KEY", "")
- return (key or "").strip()
-
-def _require_api_key_or_stop():
- key = _get_api_key()
- if not key:
- st.error(
- "OpenRouter API key not found. Add `OPENROUTER_API_KEY` to `.streamlit/secrets.toml` "
- "or set it as an environment variable, then restart the app."
- )
- st.stop()
-
-# =============================
-# ---- OpenRouter Client -----
-# =============================
-
-def openrouter_headers() -> Dict[str, str]:
- key = _get_api_key() # validated via _require_api_key_or_stop() in main()
- referer = os.getenv("STREAMLIT_REFERER", "http://localhost:8501")
-
- def ascii_only(s: str) -> str:
- try:
- return s.encode("ascii", "ignore").decode("ascii")
- except Exception:
- return ""
-
- return {
- "Authorization": f"Bearer {key}",
- "HTTP-Referer": referer,
- "X-Title": ascii_only(APP_TITLE),
- "Content-Type": "application/json",
- "Accept": "text/event-stream, application/json",
- }
-
-
-def stream_chat_completion(messages: List[Dict[str, str]], model: str) -> Generator[str, None, None]:
- url = "https://openrouter.ai/api/v1/chat/completions"
- payload = {
- "model": model,
- "messages": messages,
- "stream": True,
- }
- # Read SSE as raw bytes and UTF-8 decode manually to avoid mojibake
- with requests.post(url, headers=openrouter_headers(), json=payload, stream=True, timeout=REQUEST_TIMEOUT) as r:
- r.raise_for_status()
- for raw in r.iter_lines(chunk_size=8192):
- if not raw:
- continue
- raw = raw.strip()
- if raw.startswith(b":"): # heartbeat/comments like b": ping"
- continue
- if not raw.startswith(b"data:"):
- continue
- data = raw.split(b":", 1)[1].strip()
- if data == b"[DONE]":
- break
- try:
- obj = json.loads(data.decode("utf-8"))
- delta = obj.get("choices", [{}])[0].get("delta", {}).get("content", "")
- if delta:
- yield delta
- except Exception:
- continue
-
-
-def non_stream_completion(messages: List[Dict[str, str]], model: str) -> str:
- url = "https://openrouter.ai/api/v1/chat/completions"
- payload = {"model": model, "messages": messages}
- r = requests.post(url, headers=openrouter_headers(), json=payload, timeout=REQUEST_TIMEOUT)
- r.raise_for_status()
- r.encoding = "utf-8" # ensure utf-8 decoding for JSON
- data = r.json()
- return data["choices"][0]["message"]["content"]
-
-# =============================
-# ---- AI Helper Functions ----
-# =============================
-
-def persona_system(persona: str) -> str:
- return PERSONAS.get(persona, PERSONAS["General"])
-
-
-def detect_language(text: str, model: str) -> str:
- sys_msg = {
- "role": "system",
- "content": "Detect the language name for the given text. Reply with only the language name.",
- }
- user_msg = {"role": "user", "content": text[:2000]}
- try:
- out = non_stream_completion([sys_msg, user_msg], model)
- return out.strip().split("\n")[0]
- except Exception:
- return "Unknown"
-
-
-def translate_text(text: str, target_language: str, model: str) -> str:
- sys_msg = {
- "role": "system",
- "content": f"You translate to {target_language}. Keep meaning and tone; be natural.",
- }
- user_msg = {"role": "user", "content": text}
- return non_stream_completion([sys_msg, user_msg], model)
-
-
-def summarize_conversation(messages: List[Dict[str, str]], model: str) -> Dict[str, str]:
- sys_msg = {
- "role": "system",
- "content": "Summarize the conversation into two parts: 1) Key Points (bulleted, <=6 bullets), 2) Action Items (numbered, <=5 items). Return as JSON with keys key_points and action_items.",
- }
- convo_text = []
- for m in messages[-12:]:
- who = "User" if m["role"] == "user" else "Assistant"
- convo_text.append(f"{who}: {m['content']}")
- user_msg = {"role": "user", "content": "\n".join(convo_text)[:6000]}
- try:
- raw = non_stream_completion([sys_msg, user_msg], model)
- j = json.loads(raw)
- return {
- "key_points": j.get("key_points", ""),
- "action_items": j.get("action_items", ""),
- }
- except Exception:
- return {"key_points": "", "action_items": ""}
-
-# ---------- Auto Title Helpers ----------
-
-def _simple_title_from_first_user(messages: List[Dict[str, str]]) -> str:
- """Fallback: use first user message (trimmed and title-cased)."""
- text = ""
- for m in messages:
- if m.get("role") == "user" and m.get("content", "").strip():
- text = m["content"].strip()
- break
- if not text:
- return ""
- text = text.replace("\n", " ").strip()
- if len(text) > 60:
- text = text[:57].rstrip() + "โฆ"
- # crude title case
- return " ".join(w.capitalize() for w in text.split())
-
-def _model_title_from_history(messages: List[Dict[str, str]], model: str) -> str:
- """Ask the model for a short 3โ7 word title. Returns empty on failure."""
- if not messages:
- return ""
- sys = {
- "role": "system",
- "content": ("Create a concise 3-7 word Title Case title for this conversation. "
- "Do not add quotes or punctuation at the end."),
- }
- # include last ~8 turns
- convo = []
- for m in messages[-16:]:
- role = m.get("role", "user")
- content = m.get("content", "")
- if not content:
- continue
- convo.append({"role": role, "content": content[:2000]})
- try:
- title = non_stream_completion([sys] + convo, model).strip()
- title = title.replace("\n", " ").strip()
- if len(title) > 60:
- title = title[:57].rstrip() + "โฆ"
- return title
- except Exception:
- return ""
-
-def maybe_auto_title(chat_doc: Dict[str, Any]) -> None:
- """If title is default, derive one from history and save."""
- current = (chat_doc.get("title") or "").strip()
- if current in DEFAULT_TITLES:
- # Prefer model-based title when we have at least one user+assistant turn
- roles = [m.get("role") for m in chat_doc.get("messages", [])]
- has_user = "user" in roles
- has_assistant = "assistant" in roles
- title = ""
- if has_user and has_assistant:
- title = _model_title_from_history(chat_doc["messages"], chat_doc.get("model", DEFAULT_MODEL))
- if not title:
- title = _simple_title_from_first_user(chat_doc.get("messages", []))
- if title and title not in DEFAULT_TITLES:
- chat_doc["title"] = title
- save_chat(chat_doc)
-
-# =============================
-# ---- Export Helpers ---------
-# =============================
-
-def export_txt(doc: Dict[str, Any]) -> str:
- lines = [
- f"Title: {doc.get('title','')}",
- f"Persona: {doc.get('persona','')}",
- "",
- "Conversation:",
- ]
- for m in doc.get("messages", []):
- who = "User" if m["role"] == "user" else "Assistant"
- lines.append(f"[{who}] {m.get('content','')}")
- return "\n".join(lines)
-
-
-def export_json(doc: Dict[str, Any]) -> str:
- return json.dumps(doc, indent=2, ensure_ascii=False)
-
-
-def export_csv(doc: Dict[str, Any]) -> str:
- from io import StringIO
- buf = StringIO()
- writer = csv.writer(buf)
- writer.writerow(["role", "content", "timestamp"])
- for m in doc.get("messages", []):
- writer.writerow([m.get("role",""), m.get("content",""), doc.get("updated","")])
- return buf.getvalue()
-
-# =============================
-# ---- Theming (Dark/Light) ---
-# =============================
-DARK_CSS = """
-
-"""
-
-LIGHT_CSS = """
-
-"""
-
-# =============================
-# ---- Streamlit UI ----------
-# =============================
-
-def init_session():
- ensure_store()
- if "chat_id" not in st.session_state:
- idx = read_index()
- st.session_state.chat_id = idx["chats"][0]["id"] if idx["chats"] else create_chat("New Chat")
- if "start_ts" not in st.session_state:
- st.session_state.start_ts = time.time()
- # Default theme: Light (changed from "Dark")
- if "theme" not in st.session_state:
- st.session_state.theme = "Light"
- if "rename_mode" not in st.session_state:
- st.session_state.rename_mode = False
- if "rename_val" not in st.session_state:
- st.session_state.rename_val = ""
-
-
-def sidebar(chat_doc: Dict[str, Any]):
- st.sidebar.title("Conversations")
- if st.sidebar.button("โ New Chat", use_container_width=True):
- st.session_state.chat_id = create_chat("New Chat")
- st.rerun()
-
- st.sidebar.subheader("Chat History")
-
- idx = read_index()
- ids = [row["id"] for row in idx["chats"]]
- labels = {row["id"]: (row["title"] or row["id"]) for row in idx["chats"]}
- if st.session_state.chat_id not in ids and ids:
- st.session_state.chat_id = ids[0]
-
- selected_id = st.sidebar.radio(
- "Your chats",
- options=ids,
- index=ids.index(st.session_state.chat_id) if ids else 0,
- format_func=lambda cid: labels.get(cid, cid),
- label_visibility="collapsed",
- )
- if selected_id and selected_id != st.session_state.chat_id:
- st.session_state.chat_id = selected_id
- st.rerun()
-
- cA, cB = st.sidebar.columns(2, gap="small")
- with cA:
- if st.button("๐ Rename", use_container_width=True):
- st.session_state.rename_mode = True
- st.session_state.rename_val = labels.get(st.session_state.chat_id, "")
- st.rerun()
- with cB:
- if st.button("๐๏ธ Delete", use_container_width=True):
- delete_chat(st.session_state.chat_id)
- new_idx = read_index()
- if new_idx["chats"]:
- st.session_state.chat_id = new_idx["chats"][0]["id"]
- else:
- st.session_state.chat_id = create_chat("New Chat")
- st.rerun()
-
- if st.session_state.rename_mode:
- new_name = st.sidebar.text_input("New title", value=st.session_state.rename_val)
- c1, c2 = st.sidebar.columns(2)
- with c1:
- if st.button("Save"):
- doc = load_chat(st.session_state.chat_id) or {}
- if doc and new_name.strip():
- doc["title"] = new_name.strip()
- save_chat(doc)
- st.session_state.rename_mode = False
- st.rerun()
- with c2:
- if st.button("Cancel"):
- st.session_state.rename_mode = False
- st.rerun()
-
- st.sidebar.divider()
-
- # Persona
- persona_names = list(PERSONAS.keys())
- try:
- sel_index = persona_names.index(chat_doc.get("persona", "General"))
- except ValueError:
- sel_index = 0
- chat_doc["persona"] = st.sidebar.selectbox("Persona", persona_names, index=sel_index)
-
- # Model selector with safe suggestions + optional custom override
- st.sidebar.subheader("Model")
- suggested_label = None
- current_model = chat_doc.get("model", DEFAULT_MODEL)
- for label, mid in SUGGESTED_MODELS.items():
- if mid == current_model:
- suggested_label = label
- break
- label_list = list(SUGGESTED_MODELS.keys())
- if suggested_label is None:
- suggested_label = label_list[0]
- chosen_label = st.sidebar.selectbox("Suggested", label_list, index=label_list.index(suggested_label))
- suggested_model = SUGGESTED_MODELS[chosen_label]
- custom_model = st.sidebar.text_input(
- "Custom model ID (optional)",
- value=current_model if current_model not in SUGGESTED_MODELS.values() else "",
- placeholder="e.g., openai/gpt-4o-mini:free",
- help="Leave blank to use the suggested model above.",
- )
- final_model = custom_model.strip() or suggested_model
- if final_model != current_model:
- chat_doc["model"] = final_model
- save_chat(chat_doc)
-
- # Translation mode
- st.sidebar.subheader("Translation Mode")
- use_translate = st.sidebar.toggle("Enable translation", value=False)
- target_lang = st.sidebar.selectbox("Target language", TRANSLATE_TO, index=0)
-
- # Theme toggle (with fallback)
- try:
- theme = st.sidebar.segmented_control("Theme", ["Dark", "Light"], selection_mode="single", default=st.session_state.theme)
- except Exception:
- theme = st.sidebar.radio("Theme", ["Dark", "Light"], index=0 if st.session_state.theme == "Dark" else 1)
- st.session_state.theme = theme
-
- # Session stats
- st.sidebar.subheader("Session Stats")
- elapsed = int(time.time() - st.session_state.start_ts)
- st.sidebar.write(f"Duration: {elapsed//60}m {elapsed%60}s")
-
- # Exports
- st.sidebar.subheader("Export")
- st.sidebar.download_button("Export TXT", export_txt(chat_doc), file_name=f"{chat_doc.get('title','New Chat')}.txt")
- st.sidebar.download_button("Export JSON", export_json(chat_doc), file_name=f"{chat_doc.get('title','New Chat')}.json")
- st.sidebar.download_button("Export CSV", export_csv(chat_doc), file_name=f"{chat_doc.get('title','New Chat')}.csv")
-
- return use_translate, target_lang
-
-
-def main_panel(chat_doc: Dict[str, Any], use_translate: bool, target_lang: str):
- st.markdown(DARK_CSS if st.session_state.theme == "Dark" else LIGHT_CSS, unsafe_allow_html=True)
-
- # Title is now auto-generated; show it read-only
- cols = st.columns([0.8, 0.2])
- with cols[0]:
- st.title(APP_TITLE)
- st.caption(f"Chat: **{chat_doc.get('title', 'New Chat')}**")
- with cols[1]:
- if st.button("๐งน Clear messages", use_container_width=True):
- chat_doc["messages"] = []
- save_chat(chat_doc)
- st.rerun()
-
- for m in chat_doc.get("messages", []):
- with st.chat_message("assistant" if m["role"]=="assistant" else "user"):
- st.markdown(m.get("content", ""))
-
- if prompt := st.chat_input("Type your messageโฆ"):
- chat_doc["messages"].append({"role": "user", "content": prompt})
- save_chat(chat_doc)
-
- sys = {"role": "system", "content": persona_system(chat_doc.get("persona","General"))}
- messages = [sys] + chat_doc["messages"]
-
- with st.chat_message("assistant"):
- placeholder = st.empty()
- out_text = ""
- try:
- for chunk in stream_chat_completion(messages, chat_doc.get("model", DEFAULT_MODEL)):
- out_text += chunk
- placeholder.markdown(out_text)
- except Exception as e:
- out_text = f"โ Error: {e}"
- placeholder.markdown(out_text)
-
- if use_translate and out_text and not out_text.startswith("โ"):
- try:
- detected = detect_language(out_text, chat_doc.get("model", DEFAULT_MODEL))
- except Exception:
- detected = "Unknown"
- if detected and detected.lower() != target_lang.lower():
- try:
- tr = translate_text(out_text, target_lang, chat_doc.get("model", DEFAULT_MODEL))
- out_text = f"**Translated ({detected} โ {target_lang}):**\n\n{tr}"
- placeholder.markdown(out_text)
- except Exception:
- pass
-
- chat_doc["messages"].append({"role": "assistant", "content": out_text})
- save_chat(chat_doc)
-
- # Auto-title after first exchange(s)
- maybe_auto_title(chat_doc)
-
- st.divider()
- c1, c2 = st.columns(2)
- if c1.button("๐ง Summarize conversation"):
- with st.status("Summarizingโฆ"):
- res = summarize_conversation(chat_doc["messages"], DEFAULT_SUMMARY_MODEL)
- chat_doc["summaries"] = res
- save_chat(chat_doc)
- with c2.expander("Summaries", expanded=False):
- st.markdown("**Key Points**\n\n" + (chat_doc.get("summaries",{}).get("key_points","") or "_No summary yet._"))
- st.markdown("**Action Items**\n\n" + (chat_doc.get("summaries",{}).get("action_items","") or "_No action items yet._"))
-
-# =============================
-# ---- Lightweight Self Tests -
-# =============================
-
-def _run_self_tests():
- assert persona_system("Unknown") == PERSONAS["General"]
- assert persona_system("Analyst").startswith("You are a precise analyst")
-
- sample = {
- "title": "T",
- "persona": "General",
- "messages": [
- {"role": "user", "content": "Hi"},
- {"role": "assistant", "content": "Hello"},
- ],
- "updated": "2020-01-01T00:00:00Z",
- }
- txt = export_txt(sample)
- assert "Title: T" in txt
- assert "[User] Hi" in txt and "[Assistant] Hello" in txt
- assert "\n" in txt
-
- joined = "\n".join(["A", "B", "C"])
- assert joined == "A\nB\nC"
-
- csv_out = export_csv(sample)
- assert csv_out.splitlines()[0].startswith("role,content,timestamp")
-
-# =============================
-# ---- Entry Point ------------
-# =============================
-
-def main():
- st.set_page_config(page_title=APP_TITLE, page_icon="๐ฌ", layout="wide")
- _require_api_key_or_stop() # fail fast on missing key
-
- init_session()
- doc = load_chat(st.session_state.chat_id)
- if not doc:
- st.session_state.chat_id = create_chat("New Chat")
- doc = load_chat(st.session_state.chat_id)
- use_translate, target_lang = sidebar(doc)
- main_panel(doc, use_translate, target_lang)
-
-
-if os.getenv("RUN_SELF_TESTS") == "1":
- _run_self_tests()
-
-if __name__ == "__main__":
- main()
diff --git a/sangu_santosh/chatbot1/requirements.txt b/sangu_santosh/chatbot1/requirements.txt
deleted file mode 100644
index dd0cf87..0000000
--- a/sangu_santosh/chatbot1/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-streamlit>=1.36
-openai>=1.46
-langdetect>=1.0.9
-pandas>=2.0
diff --git a/sangu_santosh/chatbot1/secrets.toml b/sangu_santosh/chatbot1/secrets.toml
deleted file mode 100644
index 36cc746..0000000
--- a/sangu_santosh/chatbot1/secrets.toml
+++ /dev/null
@@ -1,5 +0,0 @@
-# Put your OpenRouter key here
-OPENROUTER_API_KEY = "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"
-
-# Optional: helps OpenRouter attribute traffic
-APP_URL = "http://localhost:8501"