-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
122 lines (100 loc) · 3.9 KB
/
main.py
File metadata and controls
122 lines (100 loc) · 3.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from utils.vector_store import create_vector_store, retrieve_documents
from agents.relevance_agent import RelevanceAgent
from agents.generator_agent import GeneratorAgent
from agents.factcheck_agent import FactCheckAgent
class SelfCorrectingRAG:
    """Four-stage self-correcting RAG pipeline.

    Stages: document retrieval -> relevance filtering -> answer
    generation -> fact-checking, each backed by a dedicated agent.
    """

    def __init__(self, documents_path):
        """Build the vector store from *documents_path* and wire up the agents."""
        banner = "=" * 70
        print("\n" + banner)
        print("🚀 INITIALIZING SELF-CORRECTING RAG PIPELINE (OLLAMA - FREE)")
        print(banner + "\n")

        # Build the retrieval index first; agents come after.
        self.vector_store = create_vector_store(documents_path)

        print("\n🤖 Initializing AI agents with Ollama...")
        self.relevance_agent = RelevanceAgent()
        self.generator_agent = GeneratorAgent()
        self.factcheck_agent = FactCheckAgent()
        print("✓ All agents initialized")

        print("\n" + banner)
        print("✅ PIPELINE READY - 100% FREE, NO API KEYS NEEDED!")
        print(banner + "\n")

    def query(self, question, verbose=True):
        """Run *question* through all four stages and return a result dict.

        The dict carries the question, raw and filtered documents, the
        generated answer, the fact-check verification, and a final status
        string derived from verification["passed"].
        """
        banner = "=" * 70
        rule = "-" * 70

        if verbose:
            print("\n" + banner)
            print(f"📝 QUERY: {question}")
            print(banner + "\n")

        # Stage 1: pull candidate documents from the vector store.
        if verbose:
            print("STAGE 1️⃣: DOCUMENT RETRIEVAL")
            print(rule)
        retrieved_docs = retrieve_documents(self.vector_store, question)
        if verbose:
            print(f"✓ Retrieved {len(retrieved_docs)} documents")
            for rank, doc in enumerate(retrieved_docs, start=1):
                print(f" [{rank}] {doc[:80]}...")
            print()

        # Stage 2: keep only documents the relevance agent accepts.
        if verbose:
            print("STAGE 2️⃣: RELEVANCE FILTERING")
            print(rule)
        relevant_docs, relevance_analysis = self.relevance_agent.filter_relevant_docs(
            question, retrieved_docs
        )
        if verbose:
            print("\n📊 Relevance Analysis:")
            print(relevance_analysis)
            print()

        # Stage 3: draft an answer grounded in the filtered documents.
        if verbose:
            print("STAGE 3️⃣: ANSWER GENERATION")
            print(rule)
        answer = self.generator_agent.generate_answer(question, relevant_docs)
        if verbose:
            print("\n💡 Generated Answer:")
            print(f" {answer}")
            print()

        # Stage 4: verify the answer against the same documents.
        if verbose:
            print("STAGE 4️⃣: FACT-CHECKING")
            print(rule)
        verification = self.factcheck_agent.verify_answer(
            question, answer, relevant_docs
        )
        if verbose:
            print("\n🔍 Verification Analysis:")
            print(verification['analysis'])
            print()

        status = "APPROVED ✅" if verification["passed"] else "REJECTED ❌"
        result = {
            "question": question,
            "retrieved_docs": retrieved_docs,
            "relevant_docs": relevant_docs,
            "answer": answer,
            "verification": verification,
            "final_status": status,
        }

        if verbose:
            print(banner)
            print(f"🏁 FINAL STATUS: {result['final_status']}")
            print(banner + "\n")
        return result
def main():
    """Entry point: build the pipeline and answer questions interactively.

    Reads questions from stdin until the user types quit/exit/q, sends an
    empty line (re-prompts), or closes the stream (Ctrl-D / Ctrl-C).
    """
    # Initialize the pipeline over the bundled document set.
    rag = SelfCorrectingRAG("data/documents.txt")

    # Interactive mode
    print("\n" + "=" * 70)
    print("💬 INTERACTIVE MODE - Type 'quit' to exit")
    print("=" * 70)
    while True:
        print("\n" + "-" * 70)
        try:
            user_query = input("🔎 Enter your question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C should end the session cleanly, not traceback.
            print("\n👋 Goodbye!")
            break
        if user_query.lower() in ('quit', 'exit', 'q'):
            print("\n👋 Goodbye!")
            break
        if not user_query:
            print("⚠️  Please enter a valid question")
            continue
        # Verbose run prints each pipeline stage; return value is not needed here.
        rag.query(user_query, verbose=True)


if __name__ == "__main__":
    main()