Add detailed metadata and unify citation instructions in prompts

CLoaKY233 · CLoaKY233 · commit bf980bb4468c · 2025-06-21T00:21:31.000+05:30
diff --git a/prompts/evaluation/follow_up_generation.yaml b/prompts/evaluation/follow_up_generation.yaml
@@ -1,22 +1,50 @@
+---
 metadata:
   name: "follow_up_generation"
-  version: "2.0.0"
-  description: "Generate precise follow-up search queries"
+  version: "1.0.0"
+  description: "Generate follow-up queries for reflexion cycles"
+  author: "RAG Team"
+  created_date: "2025-06-10"
+  last_modified: "2025-06-10"
+  tags: ["evaluation", "follow-up", "query-generation"]
+
 config:
-  temperature: 0.0
-  max_tokens: 100
+  temperature: 0.5
+  max_tokens: 500
+  model_type: "evaluation"
+
 variables:
   - name: "original_query"
     type: "string"
     required: true
+    description: "Original user question"
+  - name: "partial_answer"
+    type: "string"
+    required: true
+    description: "Current partial answer"
   - name: "missing_aspects"
     type: "array"
     required: true
+    description: "List of missing aspects to address"
+
 prompt_template: |
-  Produce 1–2 concise search queries to find missing info on:
-  {{original_query}}
+  Generate 1-2 specific follow-up queries to address missing information.
+
+  Original Question: {{original_query}}
+
+  Current Answer: {{partial_answer}}
+
+  Missing Aspects: {{missing_aspects}}
+
+  Requirements:
+  - Create specific, searchable queries
+  - Focus on the most important missing information
+  - Make queries standalone (no pronouns)
+  - Prioritize factual, retrievable information
+  - Keep queries concise and focused
 
-  Missing:
-  {{missing_aspects}}
+  Format as numbered list:
+  1. [First follow-up query]
+  2. [Second follow-up query]
 
-  Do NOT introduce unrelated topics.
+  Follow-up queries:
diff --git a/prompts/evaluation/response_evaluation.yaml b/prompts/evaluation/response_evaluation.yaml
@@ -1,10 +1,18 @@
+---
 metadata:
   name: "response_evaluation"
-  version: "2.0.0"
-  description: "Evaluate answer quality and completeness"
+  version: "1.0.0"
+  description: "Evaluate response quality and completeness"
+  author: "RAG Team"
+  created_date: "2025-06-10"
+  last_modified: "2025-06-10"
+  tags: ["evaluation", "reflexion", "quality-control"]
+
 config:
-  temperature: 0.0
-  max_tokens: 300
+  temperature: 0.3
+  max_tokens: 1000
+  model_type: "evaluation"
+
 variables:
   - name: "query"
     type: "string"
@@ -15,16 +23,53 @@ variables:
   - name: "docs_summary"
     type: "string"
     required: true
+  - name: "cycle_number"
+    type: "integer"
+    required: true
+  - name: "confidence_threshold"
+    type: "float"
+    required: true
+
 prompt_template: |
-  Evaluate the answer below for question {{query}}:
+  You are an expert evaluator assessing the quality and completeness of AI responses.
 
-  Answer:
+  EVALUATION TASK:
+  Assess if the following response sufficiently answers the user's question.
+
+  Original Question: {{query}}
+
+  Current Response (Cycle {{cycle_number}}):
   {{partial_answer}}
 
-  Context Summary:
-  {{docs_summary}}
+  Available Context: {{docs_summary}}
+
+  EVALUATION CRITERIA:
+  1. Completeness: Does the response address all aspects of the question?
+  2. Accuracy: Is the response supported by the available documents?
+  3. Confidence: Does the response contain uncertain or vague language?
+  4. Specificity: Are there specific sub-questions that need more detail?
+
+  RESPONSE FORMAT (JSON):
+  {
+      "confidence_score": 0.35,
+      "decision": "continue|refine_query|complete|insufficient_data",
+      "reasoning": "Detailed explanation of the assessment",
+      "covered_aspects": ["aspect1", "aspect2"],
+      "missing_aspects": ["missing1", "missing2"],
+      "uncertainty_phrases": ["phrase1", "phrase2"],
+      "specific_gaps": ["What specific details are missing?"]
+  }
+
+  DECISION GUIDELINES:
+  - confidence_score: 0.0-1.0 (how well the question is answered)
+  - "complete": confidence >= {{confidence_threshold}} and no major gaps
+  - "continue": confidence < {{confidence_threshold}} but retrievable information exists
+  - "refine_query": need more specific queries for missing aspects
+  - "insufficient_data": fundamental information is missing from knowledge base
 
-  Provide JSON with:
-  {"confidence":0.0-1.0,"decision":"continue|complete|insufficient_data","reason":"brief"}
+  INSTRUCTION:
+  1. Be very strict throughout the evaluation
+  2. Always lower the confidence score when the response contains mistakes
+  3. Ensure that you respond with a strict, rigorously honest assessment so that the application can improve its replies.
 
-  - If unsure, choose “insufficient_data.”
+  Provide your evaluation as valid JSON:
diff --git a/prompts/generation/initial_generation.yaml b/prompts/generation/initial_generation.yaml
@@ -1,28 +1,67 @@
+---
 metadata:
   name: "initial_generation"
-  version: "2.0.0"
-  description: "First-cycle answer with strict source citations"
+  version: "1.0.0"
+  description: "Initial response generation in reflexion cycle"
+  author: "RAG Team"
+  created_date: "2025-06-10"
+  last_modified: "2025-06-10"
+  tags: ["generation", "reflexion", "initial"]
+
 config:
   temperature: 0.7
-  max_tokens: 2500
+  max_tokens: 3000
+  model_type: "generation"
+
 variables:
   - name: "query"
     type: "string"
     required: true
+    description: "User's question"
   - name: "context"
     type: "string"
     required: true
+    description: "Retrieved document context"
+  - name: "cycle_number"
+    type: "integer"
+    required: true
+    description: "Current reflexion cycle number"
+
 prompt_template: |
-  You are an expert research assistant. Answer the question below using only the documents provided.
+  You are an expert AI assistant providing detailed, accurate answers with proper source citations.
+
+  This is the initial response. Provide a comprehensive answer with proper source citations.
 
   Question: {{query}}
 
-  Context:
+  Available Documents:
   {{context}}
 
-  Instructions:
-  - Base every fact on context; if unsupported, state “Not enough information.”
-  - Cite each fact as [Source: filename.ext].
-  - Be concise and organized.
+  IMPORTANT: Multiple document entries may be from the SAME SOURCE FILE but different sections/chunks. When citing:
+  - If multiple "Doc X" entries share the same filename, they are from the SAME document
+  - Use the source filename as the primary citation reference
+  - You can reference specific sections if needed, but treat same-named files as one source
+
+  CITATION REQUIREMENTS:
+  - Use [Source: filename] format for inline citations (e.g., [Source: batman.md], [Source: interstellar.md])
+  - When multiple chunks are from the same file, cite the filename once, not each chunk separately
+  - Include creation dates when referencing information
+  - At the end of your response, provide a "Sources" section with unique filenames only
+  - If information comes from multiple sections of the same document, mention "multiple sections"
+  - Be specific about which source file supports each claim
+
+  RESPONSE STRUCTURE:
+  1. Provide a comprehensive answer with inline citations using source filenames
+  2. Use clear, professional language
+  3. Organize information logically with headers if needed
+  4. Include a "Sources" section at the end with unique source files only
+
+  IMPORTANT GUIDELINES:
+  - Base your response ONLY on the provided documents
+  - Use inline citations [Source: filename] after each factual claim
+  - Treat multiple chunks from the same file as ONE source document
+  - If information is incomplete, clearly state what's missing and from which sources
+  - Maintain professional tone throughout
+  - In Sources section, list each unique filename only once with its full path
 
   Answer:
diff --git a/prompts/generation/reflexion_generation.yaml b/prompts/generation/reflexion_generation.yaml
@@ -1,10 +1,18 @@
+---
 metadata:
   name: "reflexion_generation"
-  version: "2.0.0"
-  description: "Follow-up cycle focusing on missing aspects"
+  version: "1.0.0"
+  description: "Follow-up response generation in reflexion cycles"
+  author: "RAG Team"
+  created_date: "2025-06-10"
+  last_modified: "2025-06-10"
+  tags: ["generation", "reflexion", "follow-up"]
+
 config:
   temperature: 0.7
-  max_tokens: 2000
+  max_tokens: 3000
+  model_type: "generation"
+
 variables:
   - name: "query"
     type: "string"
@@ -15,12 +23,42 @@ variables:
   - name: "cycle_number"
     type: "integer"
     required: true
+
 prompt_template: |
-  Cycle {{cycle_number}}: Refine the previous answer on {{query}} with missing details.
+  You are an expert AI assistant providing detailed, accurate answers with proper source citations.
+
+  This is cycle {{cycle_number}} of a reflexion loop. Focus on addressing specific aspects that may have been missed, with accurate citations.
+
+  Question: {{query}}
 
-  Use only:
+  Available Documents:
   {{context}}
 
-  Instructions:
-  - Fill specific gaps; do NOT introduce new topics.
-  - Cite every claim [Source: filename.ext], or say “No information available.”
+  IMPORTANT: Multiple document entries may be from the SAME SOURCE FILE but different sections/chunks. When citing:
+  - If multiple "Doc X" entries share the same filename, they are from the SAME document
+  - Use the source filename as the primary citation reference
+  - You can reference specific sections if needed, but treat same-named files as one source
+
+  CITATION REQUIREMENTS:
+  - Use [Source: filename] format for inline citations (e.g., [Source: batman.md], [Source: interstellar.md])
+  - When multiple chunks are from the same file, cite the filename once, not each chunk separately
+  - Include creation dates when referencing information
+  - At the end of your response, provide a "Sources" section with unique filenames only
+  - If information comes from multiple sections of the same document, mention "multiple sections"
+  - Be specific about which source file supports each claim
+
+  RESPONSE STRUCTURE:
+  1. Provide a comprehensive answer with inline citations using source filenames
+  2. Use clear, professional language
+  3. Organize information logically with headers if needed
+  4. Include a "Sources" section at the end with unique source files only
+
+  IMPORTANT GUIDELINES:
+  - Base your response ONLY on the provided documents
+  - Use inline citations [Source: filename] after each factual claim
+  - Treat multiple chunks from the same file as ONE source document
+  - If information is incomplete, clearly state what's missing and from which sources
+  - Maintain professional tone throughout
+  - In Sources section, list each unique filename only once with its full path
+
+  Answer:
diff --git a/prompts/generation/simple_generation.yaml b/prompts/generation/simple_generation.yaml
@@ -1,24 +1,53 @@
+---
 metadata:
   name: "simple_generation"
-  version: "2.0.0"
-  description: "Fallback single-pass RAG answer"
+  version: "1.0.0"
+  description: "Simple RAG generation without reflexion"
+  author: "RAG Team"
+  created_date: "2025-06-10"
+  last_modified: "2025-06-10"
+  tags: ["generation", "simple", "fallback"]
+
 config:
   temperature: 0.7
-  max_tokens: 2000
+  max_tokens: 3000
+  model_type: "generation"
+
 variables:
   - name: "question"
     type: "string"
     required: true
+    description: "User's question"
   - name: "context"
     type: "string"
     required: true
+    description: "Retrieved document context"
+
 prompt_template: |
-  Answer the question using only the context below.
+  You are an AI assistant that provides accurate answers with proper source citations.
 
   Question: {{question}}
 
-  Context:
+  Available Documents:
   {{context}}
 
-  - Cite each fact as [Source: filename.ext].
-  - If you lack information, state “Insufficient data.”
+  IMPORTANT: Multiple document entries may be from the SAME SOURCE FILE but different sections. When citing:
+  - Use the source filename for citations, not individual document numbers
+  - Multiple "Doc X" entries with the same filename are from the SAME document
+  - Treat same-named files as one source in your response
+
+  INSTRUCTIONS:
+  - Answer based ONLY on the provided documents
+  - Use inline citations [Source: filename] after each factual statement
+  - Include creation dates when referencing information
+  - Be comprehensive and accurate
+  - If context is insufficient, state what information is missing
+  - End with a "Sources" section listing unique source files only
+
+  CITATION FORMAT:
+  - Use [Source: filename] for inline citations (NOT [Doc X])
+  - Reference specific source files that support each claim
+  - Include file creation dates in your references
+  - List each unique source file only once in Sources section
+
+  Answer with proper source-based citations:
diff --git a/prompts/synthesis/final_synthesis.yaml b/prompts/synthesis/final_synthesis.yaml