---
# Semantic Search Engine Configuration
# =====================================
# Model Configuration
models:
  embedding:
    name: "text-embedding-3-large"
    provider: "openai"
  chat:
    name: "gpt-4o-mini"
    provider: "openai"
    # Deterministic output for QA answers
    temperature: 0.0
# Document Processing Configuration
document_processing:
  chunk_size: 1000
  chunk_overlap: 200
  add_start_index: true
# Vector Store Configuration
vector_store:
  provider: "chroma"
  collection_name: "semantic_search_docs_streamlit"
  persist_directory: "./chroma/db"
  # ChromaDB connection mode
  # Set use_docker: true to connect to ChromaDB Docker container
  # Run: docker run -d -p 8000:8000 chromadb/chroma
  use_docker: true
  chroma_host: "localhost"
  chroma_port: 8000
  # Retrieval settings
  search_type: "similarity"
  search_k: 3  # Number of chunks to retrieve
# Hybrid Retrieval Configuration
hybrid_retrieval:
  # Enable hybrid search (BM25 + semantic)
  enabled: true
  # Retrieval method: "semantic", "bm25", "hybrid"
  default_method: "hybrid"
  # Weight for semantic search in hybrid mode (0-1)
  # 0 = BM25 only, 1 = semantic only, 0.5 = equal weight
  alpha: 0.5
  # RRF constant for rank fusion (typically 60)
  rrf_k: 60
  # BM25 parameters
  bm25:
    k1: 1.5  # Term frequency saturation
    b: 0.75  # Length normalization
# Re-ranking configuration
reranking:
  enabled: true
  # Provider options:
  # - "auto": tries jina (local) first, then cohere (cloud)
  # - "jina": force local Jina model (requires sentence-transformers)
  # - "cohere": force cloud Cohere API (requires COHERE_API_KEY)
  provider: "auto"
  # Cohere model (if using cohere)
  cohere_model: "rerank-english-v3.0"
  # Jina model (if using jina)
  jina_model: "jinaai/jina-reranker-v1-tiny-en"
  # Number of candidates to fetch before reranking
  fetch_k_multiplier: 3
# Retrieval Presets Configuration
# Pre-configured profiles for different use cases
retrieval_presets:
  high_precision:
    display_name: "High Precision"
    description: "Fewer results, higher relevance. Best for specific questions."
    icon: "🎯"
    k: 3
    alpha: 0.7
    rerank: true
    method: "hybrid"
  balanced:
    display_name: "Balanced"
    description: "Good balance of precision and coverage. Recommended default."
    icon: "⚖️"
    k: 5
    alpha: 0.5
    rerank: true
    method: "hybrid"
  high_recall:
    display_name: "High Recall"
    description: "More results, broader coverage. Best for exploration."
    icon: "🔍"
    k: 10
    alpha: 0.3
    rerank: false
    method: "hybrid"
# Default preset to use
# NOTE(review): placed at top level so retrieval_presets holds only preset
# mappings — confirm against the consumer's config lookup path.
default_preset: "balanced"
# Conversation History Configuration
conversation:
  # Enable conversation history
  enabled: true
  # Storage directory for conversation JSON files
  storage_dir: "./conversation_history"
  # Maximum queries to retain per session
  max_history: 50
  # Enable follow-up question optimization
  follow_up_optimization: true
  # Number of recent Q&A pairs to use for follow-up context
  context_window: 3
# A/B Testing Configuration
ab_testing:
  # Enable A/B testing
  enabled: true
  # Storage directory for experiment results
  storage_dir: "./ab_testing_results"
  # Default variants to compare
  default_variants:
    - "semantic"
    - "bm25"
    - "hybrid"
    - "hybrid_rerank"
# Retry Configuration
retry:
  max_attempts: 3
  min_wait: 2  # seconds
  max_wait: 10  # seconds
  multiplier: 1
# Logging Configuration
logging:
  level: "INFO"
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  file: "semantic_search.log"
# System Prompts
# Templates use str.format-style placeholders: {question}, {document},
# {conversation_context}. Literal block scalars (|) preserve line breaks.
prompts:
  qa_system: |
    You're a helpful assistant.
    Please answer the following question {question} only using the following information {document}.
    If you can't answer the question, just say you can't answer that question.
  # Follow-up question prompt template
  follow_up_system: |
    You're a helpful assistant engaged in a conversation about a document.
    Use the previous conversation context to understand references and maintain continuity.
    Previous conversation:
    {conversation_context}
    Document context:
    {document}
    Please answer the following question: {question}
    If you can't answer the question, just say you can't answer that question.