# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Configuration settings for the RAG pipeline.
"""
from pathlib import Path
from typing import Literal
# ============================================================================
# PATHS
# ============================================================================
REPO_ROOT: Path = Path(__file__).parent.parent.resolve()
INDEX_DIR: Path = Path(__file__).parent / "index"
# ============================================================================
# CHUNKING CONFIGURATION
# ============================================================================
MAX_CHUNK_CHARS: int = 8000 # ~2000 tokens, safe for most embedding models
OVERLAP_CHARS: int = 200 # Overlap between chunks for continuity
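
# A minimal sketch of how these two constants are typically combined in a
# sliding-window chunker. Illustrative only and not used by the rest of the
# pipeline; the real chunker may split on structure (paragraphs, code blocks)
# rather than raw character offsets.
def _example_sliding_window(text: str) -> list[str]:
    """Illustrative only: fixed-size chunks with OVERLAP_CHARS of overlap."""
    chunks: list[str] = []
    start = 0
    while start < len(text):
        end = min(start + MAX_CHUNK_CHARS, len(text))
        chunks.append(text[start:end])
        if end == len(text):
            break
        start = end - OVERLAP_CHARS  # re-read the tail of the previous chunk
    return chunks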

# ============================================================================
# EMBEDDING MODEL OPTIONS
# ============================================================================

# Ordered from fastest/least accurate to slowest/most accurate.
EMBEDDING_MODELS: dict[str, str] = {
    "fast": "all-MiniLM-L6-v2",  # ~23M params, fastest, good quality
    "balanced": "all-MiniLM-L12-v2",  # ~33M params, balanced
    "accurate": "all-mpnet-base-v2",  # ~110M params, best quality, slower
}
DEFAULT_MODEL: str = "fast"  # Change to "balanced" or "accurate" if needed
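
# The names above are sentence-transformers checkpoints. A hedged sketch of
# how DEFAULT_MODEL would be resolved and loaded (assumes the
# sentence-transformers package; the pipeline's actual loader may differ).
def _example_load_embedder():
    """Illustrative only: resolve DEFAULT_MODEL to a model name and load it."""
    from sentence_transformers import SentenceTransformer  # assumed dependency

    model_name = EMBEDDING_MODELS[DEFAULT_MODEL]  # -> "all-MiniLM-L6-v2"
    return SentenceTransformer(model_name)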

# ============================================================================
# SEARCH CONFIGURATION
# ============================================================================

# Relevance score thresholds
LOW_RELEVANCE_THRESHOLD: float = 0.3

# Boosting factors for hybrid search
PACKAGE_INFO_BOOST: float = 0.8
FOLDER_MATCH_BOOST: float = 0.4
PARTIAL_FOLDER_BOOST: float = 0.2
EXACT_FILE_BOOST: float = 0.5
CLASS_IN_PATH_BOOST: float = 1.5
TERM_MATCH_BOOST: float = 0.1
MAIN_SOURCE_BOOST: float = 1.3
TEST_SOURCE_BOOST: float = 1.1
CHANGELOG_PENALTY: float = 0.5
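
# How these factors combine is decided by the search module, not here. One
# plausible reading, shown purely as a sketch (the real scoring may differ):
# factors >= 1.0 act as multipliers on path-level signals, TERM_MATCH_BOOST is
# added per matched query term, and CHANGELOG_PENALTY down-weights changelog
# hits. LOW_RELEVANCE_THRESHOLD would then gate weak results.
def _example_score(base: float, *, in_main_source: bool, is_changelog: bool,
                   term_matches: int) -> float:
    """Illustrative only: one way the boosts above might be applied."""
    score = base + TERM_MATCH_BOOST * term_matches  # additive per matched term
    if in_main_source:
        score *= MAIN_SOURCE_BOOST  # prefer main source files
    if is_changelog:
        score *= CHANGELOG_PENALTY  # halve changelog results
    return score  # callers could drop results below LOW_RELEVANCE_THRESHOLD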

# ============================================================================
# LLM CONFIGURATION
# ============================================================================

DEFAULT_LLM_PROVIDER: Literal["ollama", "openai"] = "ollama"
DEFAULT_OLLAMA_MODEL: str = "tinyllama"
DEFAULT_OLLAMA_URL: str = "http://localhost:11434/v1"
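
# DEFAULT_OLLAMA_URL points at Ollama's OpenAI-compatible endpoint, so one
# client can serve either provider. A hedged sketch (assumes the `openai`
# package; the pipeline's client setup may differ).
def _example_llm_client():
    """Illustrative only: an OpenAI-compatible client aimed at Ollama."""
    from openai import OpenAI  # assumed dependency

    # Ollama ignores the key, but the client requires a non-empty string.
    return OpenAI(base_url=DEFAULT_OLLAMA_URL, api_key="ollama")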

# ============================================================================
# DEVICE DETECTION
# ============================================================================

DeviceType = Literal["cuda", "mps", "cpu"]


def detect_device() -> DeviceType:
    """
    Detect the best available device for embedding generation.

    Returns:
        'cuda' for NVIDIA GPU, 'mps' for Apple Silicon, or 'cpu'
    """
    try:
        import torch

        if torch.cuda.is_available():
            return "cuda"
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            return "mps"
    except ImportError:
        pass
    return "cpu"