# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Configuration settings for the RAG pipeline.
"""
from pathlib import Path
from typing import Literal
# ============================================================================
# PATHS
# ============================================================================
REPO_ROOT: Path = Path(__file__).parent.parent.resolve()
INDEX_DIR: Path = Path(__file__).parent / "index"
# ============================================================================
# CHUNKING CONFIGURATION
# ============================================================================
MAX_CHUNK_CHARS: int = 8000 # ~2000 tokens, safe for most embedding models
OVERLAP_CHARS: int = 200 # Overlap between chunks for continuity
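
# A minimal sketch of how these two constants are typically combined in a
# sliding-window chunker. Illustrative only and not used by the rest of the
# pipeline; the real chunker may split on structure (paragraphs, code blocks)
# rather than raw character offsets.
def _example_sliding_window(text: str) -> list[str]:
    """Illustrative only: fixed-size chunks with OVERLAP_CHARS of overlap."""
    chunks: list[str] = []
    start = 0
    while start < len(text):
        end = min(start + MAX_CHUNK_CHARS, len(text))
        chunks.append(text[start:end])
        if end == len(text):
            break
        start = end - OVERLAP_CHARS  # re-read the tail of the previous chunk
    return chunks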

# ============================================================================
# EMBEDDING MODEL OPTIONS
# ============================================================================

# Ordered from fastest/least accurate to slowest/most accurate.
EMBEDDING_MODELS: dict[str, str] = {
    "fast": "all-MiniLM-L6-v2",  # ~23M params, fastest, good quality
    "balanced": "all-MiniLM-L12-v2",  # ~33M params, balanced
    "accurate": "all-mpnet-base-v2",  # ~110M params, best quality, slower
}
DEFAULT_MODEL: str = "fast"  # Change to "balanced" or "accurate" if needed
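
# The names above are sentence-transformers checkpoints. A hedged sketch of
# how DEFAULT_MODEL would be resolved and loaded (assumes the
# sentence-transformers package; the pipeline's actual loader may differ).
def _example_load_embedder():
    """Illustrative only: resolve DEFAULT_MODEL to a model name and load it."""
    from sentence_transformers import SentenceTransformer  # assumed dependency

    model_name = EMBEDDING_MODELS[DEFAULT_MODEL]  # -> "all-MiniLM-L6-v2"
    return SentenceTransformer(model_name)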

# ============================================================================
# SEARCH CONFIGURATION
# ============================================================================

# Relevance score thresholds
LOW_RELEVANCE_THRESHOLD: float = 0.3

# Boosting factors for hybrid search
PACKAGE_INFO_BOOST: float = 0.8
FOLDER_MATCH_BOOST: float = 0.4
PARTIAL_FOLDER_BOOST: float = 0.2
EXACT_FILE_BOOST: float = 0.5
CLASS_IN_PATH_BOOST: float = 1.5
TERM_MATCH_BOOST: float = 0.1
MAIN_SOURCE_BOOST: float = 1.3
TEST_SOURCE_BOOST: float = 1.1
CHANGELOG_PENALTY: float = 0.5
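
# How these factors combine is decided by the search module, not here. One
# plausible reading, shown purely as a sketch (the real scoring may differ):
# factors >= 1.0 act as multipliers on path-level signals, TERM_MATCH_BOOST is
# added per matched query term, and CHANGELOG_PENALTY down-weights changelog
# hits. LOW_RELEVANCE_THRESHOLD would then gate weak results.
def _example_score(base: float, *, in_main_source: bool, is_changelog: bool,
                   term_matches: int) -> float:
    """Illustrative only: one way the boosts above might be applied."""
    score = base + TERM_MATCH_BOOST * term_matches  # additive per matched term
    if in_main_source:
        score *= MAIN_SOURCE_BOOST  # prefer main source files
    if is_changelog:
        score *= CHANGELOG_PENALTY  # halve changelog results
    return score  # callers could drop results below LOW_RELEVANCE_THRESHOLD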

# ============================================================================
# LLM CONFIGURATION
# ============================================================================

DEFAULT_LLM_PROVIDER: Literal["ollama", "openai"] = "ollama"
DEFAULT_OLLAMA_MODEL: str = "tinyllama"
DEFAULT_OLLAMA_URL: str = "http://localhost:11434/v1"
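
# DEFAULT_OLLAMA_URL points at Ollama's OpenAI-compatible endpoint, so one
# client can serve either provider. A hedged sketch (assumes the `openai`
# package; the pipeline's client setup may differ).
def _example_llm_client():
    """Illustrative only: an OpenAI-compatible client aimed at Ollama."""
    from openai import OpenAI  # assumed dependency

    # Ollama ignores the key, but the client requires a non-empty string.
    return OpenAI(base_url=DEFAULT_OLLAMA_URL, api_key="ollama")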

# ============================================================================
# DEVICE DETECTION
# ============================================================================

DeviceType = Literal["cuda", "mps", "cpu"]


def detect_device() -> DeviceType:
    """
    Detect the best available device for embedding generation.

    Returns:
        'cuda' for NVIDIA GPU, 'mps' for Apple Silicon, or 'cpu'
    """
    try:
        import torch

        if torch.cuda.is_available():
            return "cuda"
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            return "mps"
    except ImportError:
        pass
    return "cpu"