-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.py
More file actions
123 lines (98 loc) · 4.14 KB
/
config.py
File metadata and controls
123 lines (98 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""Configuration management for KURE API service."""
import os
from typing import List, Optional
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application settings for the Ragnaforge RAG API service.

    Each attribute below is a default that can be overridden via an
    environment variable (or the `.env` file) — see `model_config` at the
    bottom of the class for the pydantic-settings loading options.
    """

    # API Configuration — identity strings surfaced by the service.
    app_name: str = "Ragnaforge RAG API"
    app_version: str = "1.0.0"
    app_description: str = "Korean-optimized RAG system with vector and text search capabilities"

    # Server Configuration — bind address, port, and worker count.
    host: str = "0.0.0.0"
    port: int = 8000
    workers: int = 1

    # Model Configuration — embedding models served by the API.
    default_model: str = "nlpai-lab/KURE-v1"
    # Whitelist of model identifiers clients may request.
    available_models: List[str] = [
        "dragonkue/snowflake-arctic-embed-l-v2.0-ko",
        "nlpai-lab/KURE-v1",
        "nlpai-lab/KoE5"
    ]
    # Directory for downloaded model weights; None presumably falls back to
    # the library default cache location — TODO confirm against model loader.
    cache_dir: Optional[str] = None
    max_sequence_length: int = 512

    # Vector Configuration
    vector_dimension: int = 1024  # Default for KURE-v1

    # API Limits — request batching and input-size ceilings.
    max_batch_size: int = 150
    optimal_batch_size: int = 128
    max_text_length: int = 8192

    # Security
    # NOTE(review): hardcoded default credential — ensure this is overridden
    # (or set to None) in any non-development deployment.
    api_key: Optional[str] = "sk-ragnaforge-v1-test-key-12345"  # Default test key for development
    # NOTE(review): "*" allows any origin; tighten for production.
    cors_origins: List[str] = ["*"]

    # Logging
    log_level: str = "INFO"

    # Text Chunking Defaults (Research-based optimization for business documents)
    default_chunk_strategy: str = "token"  # semantic, recursive, sentence, token
    default_chunk_size: int = 768  # Optimal range: 512-1024 tokens (research-backed)
    default_chunk_overlap: int = 100  # ~13% overlap for better context continuity
    default_chunk_language: str = "auto"

    # Search Defaults
    default_search_limit: int = 100  # Default number of search results
    default_score_threshold: float = 0.0  # Default minimum similarity score
    search_expansion_factor: int = 3  # Multiplier for initial search when using rerank

    # OpenAI API Configuration
    openai_api_key: Optional[str] = None

    # GPU Optimization Settings
    gpu_memory_fraction: float = 0.8
    cuda_visible_devices: str = "0"
    torch_cudnn_benchmark: bool = True

    # Qdrant Vector Database Settings
    qdrant_api_key: Optional[str] = None
    qdrant_host: str = "localhost"
    qdrant_port: int = 6333
    qdrant_collection_name: str = "ragnaforge_documents"

    # Search Backend Configuration — selects which backend implementations
    # the service uses for vector and full-text search respectively.
    vector_backend: str = "qdrant"  # qdrant, milvus, chroma, weaviate
    text_backend: str = "meilisearch"  # meilisearch, opensearch, elasticsearch, solr

    # MeiliSearch Configuration
    meilisearch_url: str = "http://localhost:7700/"
    meilisearch_api_key: Optional[str] = None
    meilisearch_index_name: str = "ragnaforge_documents"

    # OpenSearch Configuration (for future use)
    opensearch_host: str = "localhost"
    opensearch_port: int = 9200
    opensearch_username: Optional[str] = None
    opensearch_password: Optional[str] = None
    opensearch_index_name: str = "ragnaforge_documents"

    # Milvus Configuration (for future use)
    milvus_host: str = "localhost"
    milvus_port: int = 19530
    milvus_username: Optional[str] = None
    milvus_password: Optional[str] = None
    milvus_collection_name: str = "ragnaforge_documents"

    # Storage Configuration — on-disk layout for uploaded/processed files.
    storage_base_path: str = "./data/storage"
    upload_dir: str = "uploads"
    processed_dir: str = "processed"
    temp_dir: str = "temp"
    max_file_size_mb: int = 50

    # File Duplicate Detection Configuration
    enable_hash_duplicate_check: bool = False

    # Rerank Configuration (Optimized for 100→50 workflow)
    rerank_enabled: bool = True
    rerank_model: str = "dragonkue/bge-reranker-v2-m3-ko"
    rerank_model_type: str = "bge_m3_ko"
    rerank_top_k: int = 300  # Number of documents to rerank from initial search (3x for quality)
    rerank_final_k: int = 50  # Number of final results to return after reranking
    rerank_batch_size: int = 64  # Increased batch size for better throughput
    rerank_device: Optional[str] = None  # Auto-detect if None
    rerank_cache_enabled: bool = True
    rerank_cache_size: int = 2000  # Increased cache size for better performance

    # pydantic-settings options: load overrides from `.env` (UTF-8) and
    # disable the `model_` protected-namespace check so field names like
    # `model_config` siblings starting with "model" are permitted.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "protected_namespaces": ()
    }
# Global settings instance, constructed once at module import; environment
# variables and `.env` values are resolved at this point.
settings: Settings = Settings()