@@ -30,50 +30,7 @@ class RagConfig:
3030 rag_dir : str = ".rag_store"
3131
3232
33- _PLACEHOLDER_PATTERNS = [
34- r"^sk-your-.*-key-here$" ,
35- r"^sk-your-openai-api-key-here$" ,
36- r"^your-.*-api-key-here$" ,
37- r"^your_api_key$" ,
38- r"^api_key_here$" ,
39- r"^<.*>$" ,
40- r"^\[.*\]$" ,
41- r"^\{.*\}$" ,
42- ]
43-
44- # Mapping of known OpenAI-compatible providers to their defaults
45- # This allows using project-standard keys (e.g. DEEPSEEK_API_KEY) with RAG automatically.
46- _COMPATIBLE_PROVIDERS : Dict [str , Dict [str , str ]] = {
47- "deepseek" : {
48- "env_key" : "DEEPSEEK_API_KEY" ,
49- "base_url" : "https://api.deepseek.com/v1" ,
50- "default_model" : "" , # Let server decide or user override
51- },
52- "openrouter" : {
53- "env_key" : "OPENROUTER_API_KEY" ,
54- "base_url" : "https://openrouter.ai/api/v1" ,
55- "default_model" : "" ,
56- },
57- # Note: Gemini and Anthropic are not strictly OpenAI-compatible for embeddings (paths differ),
58- # so we do not auto-map them to AnyRoute to avoid runtime errors unless explicitly configured.
59- }
60-
61-
62- def _is_placeholder (value : Optional [str ]) -> bool :
63- if not value or not isinstance (value , str ):
64- return True
65- v = value .strip ().lower ()
66- if not v :
67- return True
68- for p in _PLACEHOLDER_PATTERNS :
69- if re .match (p , v ):
70- return True
71- # Common keywords that indicate examples
72- for k in ("placeholder" , "example" , "sample" , "demo" , "insert" , "replace" , "change-me" ):
73- if k in v :
74- return True
75- return False
76-
33+ from spoon_ai .llm .config import ConfigurationManager
7734
7835def get_default_config () -> RagConfig :
7936 backend = os .getenv ("RAG_BACKEND" , "faiss" ).lower ()
@@ -83,50 +40,66 @@ def get_default_config() -> RagConfig:
8340 chunk_size = int (os .getenv ("CHUNK_SIZE" , "800" ))
8441 chunk_overlap = int (os .getenv ("CHUNK_OVERLAP" , "120" ))
8542
86- # Embeddings provider selection
87- embeddings_provider = None
88-
89- # 1. AnyRoute (Explicit RAG config) - Highest Priority
90- anyroute_api_key = os .getenv ("ANYROUTE_API_KEY" )
91- anyroute_base = os .getenv ("ANYROUTE_BASE_URL" )
92- anyroute_model = os .getenv ("ANYROUTE_MODEL" )
43+ # Use LLM ConfigurationManager for standardized provider detection
44+ config_manager = ConfigurationManager ()
9345
94- # 2. OpenAI (Native support)
95- openai_key = os . getenv ( "OPENAI_API_KEY" )
96-
97- # Logic to determine provider
98- if ( anyroute_api_key and anyroute_base ) and not ( _is_placeholder ( anyroute_api_key ) or _is_placeholder ( anyroute_base ) ):
46+ # 1. Determine active provider
47+    # Try ANYROUTE_API_KEY explicitly first (legacy RAG priority)
48+ anyroute_key = os . getenv ( "ANYROUTE_API_KEY" )
49+ # Use static method from ConfigurationManager
50+ if anyroute_key and not ConfigurationManager . _is_placeholder_value ( anyroute_key ):
9951 embeddings_provider = "anyroute"
100- elif openai_key and not _is_placeholder (openai_key ):
101- embeddings_provider = "openai"
52+ anyroute_base = os .getenv ("ANYROUTE_BASE_URL" , "https://api.openai.com/v1" ) # Default generic
53+ anyroute_model = os .getenv ("ANYROUTE_MODEL" )
54+ openai_key = None
10255 else :
103- # 3. Try Auto-mapping compatible providers (DeepSeek, OpenRouter, etc.)
104- for name , defaults in _COMPATIBLE_PROVIDERS .items ():
105- key_val = os .getenv (defaults ["env_key" ])
106- if key_val and not _is_placeholder (key_val ):
107- embeddings_provider = "anyroute"
108- anyroute_api_key = key_val
109- # Use provider default base URL if explicit ANYROUTE_BASE_URL is missing
110- anyroute_base = anyroute_base or defaults ["base_url" ]
111- # Use provider default model if explicit ANYROUTE_MODEL is missing
112- if not anyroute_model and defaults ["default_model" ]:
113- anyroute_model = defaults ["default_model" ]
114- break
56+ # Fallback to LLM module's intelligent selection
57+ # This picks defaults based on available API keys (OpenAI > Anthropic > OpenRouter...)
58+ # Note: Anthropic/Gemini are not directly supported for embeddings here unless mapped
59+ provider = config_manager .get_default_provider ()
11560
116- # 4. Fallback
117- if not embeddings_provider :
118- embeddings_provider = "hash" # deterministic offline fallback
61+ # Load full config for the selected provider
62+ try :
63+ llm_config = config_manager .load_provider_config (provider )
64+ except Exception :
65+ llm_config = None
66+
67+ embeddings_provider = "hash" # Default fallback
68+ anyroute_key = None
69+ anyroute_base = None
70+ anyroute_model = None
71+ openai_key = None
11972
73+ if llm_config :
74+ if provider == "openai" :
75+ embeddings_provider = "openai"
76+ openai_key = llm_config .api_key
77+ elif provider in ("deepseek" , "openrouter" , "anyroute" ):
78+ # Map compatible OpenAI-like providers to AnyRoute client
79+ embeddings_provider = "anyroute"
80+ anyroute_key = llm_config .api_key
81+ anyroute_base = llm_config .base_url
82+
83+ # Check for explicit override or intelligent default
84+ env_model = os .getenv ("ANYROUTE_MODEL" )
85+ if env_model :
86+ anyroute_model = env_model
87+ elif provider == "openrouter" and "embedding" not in llm_config .model .lower ():
88+ # OpenRouter: Default to openai/text-embedding-3-small if main model is not an embedding model
89+ anyroute_model = "openai/text-embedding-3-small"
90+ else :
91+ anyroute_model = llm_config .model
92+
12093 return RagConfig (
12194 backend = backend ,
12295 collection = collection ,
12396 top_k = top_k ,
12497 chunk_size = chunk_size ,
12598 chunk_overlap = chunk_overlap ,
12699 embeddings_provider = embeddings_provider ,
127- anyroute_api_key = None if _is_placeholder ( anyroute_api_key ) else anyroute_api_key ,
128- anyroute_base_url = None if _is_placeholder ( anyroute_base ) else anyroute_base ,
100+ anyroute_api_key = anyroute_key ,
101+ anyroute_base_url = anyroute_base ,
129102 anyroute_model = anyroute_model ,
130- openai_api_key = None if _is_placeholder ( openai_key ) else openai_key ,
103+ openai_api_key = openai_key ,
131104 rag_dir = rag_dir ,
132105 )