AsyncFuncAI
diff --git a/api/api.py b/api/api.py
Lines changed: 7 additions & 2 deletions
diff --git a/api/config/generator.json b/api/config/generator.json
Lines changed: 5 additions & 1 deletion
diff --git a/api/data_pipeline.py b/api/data_pipeline.py
Lines changed: 129 additions & 49 deletions
diff --git a/api/ollama_patch.py b/api/ollama_patch.py
Lines changed: 22 additions & 4 deletions
@@ -1,6 +1,6 @@
 import os
 import logging
-from fastapi import FastAPI, HTTPException, Query, Request
+from fastapi import FastAPI, HTTPException, Query, Request, WebSocket
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse, Response
 from typing import List, Optional, Dict, Any, Literal
@@ -351,10 +351,14 @@ def generate_json_export(repo_url: str, pages: List[WikiPage]) -> str:
 
 # Import the simplified chat implementation
 from api.simple_chat import chat_completions_stream
+from api.websocket_wiki import handle_websocket_chat
 
 # Add the chat_completions_stream endpoint to the main app
 app.add_api_route("/chat/completions/stream", chat_completions_stream, methods=["POST"])
 
+# Add the WebSocket endpoint
+app.add_websocket_route("/ws/chat", handle_websocket_chat)
+
 # --- Wiki Cache Helper Functions ---
 
 WIKI_CACHE_DIR = os.path.join(get_adalflow_default_root_path(), "wikicache")
@@ -475,7 +479,8 @@ async def root():
         "version": "1.0.0",
         "endpoints": {
             "Chat": [
-                "POST /chat/completions/stream - Streaming chat completion",
+                "POST /chat/completions/stream - Streaming chat completion (HTTP)",
+                "WebSocket /ws/chat - WebSocket chat completion",
             ],
             "Wiki": [
                 "POST /export/wiki - Export wiki content as Markdown or JSON",
 
@@ -10,7 +10,7 @@
           "top_p": 0.8,
           "top_k": 20
         },
-        "gemini-2.5-flash-preview-04-17": {
+        "gemini-2.5-flash-preview-05-20": {
           "temperature": 0.7,
           "top_p": 0.8,
           "top_k": 20
@@ -56,6 +56,10 @@
           "temperature": 0.7,
           "top_p": 0.8
         },
+        "deepseek/deepseek-r1": {
+          "temperature": 0.7,
+          "top_p": 0.8
+        },
         "openai/gpt-4.1": {
           "temperature": 0.7,
           "top_p": 0.8
 
@@ -117,7 +117,8 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t
 # Alias for backward compatibility
 download_github_repo = download_repo
 
-def read_all_documents(path: str, local_ollama: bool = False, excluded_dirs: List[str] = None, excluded_files: List[str] = None):
+def read_all_documents(path: str, local_ollama: bool = False, excluded_dirs: List[str] = None, excluded_files: List[str] = None,
+                      included_dirs: List[str] = None, included_files: List[str] = None):
     """
     Recursively reads all documents in a directory and its subdirectories.
 
@@ -128,6 +129,10 @@ def read_all_documents(path: str, local_ollama: bool = False, excluded_dirs: Lis
             Overrides the default configuration if provided.
         excluded_files (List[str], optional): List of file patterns to exclude from processing.
             Overrides the default configuration if provided.
+        included_dirs (List[str], optional): List of directories to include exclusively.
+            When provided, only files in these directories will be processed.
+        included_files (List[str], optional): List of file patterns to include exclusively.
+            When provided, only files matching these patterns will be processed.
 
     Returns:
         list: A list of Document objects with metadata.
@@ -138,52 +143,130 @@ def read_all_documents(path: str, local_ollama: bool = False, excluded_dirs: Lis
                        ".jsx", ".tsx", ".html", ".css", ".php", ".swift", ".cs"]
     doc_extensions = [".md", ".txt", ".rst", ".json", ".yaml", ".yml"]
 
-    # Always start with default excluded directories and files
-    final_excluded_dirs = set(DEFAULT_EXCLUDED_DIRS)
-    final_excluded_files = set(DEFAULT_EXCLUDED_FILES)
+    # Determine filtering mode: inclusion or exclusion
+    use_inclusion_mode = (included_dirs is not None and len(included_dirs) > 0) or (included_files is not None and len(included_files) > 0)
 
-    # Add any additional excluded directories from config
-    if "file_filters" in configs and "excluded_dirs" in configs["file_filters"]:
-        final_excluded_dirs.update(configs["file_filters"]["excluded_dirs"])
+    if use_inclusion_mode:
+        # Inclusion mode: only process specified directories and files
+        final_included_dirs = set(included_dirs) if included_dirs else set()
+        final_included_files = set(included_files) if included_files else set()
 
-    # Add any additional excluded files from config
-    if "file_filters" in configs and "excluded_files" in configs["file_filters"]:
-        final_excluded_files.update(configs["file_filters"]["excluded_files"])
+        logger.info(f"Using inclusion mode")
+        logger.info(f"Included directories: {list(final_included_dirs)}")
+        logger.info(f"Included files: {list(final_included_files)}")
 
-    # Add any explicitly provided excluded directories and files
-    if excluded_dirs is not None:
-        final_excluded_dirs.update(excluded_dirs)
+        # Convert to lists for processing
+        included_dirs = list(final_included_dirs)
+        included_files = list(final_included_files)
+        excluded_dirs = []
+        excluded_files = []
+    else:
+        # Exclusion mode: use default exclusions plus any additional ones
+        final_excluded_dirs = set(DEFAULT_EXCLUDED_DIRS)
+        final_excluded_files = set(DEFAULT_EXCLUDED_FILES)
+
+        # Add any additional excluded directories from config
+        if "file_filters" in configs and "excluded_dirs" in configs["file_filters"]:
+            final_excluded_dirs.update(configs["file_filters"]["excluded_dirs"])
+
+        # Add any additional excluded files from config
+        if "file_filters" in configs and "excluded_files" in configs["file_filters"]:
+            final_excluded_files.update(configs["file_filters"]["excluded_files"])
+
+        # Add any explicitly provided excluded directories and files
+        if excluded_dirs is not None:
+            final_excluded_dirs.update(excluded_dirs)
 
-    if excluded_files is not None:
-        final_excluded_files.update(excluded_files)
+        if excluded_files is not None:
+            final_excluded_files.update(excluded_files)
 
-    # Convert back to lists for compatibility
-    excluded_dirs = list(final_excluded_dirs)
-    excluded_files = list(final_excluded_files)
+        # Convert back to lists for compatibility
+        excluded_dirs = list(final_excluded_dirs)
+        excluded_files = list(final_excluded_files)
+        included_dirs = []
+        included_files = []
 
-    logger.info(f"Using excluded directories: {excluded_dirs}")
-    logger.info(f"Using excluded files: {excluded_files}")
+        logger.info(f"Using exclusion mode")
+        logger.info(f"Excluded directories: {excluded_dirs}")
+        logger.info(f"Excluded files: {excluded_files}")
 
     logger.info(f"Reading documents from {path}")
 
-    # Process code files first
-    for ext in code_extensions:
-        files = glob.glob(f"{path}/**/*{ext}", recursive=True)
-        for file_path in files:
-            # Skip excluded directories and files
+    def should_process_file(file_path: str, use_inclusion: bool, included_dirs: List[str], included_files: List[str],
+                           excluded_dirs: List[str], excluded_files: List[str]) -> bool:
+        """
+        Determine if a file should be processed based on inclusion/exclusion rules.
+
+        In inclusion mode a file is accepted when any component of its path
+        equals one of ``included_dirs`` or when its base name ends with one of
+        ``included_files``; the exclusion lists are ignored. In exclusion mode
+        a file is rejected when any component of its path equals an excluded
+        directory or when its base name equals an excluded file name.
+
+        Args:
+            file_path (str): The file path to check
+            use_inclusion (bool): Whether to use inclusion mode
+            included_dirs (List[str]): List of directories to include
+            included_files (List[str]): List of file name suffixes to include
+            excluded_dirs (List[str]): List of directories to exclude
+            excluded_files (List[str]): List of file names to exclude
+
+        Returns:
+            bool: True if the file should be processed, False otherwise
+        """
+        file_path_parts = os.path.normpath(file_path).split(os.sep)
+        file_name = os.path.basename(file_path)
+
+        if use_inclusion:
+            # With no inclusion rules at all there is nothing to filter on
+            if not included_dirs and not included_files:
+                return True
+
+            # Check if file is in an included directory
+            for included in included_dirs or []:
+                # Remove only a literal "./" prefix and surrounding slashes.
+                # str.strip("./") treats its argument as a character set and would
+                # also eat the leading dot of names such as ".github" or ".venv".
+                clean_included = included[2:] if included.startswith("./") else included
+                clean_included = clean_included.strip("/")
+                # "", "." and "./" all denote the repository root: include everything
+                if clean_included in ("", ".") or clean_included in file_path_parts:
+                    return True
+
+            # Check if file matches included file patterns (suffix match on the base name)
+            return any(file_name.endswith(included_file) for included_file in included_files or [])
+        else:
+            # Exclusion mode: file must not be in excluded directories or match excluded files
             is_excluded = False
+
             # Check if file is in an excluded directory
-            file_path_parts = os.path.normpath(file_path).split(os.sep)
             for excluded in excluded_dirs:
-                # Remove ./ prefix and trailing slash if present
+                # NOTE(review): strip("./") strips a character set, so "./.venv/" becomes
+                # "venv"; left unchanged because existing exclusion configs may rely on it.
                 clean_excluded = excluded.strip("./").rstrip("/")
-                # Check if the excluded directory is in the path components
                 if clean_excluded in file_path_parts:
                     is_excluded = True
                     break
-            if not is_excluded and any(os.path.basename(file_path) == excluded for excluded in excluded_files):
-                is_excluded = True
-            if is_excluded:
+
+            # Check if file matches excluded file patterns
+            if not is_excluded:
+                for excluded_file in excluded_files:
+                    if file_name == excluded_file:
+                        is_excluded = True
+                        break
+
+            return not is_excluded
+
+    # Process code files first
+    for ext in code_extensions:
+        files = glob.glob(f"{path}/**/*{ext}", recursive=True)
+        for file_path in files:
+            # Check if file should be processed based on inclusion/exclusion rules
+            if not should_process_file(file_path, use_inclusion_mode, included_dirs, included_files, excluded_dirs, excluded_files):
                 continue
 
             try:
@@ -223,20 +306,8 @@ def read_all_documents(path: str, local_ollama: bool = False, excluded_dirs: Lis
     for ext in doc_extensions:
         files = glob.glob(f"{path}/**/*{ext}", recursive=True)
         for file_path in files:
-            # Skip excluded directories and files
-            is_excluded = False
-            # Check if file is in an excluded directory
-            file_path_parts = os.path.normpath(file_path).split(os.sep)
-            for excluded in excluded_dirs:
-                # Remove ./ prefix and trailing slash if present
-                clean_excluded = excluded.strip("./").rstrip("/")
-                # Check if the excluded directory is in the path components
-                if clean_excluded in file_path_parts:
-                    is_excluded = True
-                    break
-            if not is_excluded and any(os.path.basename(file_path) == excluded for excluded in excluded_files):
-                is_excluded = True
-            if is_excluded:
+            # Check if file should be processed based on inclusion/exclusion rules
+            if not should_process_file(file_path, use_inclusion_mode, included_dirs, included_files, excluded_dirs, excluded_files):
                 continue
 
             try:
@@ -572,7 +643,8 @@ def __init__(self):
         self.repo_paths = None
 
     def prepare_database(self, repo_url_or_path: str, type: str = "github", access_token: str = None, local_ollama: bool = False,
-                       excluded_dirs: List[str] = None, excluded_files: List[str] = None) -> List[Document]:
+                       excluded_dirs: List[str] = None, excluded_files: List[str] = None,
+                       included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]:
         """
         Create a new database from the repository.
 
@@ -582,13 +654,16 @@ def prepare_database(self, repo_url_or_path: str, type: str = "github", access_t
             local_ollama (bool): Whether to use local Ollama for embedding (default: False)
             excluded_dirs (List[str], optional): List of directories to exclude from processing
             excluded_files (List[str], optional): List of file patterns to exclude from processing
+            included_dirs (List[str], optional): List of directories to include exclusively
+            included_files (List[str], optional): List of file patterns to include exclusively
 
         Returns:
             List[Document]: List of Document objects
         """
         self.reset_database()
         self._create_repo(repo_url_or_path, type, access_token)
-        return self.prepare_db_index(local_ollama=local_ollama, excluded_dirs=excluded_dirs, excluded_files=excluded_files)
+        return self.prepare_db_index(local_ollama=local_ollama, excluded_dirs=excluded_dirs, excluded_files=excluded_files,
+                                   included_dirs=included_dirs, included_files=included_files)
 
     def reset_database(self):
         """
@@ -659,14 +734,17 @@ def _create_repo(self, repo_url_or_path: str, type: str = "github", access_token
             logger.error(f"Failed to create repository structure: {e}")
             raise
 
-    def prepare_db_index(self, local_ollama: bool = False, excluded_dirs: List[str] = None, excluded_files: List[str] = None) -> List[Document]:
+    def prepare_db_index(self, local_ollama: bool = False, excluded_dirs: List[str] = None, excluded_files: List[str] = None,
+                        included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]:
         """
         Prepare the indexed database for the repository.
 
         Args:
             local_ollama (bool): Whether to use local Ollama for embedding (default: False)
             excluded_dirs (List[str], optional): List of directories to exclude from processing
             excluded_files (List[str], optional): List of file patterns to exclude from processing
+            included_dirs (List[str], optional): List of directories to include exclusively
+            included_files (List[str], optional): List of file patterns to include exclusively
 
         Returns:
             List[Document]: List of Document objects
@@ -690,7 +768,9 @@ def prepare_db_index(self, local_ollama: bool = False, excluded_dirs: List[str]
             self.repo_paths["save_repo_dir"],
             local_ollama=local_ollama,
             excluded_dirs=excluded_dirs,
-            excluded_files=excluded_files
+            excluded_files=excluded_files,
+            included_dirs=included_dirs,
+            included_files=included_files
         )
         self.db = transform_documents_and_save_to_db(
             documents, self.repo_paths["save_db_file"], local_ollama=local_ollama
 
@@ -26,16 +26,34 @@ def __call__(self, documents: Sequence[Document]) -> Sequence[Document]:
         output = deepcopy(documents)
         logger.info(f"Processing {len(output)} documents individually for Ollama embeddings")
 
+        successful_docs = []
+        expected_embedding_size = None
+
         for i, doc in enumerate(tqdm(output, desc="Processing documents for Ollama embeddings")):
             try:
                 # Get embedding for a single document
                 result = self.embedder(input=doc.text)
                 if result.data and len(result.data) > 0:
+                    embedding = result.data[0].embedding
+
+                    # Validate embedding size consistency
+                    if expected_embedding_size is None:
+                        expected_embedding_size = len(embedding)
+                        logger.info(f"Expected embedding size set to: {expected_embedding_size}")
+                    elif len(embedding) != expected_embedding_size:
+                        file_path = getattr(doc, 'meta_data', {}).get('file_path', f'document_{i}')
+                        logger.warning(f"Document '{file_path}' has inconsistent embedding size {len(embedding)} != {expected_embedding_size}, skipping")
+                        continue
+
                     # Assign the embedding to the document
-                    output[i].vector = result.data[0].embedding
+                    output[i].vector = embedding
+                    successful_docs.append(output[i])
                 else:
-                    logger.warning(f"Failed to get embedding for document {i}")
+                    file_path = getattr(doc, 'meta_data', {}).get('file_path', f'document_{i}')
+                    logger.warning(f"Failed to get embedding for document '{file_path}', skipping")
             except Exception as e:
-                logger.error(f"Error processing document {i}: {e}")
+                file_path = getattr(doc, 'meta_data', {}).get('file_path', f'document_{i}')
+                logger.error(f"Error processing document '{file_path}': {e}, skipping")
 
-        return output
+        logger.info(f"Successfully processed {len(successful_docs)}/{len(output)} documents with consistent embeddings")
+        return successful_docs