diff --git a/.vscode/extensions.json b/.vscode/extensions.json index 3fbcf2e..5504bbe 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -1,8 +1,7 @@ { "recommendations": [ "dbaeumer.vscode-eslint", - "esbenp.prettier-vscode", "github.vscode-github-actions", - "vitest.explorer" + "esbenp.prettier-vscode" ] } diff --git a/.vscode/settings.json b/.vscode/settings.json index 9eb8646..6eed33f 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,17 +1,4 @@ { - "eslint.probe": [ - "javascript", - "javascriptreact", - "typescript", - "typescriptreact", - "markdown", - "html", - "json" - ], - - "editor.formatOnSave": true, - "editor.formatOnPaste": true, - "cSpell.words": [ "agentic", "agentify", @@ -60,6 +47,18 @@ "vinxi" ], + "eslint.probe": [ + "javascript", + "javascriptreact", + "typescript", + "typescriptreact", + "markdown", + "html", + "json" + ], + + "editor.formatOnSave": true, + "editor.formatOnPaste": true, "editor.rulers": [160], // Workbench @@ -74,16 +73,5 @@ "typescript.preferences.importModuleSpecifier": "relative", "editor.defaultFormatter": "esbenp.prettier-vscode", - "editor.codeActionsOnSave": ["source.formatDocument", "source.fixAll.eslint"], - "files.exclude": { - "**/.git": true, - "**/.nx": true - }, - - "css.validate": false, - "tailwindCSS.includeLanguages": { - "typescript": "javascript", - "typescriptreact": "javascript" - }, - "tailwindCSS.emmetCompletions": true + "editor.codeActionsOnSave": ["source.formatDocument", "source.fixAll.eslint"] } diff --git a/docs/LargeCodeBase_Plan.md b/docs/LargeCodeBase_Plan.md new file mode 100644 index 0000000..52a7781 --- /dev/null +++ b/docs/LargeCodeBase_Plan.md @@ -0,0 +1,243 @@ +# Handling Large Codebases in MyCoder: Research and Recommendations + +## Executive Summary + +This document presents research findings on how leading AI coding tools handle large codebases and provides strategic recommendations for enhancing MyCoder's performance with large projects. 
The focus is on understanding indexing and context management approaches used by Claude Code and Aider, and applying these insights to improve MyCoder's architecture. + +## Research Findings + +### Claude Code (Anthropic) + +While detailed technical documentation on Claude Code's internal architecture is limited in public sources, we can infer several approaches from Anthropic's general AI architecture and Claude Code's capabilities: + +1. **Chunking and Retrieval Augmentation**: + - Claude Code likely employs retrieval-augmented generation (RAG) to handle large codebases + - Files are likely chunked into manageable segments with semantic understanding + - Relevant code chunks are retrieved based on query relevance + +2. **Hierarchical Code Understanding**: + - Builds a hierarchical representation of code (project → modules → files → functions) + - Maintains a graph of relationships between code components + - Prioritizes context based on relevance to the current task + +3. **Incremental Context Management**: + - Dynamically adjusts the context window to include only relevant code + - Maintains a "working memory" of recently accessed or modified files + - Uses sliding context windows to process large files sequentially + +4. **Intelligent Caching**: + - Caches parsed code structures and embeddings to avoid repeated processing + - Prioritizes frequently accessed or modified files in the cache + - Implements a cache eviction strategy based on recency and relevance + +### Aider + +Aider's approach to handling large codebases can be inferred from its open-source codebase and documentation: + +1. **Git Integration**: + - Leverages Git to track file changes and understand repository structure + - Uses Git history to prioritize recently modified files + - Employs Git's diff capabilities to minimize context needed for changes + +2. 
**Selective File Context**: + - Only includes relevant files in the context rather than the entire codebase + - Uses heuristics to identify related files based on imports, references, and naming patterns + - Implements a "map-reduce" approach where it first analyzes the codebase structure, then selectively processes relevant files + +3. **Prompt Engineering and Chunking**: + - Designs prompts that can work with limited context by focusing on specific tasks + - Chunks large files and processes them incrementally + - Uses summarization to compress information about non-focal code parts + +4. **Caching Mechanisms**: + - Implements token usage optimization through caching + - Avoids redundant LLM calls for unchanged content + - Maintains a local database of file content and embeddings + +## Recommendations for MyCoder + +Based on the research findings, we recommend the following enhancements to MyCoder for better handling of large codebases: + +### 1. Implement a Multi-Level Indexing System + +``` +┌───────────────────┐ +│ Project Metadata │ +├───────────────────┤ +│ - Structure │ +│ - Dependencies │ +│ - Config Files │ +└───────┬───────────┘ + │ + ▼ +┌───────────────────┐ ┌───────────────────┐ +│ File Index │ │ Symbol Database │ +├───────────────────┤ ├───────────────────┤ +│ - Path │◄────────┤ - Functions │ +│ - Language │ │ - Classes │ +│ - Modified Date │ │ - Variables │ +│ - Size │ │ - Imports/Exports │ +└───────┬───────────┘ └───────────────────┘ + │ + ▼ +┌───────────────────┐ +│ Semantic Index │ +├───────────────────┤ +│ - Code Embeddings │ +│ - Doc Embeddings │ +│ - Relationships │ +└───────────────────┘ +``` + +**Implementation Details:** +- Create a lightweight indexer that runs during project initialization +- Generate embeddings for code files, focusing on API definitions, function signatures, and documentation +- Build a graph of relationships between files based on imports/exports and references +- Store indexes in a persistent local database for quick 
loading in future sessions + +### 2. Develop a Smart Context Management System + +``` +┌─────────────────────────┐ +│ Context Manager │ +├─────────────────────────┤ +│ ┌─────────────────────┐ │ +│ │ Working Set │ │ +│ │ (Currently relevant │ │ +│ │ files and symbols) │ │ +│ └─────────────────────┘ │ +│ │ +│ ┌─────────────────────┐ │ +│ │ Relevance Scoring │ │ +│ │ Algorithm │ │ +│ └─────────────────────┘ │ +│ │ +│ ┌─────────────────────┐ │ +│ │ Context Window │ │ +│ │ Optimization │ │ +│ └─────────────────────┘ │ +└─────────────────────────┘ +``` + +**Implementation Details:** +- Develop a working set manager that tracks currently relevant files +- Implement a relevance scoring algorithm that considers: + - Semantic similarity to the current task + - Recency of access or modification + - Dependency relationships + - User attention (files explicitly mentioned) +- Optimize context window usage by: + - Including full content for directly relevant files + - Including only signatures and documentation for related files + - Summarizing distant but potentially relevant code + - Dynamically adjusting the detail level based on available context space + +### 3. Implement Chunking and Progressive Loading + +``` +┌─────────────────────────┐ +│ Chunking Strategy │ +├─────────────────────────┤ +│ 1. Semantic Boundaries │ +│ (Classes/Functions) │ +│ 2. Size-based Chunks │ +│ with Overlap │ +│ 3. Progressive Detail │ +│ Loading │ +└─────────────────────────┘ +``` + +**Implementation Details:** +- Chunk files at meaningful boundaries (functions, classes, modules) +- Implement overlapping chunks to maintain context across boundaries +- Develop a progressive loading strategy: + - Start with high-level project structure and relevant file summaries + - Load detailed chunks as needed based on the task + - Implement a sliding context window for processing large files + +### 4. 
Create an Intelligent Caching System + +``` +┌─────────────────────────┐ +│ Caching System │ +├─────────────────────────┤ +│ ┌─────────────────────┐ │ +│ │ Token Cache │ │ +│ │ (Avoid repeated │ │ +│ │ tokenization) │ │ +│ └─────────────────────┘ │ +│ │ +│ ┌─────────────────────┐ │ +│ │ Embedding Cache │ │ +│ │ (Store vector │ │ +│ │ representations) │ │ +│ └─────────────────────┘ │ +│ │ +│ ┌─────────────────────┐ │ +│ │ Prompt Template │ │ +│ │ Cache │ │ +│ └─────────────────────┘ │ +└─────────────────────────┘ +``` + +**Implementation Details:** +- Implement a multi-level caching system: + - Token cache: Store tokenized representations of files to avoid re-tokenization + - Embedding cache: Store vector embeddings for semantic search + - Prompt template cache: Cache commonly used prompt templates +- Develop an efficient cache invalidation strategy based on file modifications +- Use persistent storage for caches to maintain performance across sessions + +### 5. Enhance Sub-Agent Coordination for Parallel Processing + +``` +┌─────────────────────────┐ +│ Sub-Agent Coordinator │ +├─────────────────────────┤ +│ ┌─────────────────────┐ │ +│ │ Task Decomposition │ │ +│ └─────────────────────┘ │ +│ │ +│ ┌─────────────────────┐ │ +│ │ Context Distribution│ │ +│ └─────────────────────┘ │ +│ │ +│ ┌─────────────────────┐ │ +│ │ Result Integration │ │ +│ └─────────────────────┘ │ +└─────────────────────────┘ +``` + +**Implementation Details:** +- Improve task decomposition to identify parallelizable sub-tasks +- Implement smart context distribution to sub-agents: + - Provide each sub-agent with only the context it needs + - Share common context like project structure across all sub-agents + - Use a shared index to avoid duplicating large context elements +- Develop better coordination mechanisms for sub-agents: + - Implement a message-passing system for inter-agent communication + - Create a shared memory space for efficient information exchange + - Design a result 
integration system to combine outputs from multiple sub-agents + +## Implementation Roadmap + +### Phase 1: Foundation (1-2 months) +- Develop the basic indexing system for project structure and file metadata +- Implement a simple relevance-based context selection mechanism +- Create a basic chunking strategy for large files + +### Phase 2: Advanced Features (2-3 months) +- Implement the semantic indexing system with code embeddings +- Develop the full context management system with working sets +- Create the multi-level caching system + +### Phase 3: Optimization and Integration (1-2 months) +- Enhance sub-agent coordination for parallel processing +- Optimize performance with better caching and context management +- Integrate all components into a cohesive system + +## Conclusion + +By implementing these recommendations, MyCoder can significantly improve its performance with large codebases. The multi-level indexing system will provide a comprehensive understanding of the codebase structure, while the smart context management system will ensure that the most relevant code is included in the context window. The chunking and progressive loading strategy will enable handling of files that exceed the context window size, and the intelligent caching system will optimize token usage and improve response times. Finally, enhanced sub-agent coordination will enable efficient parallel processing of large codebases. + +These enhancements will position MyCoder as a leading tool for AI-assisted coding, capable of handling projects of any size with intelligent context management and efficient resource utilization. 
diff --git a/docs/comparisons/mycoder-vs-aider.md b/docs/comparisons/mycoder-vs-aider.md new file mode 100644 index 0000000..21f0603 --- /dev/null +++ b/docs/comparisons/mycoder-vs-aider.md @@ -0,0 +1,104 @@ +--- +title: MyCoder vs Aider - AI Coding Assistant Comparison +description: A detailed comparison of features and capabilities between MyCoder and Aider AI coding assistants +--- + +# MyCoder vs Aider: AI Coding Assistant Comparison + +When choosing an AI coding assistant for your development workflow, it's important to understand the strengths and capabilities of available options. This comparison examines two popular tools: **MyCoder** and **Aider**. + +## Overview + +| Feature | MyCoder | Aider | +| --------------- | ------------------------- | --------------------------- | +| **Language** | TypeScript | Python | +| **Interface** | CLI with web capabilities | Terminal-based | +| **Source Code** | Open source | Open source | +| **LLM Support** | Multiple models | Multiple models | +| **Codebase** | Simple, easy to read | Complex but well-documented | + +## Key Differences + +### Architecture & Implementation + +**MyCoder** is built with TypeScript, offering a clean, modular codebase that's easy to understand and extend. Its architecture supports parallel execution through sub-agents, allowing for efficient handling of complex tasks. + +**Aider** is implemented in Python and runs primarily in the terminal. It provides a robust terminal-based UI that integrates deeply with your development environment. + +### Git Integration + +**MyCoder** currently has basic integration with local repositories but does not yet offer built-in commit capabilities. 
+ +**Aider** shines with its comprehensive Git integration: + +- Works directly in your local Git repository +- Automatically stages and commits changes it makes +- Generates descriptive commit messages or accepts custom ones +- Respects .gitignore rules + +### Development Workflow + +**MyCoder** features: + +- Extensible tool system for various coding tasks +- Parallel execution with sub-agents +- Self-modification capabilities +- Smart logging with hierarchical, color-coded output + +**Aider** features: + +- Terminal-based UI optimized for coding workflows +- Voice-to-code support using Whisper +- Image and URL input analysis +- Prompt caching for speed optimization +- IDE plugins for editor integration + +## Use Case Recommendations + +**Consider MyCoder if:** + +- You prefer working with TypeScript/JavaScript +- You need a modular, extensible AI coding assistant +- You value parallel task execution for complex projects +- You want a simple, easy-to-understand codebase + +**Consider Aider if:** + +- You work extensively with Git and want automated commits +- You prefer a Python-based solution +- You want voice-to-code capabilities +- You need tight integration with your terminal workflow + +## Feature Comparison Table + +| Feature | MyCoder | Aider | +| ------------------------- | :-------: | :-----------: | +| **Version Control** | | | +| Git Integration | ✓ (Basic) | ✓✓ (Advanced) | +| Automatic Commits | ❌ | ✓ | +| Commit Message Generation | ❌ | ✓ | +| **Interface** | | | +| Terminal UI | ✓ | ✓ | +| Web Interface | ✓ | ✓ | +| IDE Integration | ❌ | ✓ | +| **Input Methods** | | | +| Text Commands | ✓ | ✓ | +| Voice Input | ❌ | ✓ | +| Image Analysis | ❌ | ✓ | +| URL Analysis | ❌ | ✓ | +| **Architecture** | | | +| Parallel Execution | ✓ | ❌ | +| Extensible Tools | ✓ | ✓ | +| Self-Modification | ✓ | ❌ | +| **Performance** | | | +| Prompt Caching | ❌ | ✓ | +| **Language Support** | | | +| Multiple LLM Support | ✓ | ✓ | + +## Conclusion + +Both MyCoder and Aider 
offer compelling features for developers looking to incorporate AI into their coding workflow. MyCoder stands out with its TypeScript implementation, modular architecture, and parallel execution capabilities, while Aider excels with its advanced Git integration, voice input support, and terminal-centric approach. + +The choice between these tools ultimately depends on your specific workflow needs, programming language preferences, and which features you prioritize in an AI coding assistant. + +As both tools are open source, they continue to evolve with new features and improvements, making them valuable additions to any developer's toolkit. diff --git a/docs/comparisons/mycoder-vs-claude-code.md b/docs/comparisons/mycoder-vs-claude-code.md new file mode 100644 index 0000000..e9bf27d --- /dev/null +++ b/docs/comparisons/mycoder-vs-claude-code.md @@ -0,0 +1,103 @@ +--- +title: MyCoder vs Claude Code - AI Coding Assistant Comparison +description: A detailed comparison of features and capabilities between MyCoder and Claude Code AI coding assistants +--- + +# MyCoder vs Claude Code: AI Coding Assistant Comparison + +When selecting an AI coding assistant to enhance your development workflow, understanding the differences between available options is crucial. This comparison examines two notable tools: **MyCoder** and **Claude Code**. + +## Overview + +| Feature | MyCoder | Claude Code | +| ------------------ | ------------------ | --------------------------- | +| **Source Code** | Open source | Closed source | +| **Availability** | Publicly available | Limited beta | +| **LLM Support** | Multiple models | Anthropic models only | +| **Implementation** | TypeScript | N/A (proprietary) | +| **Integration** | Flexible | Tied to Anthropic ecosystem | + +## Key Differences + +### Accessibility & Implementation + +**MyCoder** is an open-source solution built with TypeScript, offering transparency and the ability to customize the codebase to your specific needs. 
Its modular architecture makes it accessible and extensible for developers who want to understand or modify how it works. + +**Claude Code** is a closed-source solution developed by Anthropic, currently available as a limited beta. While it leverages Anthropic's powerful AI models, it doesn't provide the same level of transparency or customization as open-source alternatives. + +### Capabilities & Approach + +**MyCoder** features: + +- AI-powered coding assistance using various LLM models +- Extensible tool system for different coding tasks +- Parallel execution with sub-agents for complex projects +- Self-modification capabilities +- Smart logging with hierarchical, color-coded output + +**Claude Code** features: + +- Advanced task decomposition into manageable sub-tasks +- Code writing and editing capabilities +- Testing and debugging functionality +- Access to documentation within its context +- Support for various programming languages +- Detailed explanations of code and thought process + +### Limitations + +**MyCoder** limitations: + +- Currently lacks advanced Git integration with commit capabilities +- Does not yet have built-in voice input support + +**Claude Code** limitations: + +- Limited availability (limited beta) +- Cannot access the internet or external resources beyond provided context +- Tied exclusively to Anthropic's models +- Generated code may require significant human review + +## Use Case Recommendations + +**Consider MyCoder if:** + +- You value open-source transparency and customization +- You need support for multiple LLM providers +- You want a modular, extensible AI coding assistant +- You need parallel task execution for complex projects + +**Consider Claude Code if:** + +- You're already using Anthropic's ecosystem +- You need advanced task decomposition capabilities +- You value detailed explanations of code and reasoning +- You're able to access the limited beta + +## Feature Comparison Table + +| Feature | MyCoder | Claude Code | +| 
-------------------- | :-------: | :-----------------: | +| **Accessibility** | | | +| Open Source | ✓ | ❌ | +| Public Availability | ✓ | ❌ (Limited beta) | +| **Architecture** | | | +| Multiple LLM Support | ✓ | ❌ (Anthropic only) | +| Parallel Execution | ✓ | ✓ | +| Extensible Tools | ✓ | N/A | +| Self-Modification | ✓ | ❌ | +| **Capabilities** | | | +| Task Decomposition | ✓ (Basic) | ✓✓ (Advanced) | +| Code Generation | ✓ | ✓ | +| Code Explanation | ✓ | ✓✓ (Detailed) | +| Documentation Access | ✓ | ✓ | +| **Integration** | | | +| External Ecosystem | ✓ | ❌ (Anthropic only) | + +## Conclusion + +Both MyCoder and Claude Code offer valuable AI-powered coding assistance, but with different approaches and strengths. MyCoder provides an open-source, customizable solution with support for multiple LLMs and parallel execution, while Claude Code offers advanced task decomposition and detailed explanations within Anthropic's ecosystem. + +The choice between these tools depends on your specific needs, including how much you value open-source accessibility, whether you need multi-LLM support, and whether you require advanced task decomposition capabilities. + +As AI coding assistants continue to evolve, both tools represent different approaches to enhancing developer productivity through artificial intelligence. diff --git a/docs/comparisons/mycoder-vs-codebuff.md b/docs/comparisons/mycoder-vs-codebuff.md new file mode 100644 index 0000000..4f57b5f --- /dev/null +++ b/docs/comparisons/mycoder-vs-codebuff.md @@ -0,0 +1,95 @@ +--- +title: MyCoder vs CodeBuff - AI Coding Assistant Comparison +description: A detailed comparison of features and capabilities between MyCoder and CodeBuff AI coding assistants +--- + +# MyCoder vs CodeBuff: AI Coding Assistant Comparison + +When evaluating AI coding assistants to enhance your development workflow, it's important to understand the differences between available tools. 
This comparison examines two notable options: **MyCoder** and **CodeBuff**. + +## Overview + +| Feature | MyCoder | CodeBuff | +| ------------------ | -------------------------- | ----------------------------- | +| **Source Code** | Open source | Closed source (commercial) | +| **Pricing** | Free | Paid subscription | +| **Implementation** | TypeScript | Not disclosed | +| **Interface** | CLI with web capabilities | Web-based UI | +| **Primary Focus** | Versatile coding assistant | Natural language code editing | + +## Key Differences + +### Business Model & Accessibility + +**MyCoder** is an open-source solution that's freely available to all developers. Its codebase is transparent, allowing for community contributions and customizations to fit specific needs. + +**CodeBuff** is a commercial product with a subscription-based model. While this means it's not free to use, it may offer more polished features and dedicated support. + +### Interface & User Experience + +**MyCoder** provides a command-line interface that's supplemented with web capabilities. This approach caters to developers who prefer terminal-based workflows while still offering flexibility. + +**CodeBuff** focuses on a web-based interface that emphasizes natural language interaction. Its design appears to prioritize ease of use and accessibility for developers who prefer a more visual approach. 
+ +### Core Capabilities + +**MyCoder** features: + +- AI-powered coding assistance using various LLM models +- Extensible tool system for different coding tasks +- Parallel execution with sub-agents for complex projects +- Self-modification capabilities +- Smart logging with hierarchical, color-coded output + +**CodeBuff** features: + +- Natural language code editing +- Terminal command execution via natural language +- Codebase navigation and modification +- Integration with existing development environments +- Focus on speed and efficiency in development workflows + +## Use Case Recommendations + +**Consider MyCoder if:** + +- You value open-source transparency and customization +- You prefer working in a terminal/CLI environment +- You need support for multiple LLM providers +- You want a modular, extensible AI coding assistant +- You need parallel task execution for complex projects + +**Consider CodeBuff if:** + +- You prefer a polished, web-based interface +- You value natural language interaction for code editing +- You're willing to pay for a commercial solution +- You want a tool optimized for speed and efficiency +- You need dedicated support for your development workflow + +## Feature Comparison Table + +| Feature | MyCoder | CodeBuff | +| -------------------------- | :-------: | :------------: | +| **Accessibility** | | | +| Open Source | ✓ | ❌ | +| Free to Use | ✓ | ❌ | +| **Interface** | | | +| CLI | ✓ | ✓ | +| Web UI | ✓ (Basic) | ✓✓ (Advanced) | +| Natural Language Commands | ✓ | ✓✓ (Optimized) | +| **Architecture** | | | +| Multiple LLM Support | ✓ | Unknown | +| Parallel Execution | ✓ | Unknown | +| Self-Modification | ✓ | ❌ | +| **Integration** | | | +| Terminal Command Execution | ✓ | ✓ | +| IDE Integration | ❌ | Unknown | + +## Conclusion + +Both MyCoder and CodeBuff offer valuable AI-powered coding assistance, but with different approaches and business models. 
MyCoder provides an open-source, customizable solution with support for multiple LLMs and parallel execution, while CodeBuff offers a commercial product with an emphasis on natural language interaction and a polished web interface. + +The choice between these tools depends on your specific needs, including how much you value open-source accessibility, whether you prefer CLI or web-based interfaces, and whether you're willing to pay for a commercial solution. + +As AI coding assistants continue to evolve, both tools represent different approaches to enhancing developer productivity through artificial intelligence.