added tests and ci/cd

nklsw · nklsw · commit cfc8452a64b4 · 2025-04-14T15:18:36.000+02:00
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
@@ -0,0 +1,45 @@
+name: Code Checks
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+
+
+concurrency:
+  group:
+    ${{ github.workflow }}-${{ github.ref_name }}-${{
+    github.event.pull_request.number || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      # The action provides the ruff binary used by the explicit runs below.
+      - uses: astral-sh/ruff-action@v3
+      - run: ruff check
+      - run: ruff format --check
+  
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.11", "3.12", "3.13"]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          # Select the interpreter for this matrix entry. Without this input
+          # the matrix value is never used and every job runs the same Python.
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install the project
+        run: uv sync --all-extras --dev
+
+      - name: Run tests
+        run: uv run pytest
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -0,0 +1,28 @@
+name: Publish
+
+on:
+  release:
+    types: ["published"]
+
+jobs:
+  run:
+    name: "Build and publish release"
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+          cache-dependency-glob: uv.lock
+
+      - name: Set up Python
+        # Was `uv uv sync ...`; the duplicated `uv` is not a valid subcommand
+        # and made the release job fail before building.
+        run: uv sync --all-extras --dev
+
+      - name: Build
+        run: uv build
+
+      - name: Publish
+        # Hand the token to uv through its environment variable instead of
+        # interpolating the secret into the shell command line.
+        env:
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
+        run: uv publish
diff --git a/README.md b/README.md
@@ -0,0 +1,82 @@
+# docs2llm
+
+A command-line tool to extract documentation from local directories and GitHub repositories, formatting it for use as context with Large Language Models (LLMs).
+
+## Purpose
+
+docs2llm helps you capture documentation from codebases to use as context for AI assistants and large language models. It searches for documentation files (markdown, text, etc.), processes them, and creates a single consolidated file that can be used as reference material for LLMs.
+
+## Features
+
+- Extract documentation from local directories or GitHub repositories
+- Automatically identify and process common documentation files
+- Prioritize README files and important documentation
+- Support for multiple file formats (Markdown, RST, TXT)
+- Format output for optimal LLM context
+- Control scan depth to manage output size
+- Clone specific branches from Git repositories
+- Detailed logging with configurable verbosity
+
+## Installation
+
+```bash
+# Install from PyPI
+pip install docs2llm
+
+```
+
+## Usage
+
+### Command Line Interface
+
+```bash
+# Extract docs from a local directory
+docs2llm /path/to/project --output context.txt
+
+# Extract docs from a GitHub repository
+docs2llm --git owner/repo --output context.txt
+
+# Specify a branch
+docs2llm --git owner/repo --branch develop
+
+# Control scan depth
+docs2llm /path/to/project --max-depth 2
+
+# Enable verbose logging
+docs2llm /path/to/project -v
+
+# Write logs to a file
+docs2llm /path/to/project --log-file extraction.log
+```
+
+### Options
+
+- `PATH`: Local directory containing documentation files
+- `--git`: GitHub repository URL or owner/repo format
+- `--output`: Output file name (default: llm_context.txt)
+- `--max-depth`: Maximum directory depth to search (default: 3)
+- `--branch`: Specific branch to clone (only used with --git)
+- `--verbose`, `-v`: Enable verbose logging
+- `--log-file`: Log to this file in addition to console
+
+### Python API
+
+```python
+from docs2llm import extract_documentation
+
+# Extract from local directory
+success = extract_documentation(
+    local_path="/path/to/project",
+    output_file="context.txt",
+    max_depth=3,
+    verbose=True
+)
+
+# Extract from GitHub repository
+success = extract_documentation(
+    git_repo="owner/repo",
+    output_file="context.txt",
+    branch="main",
+    verbose=True
+)
+```
diff --git a/pyproject.toml b/pyproject.toml
@@ -24,5 +24,6 @@ build-backend = "hatchling.build"
 
 [dependency-groups]
 dev = [
+    "pytest>=8.3.5",
     "ruff>=0.11.5",
 ]
diff --git a/test.log b/test.log
@@ -0,0 +1,14 @@
+2025-04-14 14:57:06,943 - INFO - Logging to file: test.log
+2025-04-14 14:57:06,943 - DEBUG - Logging initialized in DEBUG mode
+2025-04-14 14:57:06,944 - INFO - Documentation Extractor started
+2025-04-14 14:57:06,944 - DEBUG - Created temporary directory: /var/folders/_b/8q27qz850hq967ptd74hy3mw0000gn/T/tmpl22fi5zv
+2025-04-14 14:57:06,945 - INFO - Cloning repository https://github.com/owner/repo.git to temporary directory
+2025-04-14 14:57:07,305 - ERROR - Git clone failed: Cloning into '/var/folders/_b/8q27qz850hq967ptd74hy3mw0000gn/T/tmpl22fi5zv'...
+remote: Repository not found.
+fatal: repository 'https://github.com/owner/repo.git/' not found
+
+2025-04-14 14:57:07,307 - ERROR - Error during clone: Failed to clone repository: Cloning into '/var/folders/_b/8q27qz850hq967ptd74hy3mw0000gn/T/tmpl22fi5zv'...
+remote: Repository not found.
+fatal: repository 'https://github.com/owner/repo.git/' not found
+
+2025-04-14 14:57:07,308 - ERROR - Repository cloning failed. Exiting.
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -0,0 +1,147 @@
+import os
+import tempfile
+import pytest
+from click.testing import CliRunner
+from unittest.mock import patch
+from docs2llm.cli import main
+
+
+@pytest.fixture
+def runner():
+    """Return a fresh Click ``CliRunner`` for invoking the CLI in tests."""
+    cli_runner = CliRunner()
+    return cli_runner
+
+
+@pytest.fixture
+def temp_dir():
+    """Yield a temporary directory path that is removed after the test."""
+    scratch = tempfile.TemporaryDirectory()
+    try:
+        yield scratch.name
+    finally:
+        # Mirrors what leaving the ``with`` block would do.
+        scratch.cleanup()
+
+
+def test_cli_help(runner):
+    """``--help`` exits with 0 and shows the description and key options."""
+    outcome = runner.invoke(main, ["--help"])
+    assert outcome.exit_code == 0
+
+    help_text = outcome.output
+    assert "Generate LLM context from documentation" in help_text
+    for option in ("--git", "--output", "--max-depth"):
+        assert option in help_text
+
+
+def test_cli_missing_inputs(runner):
+    """Invoking the CLI with no arguments must fail with an explanatory error."""
+    expected = "Error: Either a local path or --git option must be provided"
+    outcome = runner.invoke(main, [])
+    assert outcome.exit_code == 1
+    assert expected in outcome.output
+
+
+def test_cli_conflicting_inputs(runner):
+    """Supplying a local path together with --git must be rejected."""
+    cli_args = ["local/path", "--git", "https://github.com/owner/repo.git"]
+    outcome = runner.invoke(main, cli_args)
+    assert outcome.exit_code == 1
+    assert "Error: Cannot specify both a local path and --git" in outcome.output
+
+
+@patch("docs2llm.cli.extract_documentation")
+def test_cli_local_path(mock_extract, runner, temp_dir):
+    """Test that a local-path invocation forwards the path and output file."""
+    # Replace the extraction entry point with a mock that reports success.
+    mock_extract.return_value = True
+
+    # Create a real directory to hand to the CLI.
+    test_dir = os.path.join(temp_dir, "test_docs")
+    os.makedirs(test_dir)
+
+    result = runner.invoke(main, [test_dir, "--output", "test_output.txt"])
+
+    assert result.exit_code == 0
+
+    # Options not given on the command line are expected at the values
+    # asserted here (max_depth=3, no branch, not verbose, no log file).
+    mock_extract.assert_called_once_with(
+        local_path=test_dir,
+        git_repo=None,
+        output_file="test_output.txt",
+        max_depth=3,
+        branch=None,
+        verbose=False,
+        log_file=None,
+    )
+
+
+@patch("docs2llm.cli.extract_documentation")
+def test_cli_git_repo(mock_extract, runner):
+    """Test that a --git invocation forwards repo URL, branch and verbosity."""
+    # Replace the extraction entry point with a mock that reports success.
+    mock_extract.return_value = True
+
+    test_repo = "https://github.com/owner/repo.git"
+
+    result = runner.invoke(
+        main,
+        [
+            "--git",
+            test_repo,
+            "--output",
+            "git_output.txt",
+            "--branch",
+            "main",
+            "--verbose",
+        ],
+    )
+
+    assert result.exit_code == 0
+
+    # max_depth and log_file are not passed above; the values asserted here
+    # are what the CLI is expected to supply on its own.
+    mock_extract.assert_called_once_with(
+        local_path=None,
+        git_repo=test_repo,
+        output_file="git_output.txt",
+        max_depth=3,
+        branch="main",
+        verbose=True,
+        log_file=None,
+    )
+
+
+@patch("docs2llm.cli.extract_documentation")
+def test_cli_with_all_options(mock_extract, runner, tmp_path):
+    """Test that every CLI option is forwarded to extract_documentation."""
+    # Replace the extraction entry point with a mock that reports success.
+    mock_extract.return_value = True
+
+    # Point --log-file into pytest's per-test temp directory so that, should
+    # the CLI ever open the file, no stray test.log lands in the working tree.
+    log_file = str(tmp_path / "test.log")
+
+    result = runner.invoke(
+        main,
+        [
+            "--git",
+            "https://github.com/owner/repo.git",
+            "--output",
+            "full_options.txt",
+            "--max-depth",
+            "5",
+            "--branch",
+            "develop",
+            "--verbose",
+            "--log-file",
+            log_file,
+        ],
+    )
+
+    assert result.exit_code == 0
+
+    # --max-depth arrives as the integer 5, not the string typed on the CLI.
+    mock_extract.assert_called_once_with(
+        local_path=None,
+        git_repo="https://github.com/owner/repo.git",
+        output_file="full_options.txt",
+        max_depth=5,
+        branch="develop",
+        verbose=True,
+        log_file=log_file,
+    )
+
+
+@patch("docs2llm.cli.extract_documentation")
+def test_cli_failure_case(mock_extract, runner):
+    """Test that the CLI exits with code 1 when extraction reports failure."""
+    # Make the mocked extraction entry point report failure.
+    mock_extract.return_value = False
+
+    result = runner.invoke(main, ["nonexistent/path"])
+
+    assert result.exit_code == 1
diff --git a/tests/test_main.py b/tests/test_main.py
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -24,5 +24,6 @@ build-backend = "hatchling.build"`
`24`	`24`
`25`	`25`	`[dependency-groups]`
`26`	`26`	`dev = [`
	`27`	`+ "pytest>=8.3.5",`
`27`	`28`	`"ruff>=0.11.5",`
`28`	`29`	`]`