diff --git a/.github/workflows/publish-webapp-ghcr.yml b/.github/workflows/publish-webapp-ghcr.yml new file mode 100644 index 0000000..c8e5f7f --- /dev/null +++ b/.github/workflows/publish-webapp-ghcr.yml @@ -0,0 +1,192 @@ +name: Build and Push Webapp to GHCR + +on: + push: + branches: + - main + - 'claude/**' + paths: + - 'webapp/**' + - 'knowledgebase_processor/**' + - '.github/workflows/publish-webapp-ghcr.yml' + pull_request: + branches: + - main + paths: + - 'webapp/**' + - 'knowledgebase_processor/**' + workflow_dispatch: + inputs: + tag: + description: 'Custom tag for the image' + required: false + default: 'latest' + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}/kb-processor-webapp + +jobs: + build-and-push: + name: Build and Push to GHCR + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + id-token: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + run: | + # Get short SHA + SHORT_SHA=$(git rev-parse --short HEAD) + echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT + + # Get branch name + BRANCH_NAME=${GITHUB_REF##*/} + echo "branch=${BRANCH_NAME}" >> $GITHUB_OUTPUT + + # Get timestamp + TIMESTAMP=$(date +%Y%m%d-%H%M%S) + echo "timestamp=${TIMESTAMP}" >> $GITHUB_OUTPUT + + # Get repository name (lowercase) + REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') + echo "repo_lower=${REPO_LOWER}" >> $GITHUB_OUTPUT + + # Determine tags + if [[ "${{ github.event_name }}" == "workflow_dispatch" && -n "${{ github.event.inputs.tag }}" ]]; then + CUSTOM_TAG="${{ github.event.inputs.tag }}" + echo "custom_tag=${CUSTOM_TAG}" >> $GITHUB_OUTPUT + fi + + - name: Build Docker 
image tags + id: docker_tags + run: | + GHCR_IMAGE="ghcr.io/${{ steps.meta.outputs.repo_lower }}/kb-processor-webapp" + + TAGS="${GHCR_IMAGE}:${{ steps.meta.outputs.short_sha }}" + TAGS="${TAGS},${GHCR_IMAGE}:${{ steps.meta.outputs.timestamp }}" + + # Add branch-specific tag + if [[ "${{ steps.meta.outputs.branch }}" == "main" ]]; then + TAGS="${TAGS},${GHCR_IMAGE}:latest" + TAGS="${TAGS},${GHCR_IMAGE}:stable" + else + SAFE_BRANCH=$(echo "${{ steps.meta.outputs.branch }}" | sed 's/\//-/g') + TAGS="${TAGS},${GHCR_IMAGE}:${SAFE_BRANCH}" + fi + + # Add custom tag if provided + if [[ -n "${{ steps.meta.outputs.custom_tag }}" ]]; then + TAGS="${TAGS},${GHCR_IMAGE}:${{ steps.meta.outputs.custom_tag }}" + fi + + echo "tags=${TAGS}" >> $GITHUB_OUTPUT + echo "ghcr_image=${GHCR_IMAGE}" >> $GITHUB_OUTPUT + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . + file: ./webapp/Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.docker_tags.outputs.tags }} + cache-from: type=gha + cache-to: type=gha,mode=max + platforms: linux/amd64,linux/arm64 + labels: | + org.opencontainers.image.title=Knowledge Base Processor Webapp + org.opencontainers.image.description=Web interface for the Knowledge Base Processor + org.opencontainers.image.url=https://github.com/${{ github.repository }} + org.opencontainers.image.source=https://github.com/${{ github.repository }} + org.opencontainers.image.revision=${{ github.sha }} + org.opencontainers.image.created=${{ steps.meta.outputs.timestamp }} + org.opencontainers.image.licenses=MIT + + - name: Generate deployment summary + if: github.event_name != 'pull_request' + run: | + echo "## 🚀 Deployment Summary" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Image Details" >> $GITHUB_STEP_SUMMARY + echo "- **Registry:** GitHub Container Registry (GHCR)" >> $GITHUB_STEP_SUMMARY + echo "- **Image:** \`${{ steps.docker_tags.outputs.ghcr_image }}\`" >> 
$GITHUB_STEP_SUMMARY + echo "- **Commit SHA:** \`${{ steps.meta.outputs.short_sha }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **Branch:** \`${{ steps.meta.outputs.branch }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **Timestamp:** \`${{ steps.meta.outputs.timestamp }}\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### 🏷️ Tags" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + echo "${{ steps.docker_tags.outputs.tags }}" | tr ',' '\n' >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### 📦 Pull Command" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY + echo "docker pull ${{ steps.docker_tags.outputs.ghcr_image }}:${{ steps.meta.outputs.short_sha }}" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### 🚀 Run Command" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY + echo "docker run -p 8000:8000 ${{ steps.docker_tags.outputs.ghcr_image }}:${{ steps.meta.outputs.short_sha }}" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### 🔗 Package URL" >> $GITHUB_STEP_SUMMARY + echo "https://github.com/${{ github.repository }}/pkgs/container/kb-processor-webapp" >> $GITHUB_STEP_SUMMARY + + - name: Output image URL + if: github.event_name != 'pull_request' + run: | + echo "✅ Image published successfully!" 
+ echo "📍 Image URL: ${{ steps.docker_tags.outputs.ghcr_image }}:${{ steps.meta.outputs.short_sha }}" + echo "🌐 Access the webapp at: http://localhost:8000 (after running the container)" + echo "📦 View package: https://github.com/${{ github.repository }}/pkgs/container/kb-processor-webapp" + + verify-image: + name: Verify Published Image + needs: build-and-push + runs-on: ubuntu-latest + if: github.event_name != 'pull_request' + + permissions: + packages: read + + steps: + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Verify image exists + run: | + SHORT_SHA=$(echo ${{ github.sha }} | cut -c1-7) + REPO_LOWER=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') + IMAGE="ghcr.io/${REPO_LOWER}/kb-processor-webapp:${SHORT_SHA}" + + echo "Verifying image: ${IMAGE}" + + if docker manifest inspect ${IMAGE} > /dev/null 2>&1; then + echo "✅ Image verified successfully!" + else + echo "❌ Image verification failed!" 
+            exit 1
+          fi
diff --git a/webapp/.dockerignore b/webapp/.dockerignore
new file mode 100644
index 0000000..7298f3c
--- /dev/null
+++ b/webapp/.dockerignore
@@ -0,0 +1,55 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+*.egg-info/
+dist/
+build/
+*.egg
+
+# Virtual environments
+env/
+venv/
+ENV/
+.venv
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Git
+.git/
+.gitignore
+.gitattributes
+
+# Documentation
+*.md
+!README.md
+
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+*.log
+
+# Docker
+Dockerfile
+.dockerignore
+docker-compose.yml
+
+# Database
+*.db
+*.sqlite
+
+# Environment
+.env
+.env.*
diff --git a/webapp/.gitignore b/webapp/.gitignore
new file mode 100644
index 0000000..a9ec37e
--- /dev/null
+++ b/webapp/.gitignore
@@ -0,0 +1,15 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+*.egg-info/
+.env
+.venv
+*.db
+*.sqlite
+*.log
+node_modules/
+.DS_Store
diff --git a/webapp/Dockerfile b/webapp/Dockerfile
new file mode 100644
index 0000000..fd1d1a7
--- /dev/null
+++ b/webapp/Dockerfile
@@ -0,0 +1,51 @@
+# Multi-stage build for Knowledge Base Processor Webapp
+FROM python:3.12-slim AS builder
+
+# Set working directory
+WORKDIR /app
+
+# Install build dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc \
+    g++ \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy the entire project (needed for package installation)
+COPY . /app
+
+# Install the main package and webapp dependencies
+RUN pip install --no-cache-dir --user -e . 
&& \
+    pip install --no-cache-dir --user \
+    "fastapi>=0.104.0" \
+    "uvicorn[standard]>=0.24.0" \
+    "python-multipart>=0.0.6" \
+    "jinja2>=3.1.2"
+
+# Production stage
+FROM python:3.12-slim
+
+# Set working directory
+WORKDIR /app
+
+# Copy Python packages from builder
+COPY --from=builder /root/.local /root/.local
+COPY --from=builder /app /app
+
+# Make sure scripts in .local are usable
+ENV PATH=/root/.local/bin:$PATH
+
+# Set Python path to find the package
+ENV PYTHONPATH=/app:$PYTHONPATH
+
+# Change to webapp directory
+WORKDIR /app/webapp
+
+# Expose port
+EXPOSE 8000
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/stats')" || exit 1
+
+# Run the application
+CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/webapp/GHCR_SETUP.md b/webapp/GHCR_SETUP.md
new file mode 100644
index 0000000..9513e2d
--- /dev/null
+++ b/webapp/GHCR_SETUP.md
@@ -0,0 +1,433 @@
+# GitHub Container Registry (GHCR) Setup
+
+This guide walks you through setting up GitHub Container Registry (GHCR) publishing for the Knowledge Base Processor webapp via GitHub Actions.
+
+## Prerequisites
+
+- GitHub repository with admin access
+- GitHub Actions enabled in your repository
+
+## Overview
+
+GitHub Container Registry (GHCR) is GitHub's native container registry service that integrates seamlessly with GitHub Actions. Unlike GCR, GHCR requires **no external setup** - it works automatically with your GitHub account using the built-in `GITHUB_TOKEN`.
+
+## Automatic Setup
+
+The good news: **GHCR publishing is already configured!** The GitHub Actions workflow in `.github/workflows/publish-webapp-ghcr.yml` will automatically:
+
+1. Build the Docker image on push to `main` or `claude/**` branches
+2. Authenticate using the built-in `GITHUB_TOKEN`
+3. Push the image to `ghcr.io/owner/repo/kb-processor-webapp`
+4. 
Create multiple tags (latest, SHA, timestamp, branch) + +**No secrets to configure!** The `GITHUB_TOKEN` is automatically provided by GitHub Actions. + +## Enabling Package Visibility + +By default, packages are private. To make your image public: + +1. Go to your repository on GitHub +2. Navigate to "Packages" (in the right sidebar) +3. Click on `kb-processor-webapp` +4. Click "Package settings" (bottom of the page) +5. Scroll to "Danger Zone" +6. Click "Change visibility" +7. Select "Public" and confirm + +## Authentication Options + +### For CI/CD (GitHub Actions) + +Already configured! Uses the built-in `GITHUB_TOKEN`: + +```yaml +- name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} +``` + +### For Local Development + +**Option 1: Personal Access Token (Recommended)** + +1. Create a Personal Access Token (classic): + - Go to Settings → Developer settings → Personal access tokens → Tokens (classic) + - Click "Generate new token (classic)" + - Name: `GHCR Access` + - Select scopes: + - `read:packages` - Download images + - `write:packages` - Push images + - `delete:packages` - Delete images (optional) + - Click "Generate token" + - **Copy the token** (you won't see it again!) + +2. 
Authenticate Docker: + ```bash + echo "YOUR_TOKEN" | docker login ghcr.io -u YOUR_USERNAME --password-stdin + ``` + +**Option 2: GitHub CLI** + +```bash +# Install GitHub CLI if not already installed +# https://cli.github.com/ + +# Login to GitHub +gh auth login + +# Configure Docker to use GitHub CLI +gh auth token | docker login ghcr.io -u YOUR_USERNAME --password-stdin +``` + +**Option 3: Using `.netrc` (Unix/Linux)** + +```bash +# Create/edit ~/.netrc +cat >> ~/.netrc << EOF +machine ghcr.io +login YOUR_USERNAME +password YOUR_TOKEN +EOF + +chmod 600 ~/.netrc +``` + +## Image URLs + +Your images will be available at: + +``` +ghcr.io/OWNER/REPO/kb-processor-webapp:TAG +``` + +For example: +```bash +# Main branch (latest) +ghcr.io/dstengle/knowledgebase-processor/kb-processor-webapp:latest + +# Specific commit +ghcr.io/dstengle/knowledgebase-processor/kb-processor-webapp:a1b2c3d + +# Specific branch +ghcr.io/dstengle/knowledgebase-processor/kb-processor-webapp:claude-feature-branch +``` + +## Using the Published Image + +### Pull and Run + +```bash +# Pull the latest image +docker pull ghcr.io/OWNER/REPO/kb-processor-webapp:latest + +# Run the container +docker run -p 8000:8000 ghcr.io/OWNER/REPO/kb-processor-webapp:latest + +# Access the webapp +open http://localhost:8000 +``` + +### With Docker Compose + +Update `docker-compose.yml`: + +```yaml +version: '3.8' + +services: + webapp: + image: ghcr.io/OWNER/REPO/kb-processor-webapp:latest + ports: + - "8000:8000" + restart: unless-stopped +``` + +Then run: +```bash +docker-compose up +``` + +## Managing Packages + +### Via GitHub Web UI + +1. Go to your repository +2. Click "Packages" in the right sidebar +3. Click on `kb-processor-webapp` +4. 
View available tags, downloads, and manifest + +### List Tags (CLI) + +```bash +# Using GitHub CLI +gh api "/users/OWNER/packages/container/kb-processor-webapp%2Fversions" \ + --jq '.[].metadata.container.tags[]' + +# Or view in browser +open https://github.com/OWNER/REPO/pkgs/container/kb-processor-webapp +``` + +### Delete a Tag + +```bash +# Via GitHub web UI +1. Go to Package settings +2. Find the tag under "Manage versions" +3. Click the trash icon + +# Or via API +gh api -X DELETE \ + "/user/packages/container/kb-processor-webapp%2Fversions/VERSION_ID" +``` + +## Workflow Triggers + +The workflow automatically runs on: + +- **Push to main**: Creates `latest` and `stable` tags +- **Push to claude/** branches**: Creates branch-specific tags +- **Pull requests**: Builds but doesn't push (verification only) +- **Manual dispatch**: Allows custom tags via workflow_dispatch + +### Trigger Manual Build + +```bash +# Via GitHub CLI +gh workflow run publish-webapp-ghcr.yml + +# With custom tag +gh workflow run publish-webapp-ghcr.yml -f tag=v1.0.0 +``` + +Or via GitHub UI: +1. Go to "Actions" tab +2. Select "Build and Push Webapp to GHCR" +3. Click "Run workflow" +4. Enter optional custom tag +5. Click "Run workflow" + +## Package Permissions + +### Organization Repositories + +If your repo is in an organization, you may need to: + +1. Go to Organization Settings → Packages +2. Enable "Improved container support" +3. Set default permissions for packages +4. Grant repository access to packages + +### Repository Access + +Control who can push/pull images: + +1. Go to Package settings +2. Scroll to "Manage Actions access" +3. Add repositories that can push images +4. 
Set permissions (read, write, admin) + +## Troubleshooting + +### Permission Denied When Pushing + +**Issue**: `denied: permission_denied: write_package` + +**Solution**: Ensure the workflow has `packages: write` permission: + +```yaml +permissions: + contents: read + packages: write +``` + +This is already configured in the workflow. + +### Cannot Pull Private Image + +**Issue**: `unauthorized: authentication required` + +**Solutions**: +1. Make the package public (see "Enabling Package Visibility" above) +2. Authenticate with a PAT that has `read:packages` scope +3. Use the GitHub CLI for authentication + +### Image Not Found After Push + +**Issue**: Image doesn't appear immediately after push. + +**Solution**: +- Check the Actions tab for workflow status +- Verify the workflow completed successfully +- Package may take a few seconds to appear in the UI +- Check the package URL directly: `https://github.com/OWNER/REPO/pkgs/container/kb-processor-webapp` + +### Rate Limiting + +**Issue**: `rate limit exceeded` + +**Solution**: +- GHCR has generous rate limits (unlimited for public images when authenticated) +- Ensure you're authenticated even for public images +- Use caching in GitHub Actions (already configured) + +### Multi-Architecture Build Fails + +**Issue**: ARM64 build fails or times out. 
+ +**Solution**: +Remove the platforms line to build for amd64 only: + +```yaml +# In .github/workflows/publish-webapp-ghcr.yml +# Change: +platforms: linux/amd64,linux/arm64 +# To: +platforms: linux/amd64 +``` + +## Cost and Limits + +**GitHub Container Registry Pricing (2024)**: + +- **Public images**: Unlimited storage and bandwidth (FREE) +- **Private images**: + - Free tier: 500MB storage, 1GB bandwidth/month + - Paid plans: $0.25/GB storage, $0.50/GB bandwidth +- **Actions minutes**: Counted against your GitHub Actions quota + +This project (~500MB image): +- **Public**: Completely free +- **Private**: ~$0.13/month for 1 image + +[GitHub Packages Pricing](https://docs.github.com/en/billing/managing-billing-for-github-packages/about-billing-for-github-packages) + +## Best Practices + +### 1. Use Descriptive Tags + +```bash +# Good +ghcr.io/owner/repo/kb-processor-webapp:v1.2.3 +ghcr.io/owner/repo/kb-processor-webapp:2024-01-15 + +# Avoid +ghcr.io/owner/repo/kb-processor-webapp:test +ghcr.io/owner/repo/kb-processor-webapp:latest-2 +``` + +### 2. Clean Up Old Images + +Regularly delete unused tags to save storage: +- Keep `latest`, `stable`, and recent versions +- Delete old branch-specific builds +- Delete test/debug builds + +### 3. Use Image Digests for Production + +For reproducible deployments: + +```bash +# Instead of: +docker pull ghcr.io/owner/repo/kb-processor-webapp:latest + +# Use digest (immutable): +docker pull ghcr.io/owner/repo/kb-processor-webapp@sha256:abc123... +``` + +### 4. Scan Images for Vulnerabilities + +Enable vulnerability scanning: +1. Go to repository Settings → Code security and analysis +2. Enable "Dependency graph" +3. Enable "Dependabot alerts" +4. Review security advisories regularly + +### 5. Use `.dockerignore` + +Already configured! 
Reduces build context and image size: +- Excludes `.git`, `__pycache__`, `*.pyc` +- Includes only necessary files + +## Comparison: GHCR vs GCR + +| Feature | GHCR | GCR | +|---------|------|-----| +| Setup | None (automatic) | Service account + secrets | +| Authentication | GITHUB_TOKEN | GCP service account key | +| Cost (public) | Free | ~$0.13/month | +| Integration | Native GitHub | External GCP project | +| Rate Limits | Generous | Based on GCP quota | +| Multi-arch | Yes | Yes | +| Vulnerability Scanning | Yes | Yes | +| Best For | GitHub-first workflows | Multi-cloud, GKE | + +## Advanced: Using Multiple Registries + +You can push to both GHCR and other registries: + +```yaml +# In workflow file +- name: Login to GHCR + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + +- name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + +- name: Build and push + uses: docker/build-push-action@v5 + with: + tags: | + ghcr.io/${{ github.repository }}/kb-processor-webapp:latest + dockerhub/username/kb-processor-webapp:latest +``` + +## Support and Resources + +- [GitHub Container Registry Docs](https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-container-registry) +- [GitHub Actions Docker Docs](https://docs.github.com/en/actions/publishing-packages/publishing-docker-images) +- [Docker Build Push Action](https://github.com/docker/build-push-action) +- [Package Issues](https://github.com/orgs/community/discussions/categories/packages) + +## Quick Reference + +```bash +# Login to GHCR +echo $GITHUB_TOKEN | docker login ghcr.io -u USERNAME --password-stdin + +# Pull image +docker pull ghcr.io/OWNER/REPO/kb-processor-webapp:latest + +# Run container +docker run -p 8000:8000 ghcr.io/OWNER/REPO/kb-processor-webapp:latest + +# View packages +open 
https://github.com/OWNER/REPO/pkgs/container/kb-processor-webapp + +# Trigger workflow +gh workflow run publish-webapp-ghcr.yml + +# List tags +gh api "/users/OWNER/packages/container/kb-processor-webapp%2Fversions" --jq '.[].metadata.container.tags[]' +``` + +## Summary + +GitHub Container Registry provides the simplest way to publish Docker images for GitHub-hosted projects: + +✅ **No setup required** - works automatically with `GITHUB_TOKEN` +✅ **Free for public images** - unlimited storage and bandwidth +✅ **Native integration** - seamless with GitHub Actions +✅ **Multi-architecture** - amd64 and arm64 support +✅ **Secure** - built-in security scanning and access controls + +Just push to your repository and let GitHub Actions handle the rest! diff --git a/webapp/README.md b/webapp/README.md new file mode 100644 index 0000000..3dabe57 --- /dev/null +++ b/webapp/README.md @@ -0,0 +1,376 @@ +# Knowledge Base Processor - Test Webapp + +A simple web application for testing the Knowledge Base Processor. Process markdown content and query the generated RDF graph through an intuitive web interface. + +## Features + +- **Process Markdown**: Upload files or paste markdown content directly +- **Generate RDF Graph**: Automatically converts markdown to knowledge graph +- **Query Interface**: Search and filter entities by type and properties +- **Entity Types Supported**: + - Documents (KbDocument) + - Todo Items (KbTodoItem) + - Headings (KbHeading) + - Sections (KbSection) + - Lists & List Items (KbList, KbListItem) + - Tables (KbTable) + - Code Blocks (KbCodeBlock) + - Blockquotes (KbBlockquote) + - Wiki Links (KbWikiLink) + - Named Entities (KbPerson, KbOrganization, KbLocation, KbDateEntity) +- **Export Graph**: Download RDF graph in Turtle format +- **Live Statistics**: View graph metrics in real-time + +## Installation + +### 1. Install Dependencies + +From the `webapp` directory: + +```bash +cd webapp +pip install -r requirements.txt +``` + +### 2. 
Install the Knowledge Base Processor + +If not already installed, install the main package: + +```bash +cd .. +pip install -e . +``` + +## Running the Webapp + +### Option 1: Docker (Recommended) + +**Using Docker Compose (easiest):** + +```bash +cd webapp +docker-compose up +``` + +**Using Docker directly:** + +```bash +# Build the image +docker build -t kb-processor-webapp -f webapp/Dockerfile . + +# Run the container +docker run -p 8000:8000 kb-processor-webapp +``` + +**Pull from GitHub Container Registry:** + +```bash +# Pull the latest published image (replace owner/repo with your repository) +docker pull ghcr.io/owner/repo/kb-processor-webapp:latest + +# Run the container +docker run -p 8000:8000 ghcr.io/owner/repo/kb-processor-webapp:latest +``` + +### Option 2: Local Python Environment + +**Start the Server:** + +From the `webapp` directory: + +```bash +python backend/main.py +``` + +Or using uvicorn directly: + +```bash +uvicorn backend.main:app --reload --host 0.0.0.0 --port 8000 +``` + +**Using the startup script:** + +```bash +cd webapp +./start.sh +``` + +### Access the Interface + +Open your browser and navigate to: + +``` +http://localhost:8000 +``` + +## Usage + +### 1. Process Content + +**Option A: Upload File** +- Click "Choose File" under "Upload Markdown File" +- Select a `.md` or `.txt` file +- File is automatically processed on selection + +**Option B: Paste Content** +- Paste or type markdown content in the text area +- Optionally specify a Document ID +- Click "Process Content" + +**Option C: Use Example** +- Click "Load Example Markdown" to see sample content +- Click "Process Content" to generate the graph + +### 2. 
Query Entities + +**Filter by Entity Type** +- Select an entity type from the dropdown (populated after processing) +- Click "Query Entities" + +**Search by Property** +- Enter a property name (e.g., `label`, `text`, `description`) +- Enter a search value (partial matches supported) +- Click "Search by Property" + +**Adjust Results** +- Change the "Result Limit" value (default: 50, max: 1000) + +### 3. View Results + +Results are displayed as cards showing: +- Entity type (color-coded badge) +- Entity URI +- All properties and their values + +### 4. Export & Manage + +- **Export Graph**: Downloads the current RDF graph as a `.ttl` file +- **Clear Graph**: Removes the current graph from memory + +## API Endpoints + +The webapp exposes a REST API: + +### Process Content + +```bash +POST /api/process +Content-Type: application/json + +{ + "content": "# My Document\n\n- [ ] Todo item", + "document_id": "optional-doc-id" +} +``` + +### Upload File + +```bash +POST /api/process/file +Content-Type: multipart/form-data + +file: +``` + +### Query Entities + +```bash +GET /api/entities?entity_type=KbTodoItem&limit=100 +``` + +### Search by Property + +```bash +GET /api/entities/search?property_name=label&property_value=todo&limit=100 +``` + +### Get Entity Types + +```bash +GET /api/entities/types +``` + +### Export Graph + +```bash +GET /api/graph/export?format=turtle +``` + +Supported formats: `turtle`, `json-ld`, `xml` + +### Get Statistics + +```bash +GET /api/stats +``` + +### Clear Graph + +```bash +DELETE /api/graph +``` + +## Example Markdown + +The webapp includes example markdown content demonstrating: + +- Headings and sections +- Todo items with assignees and priorities +- Wiki links +- Tables +- Code blocks +- Blockquotes +- Lists (ordered and unordered) +- Dates and named entities + +Click "Load Example Markdown" to see it in action. 
+ +## Architecture + +### Backend (FastAPI) +- **main.py**: FastAPI application with REST API endpoints +- Uses the Knowledge Base Processor's `Processor` class directly +- Stores graph in memory for querying +- Returns JSON responses with entity data + +### Frontend (HTML/JS) +- **index.html**: Bootstrap-based responsive UI +- **app.js**: Vanilla JavaScript for API interactions +- No build process required - works in any modern browser + +## Docker Deployment + +### Building the Image + +The Docker image is automatically built and published to GitHub Container Registry (GHCR) via GitHub Actions on: +- Pushes to `main` branch +- Pushes to `claude/**` branches +- Changes to `webapp/**` or `knowledgebase_processor/**` directories + +**Manual build:** + +```bash +# From project root +docker build -t kb-processor-webapp -f webapp/Dockerfile . +``` + +### Image Tags + +Published images are tagged with: +- `latest` - Latest stable version from main branch +- `stable` - Alias for latest from main branch +- `` - Git commit SHA (e.g., `a1b2c3d`) +- `` - Build timestamp (e.g., `20240115-143022`) +- `` - Branch name for non-main branches + +### Multi-Architecture Support + +Images are built for: +- `linux/amd64` (x86_64) +- `linux/arm64` (ARM64/Apple Silicon) + +### Setting Up GHCR Publishing + +See [GHCR_SETUP.md](./GHCR_SETUP.md) for detailed instructions on: +1. Enabling GitHub Container Registry +2. Managing package permissions +3. Authentication options +4. Verifying deployments + +## Development + +### Run in Development Mode + +**With Docker Compose (recommended):** + +```bash +cd webapp +docker-compose up +``` + +Uncomment the volumes section in `docker-compose.yml` to enable hot-reload: + +```yaml +volumes: + - ./backend:/app/webapp/backend + - ./frontend:/app/webapp/frontend +``` + +**Without Docker:** + +```bash +uvicorn backend.main:app --reload +``` + +The `--reload` flag enables auto-reload on code changes. 
+ +### CORS Configuration + +CORS is enabled for all origins by default for development. For production, update the CORS middleware in `backend/main.py`: + +```python +app.add_middleware( + CORSMiddleware, + allow_origins=["https://yourdomain.com"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) +``` + +### Health Checks + +The Docker image includes a health check endpoint at `/api/stats` that runs every 30 seconds: + +```bash +curl http://localhost:8000/api/stats +``` + +## Troubleshooting + +### Port Already in Use + +If port 8000 is occupied, specify a different port: + +```bash +uvicorn backend.main:app --port 8080 +``` + +### Import Errors + +Ensure the knowledge base processor is installed: + +```bash +pip install -e .. +``` + +### Empty Entity Type Dropdown + +Process some content first. Entity types are populated after processing. + +### Graph Not Persisting + +The graph is stored in memory and will be cleared when the server restarts. For persistence, consider: +- Adding a SPARQL endpoint backend +- Saving graphs to disk +- Using a triple store database + +## Future Enhancements + +Potential improvements: +- [ ] SPARQL query editor +- [ ] Graph visualization (D3.js, Cytoscape.js) +- [ ] Persistent storage with triple store +- [ ] Batch file processing +- [ ] Real-time processing with WebSockets +- [ ] Advanced filtering and sorting +- [ ] Export in multiple RDF formats +- [ ] Import existing RDF graphs + +## License + +Same as the main Knowledge Base Processor project. + +## Support + +For issues or questions, refer to the main project repository. 
diff --git a/webapp/backend/main.py b/webapp/backend/main.py new file mode 100644 index 0000000..f534a3a --- /dev/null +++ b/webapp/backend/main.py @@ -0,0 +1,319 @@ +""" +FastAPI backend for Knowledge Base Processor Test Webapp +""" +import os +import tempfile +from pathlib import Path +from typing import List, Optional, Dict, Any +from datetime import datetime + +from fastapi import FastAPI, File, UploadFile, HTTPException, Query +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse, FileResponse +from fastapi.staticfiles import StaticFiles +from pydantic import BaseModel + +# Import the processor +import sys +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from knowledgebase_processor.processor import Processor +from knowledgebase_processor.utils import DocumentRegistry, EntityIdGenerator +from knowledgebase_processor.config.vocabulary import KB +from rdflib import Graph, Namespace, RDF, RDFS +from rdflib.namespace import FOAF, DCTERMS + +app = FastAPI(title="Knowledge Base Processor Test Webapp") + +# Enable CORS +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Mount static files +static_path = Path(__file__).parent.parent / "frontend" / "static" +app.mount("/static", StaticFiles(directory=str(static_path)), name="static") + +# Global state +current_graph: Optional[Graph] = None +processing_history: List[Dict[str, Any]] = [] + + +class ProcessRequest(BaseModel): + content: str + document_id: Optional[str] = None + + +class QueryRequest(BaseModel): + entity_type: Optional[str] = None + property_name: Optional[str] = None + property_value: Optional[str] = None + limit: int = 100 + + +@app.get("/") +async def read_root(): + """Serve the frontend HTML""" + frontend_path = Path(__file__).parent.parent / "frontend" / "index.html" + return FileResponse(str(frontend_path)) + + +@app.post("/api/process") +async def 
process_content(request: ProcessRequest): + """Process markdown content and generate RDF graph""" + global current_graph, processing_history + + try: + # Initialize processor + processor = Processor( + document_registry=DocumentRegistry(), + id_generator=EntityIdGenerator() + ) + + # Generate document ID if not provided + doc_id = request.document_id or f"doc_{datetime.now().strftime('%Y%m%d_%H%M%S')}" + + # Process content to graph + graph = processor.process_content_to_graph( + content=request.content, + document_id=doc_id + ) + + # Store the graph globally + current_graph = graph + + # Count entities by type + entity_counts = {} + for s, p, o in graph.triples((None, RDF.type, None)): + entity_type = str(o).split('/')[-1] + entity_counts[entity_type] = entity_counts.get(entity_type, 0) + 1 + + # Add to history + processing_history.append({ + "timestamp": datetime.now().isoformat(), + "document_id": doc_id, + "triple_count": len(graph), + "entity_counts": entity_counts + }) + + return { + "success": True, + "document_id": doc_id, + "triple_count": len(graph), + "entity_counts": entity_counts, + "message": f"Processed successfully. Generated {len(graph)} triples." 
+ } + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/process/file") +async def process_file(file: UploadFile = File(...)): + """Process uploaded markdown file""" + try: + content = (await file.read()).decode('utf-8') + doc_id = Path(file.filename).stem + + return await process_content(ProcessRequest( + content=content, + document_id=doc_id + )) + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/entities") +async def query_entities( + entity_type: Optional[str] = Query(None, description="Filter by entity type (e.g., KbTodoItem, KbPerson)"), + limit: int = Query(100, ge=1, le=1000, description="Maximum number of results") +): + """Query entities from the current graph""" + global current_graph + + if current_graph is None: + raise HTTPException(status_code=400, detail="No graph available. Process content first.") + + try: + entities = [] + + # Build SPARQL query + if entity_type: + # Query for specific entity type + full_type = f"{KB}{entity_type}" if not entity_type.startswith("http") else entity_type + triples = current_graph.triples((None, RDF.type, None)) + filtered_triples = [(s, p, o) for s, p, o in triples if str(o) == full_type] + else: + # Query all entities + filtered_triples = list(current_graph.triples((None, RDF.type, None))) + + # Extract entity details + for subject, _, entity_type_uri in filtered_triples[:limit]: + entity = { + "uri": str(subject), + "type": str(entity_type_uri).split('/')[-1], + "properties": {} + } + + # Get all properties for this entity + for p, o in current_graph.predicate_objects(subject): + prop_name = str(p).split('/')[-1].split('#')[-1] + if prop_name not in ['type']: # Skip rdf:type + entity["properties"][prop_name] = str(o) + + entities.append(entity) + + return { + "success": True, + "count": len(entities), + "total_in_graph": len(list(current_graph.triples((None, RDF.type, None)))), + "entities": entities + } + + except 
Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/entities/types") +async def get_entity_types(): + """Get all entity types in the current graph""" + global current_graph + + if current_graph is None: + raise HTTPException(status_code=400, detail="No graph available. Process content first.") + + try: + entity_types = set() + for s, p, o in current_graph.triples((None, RDF.type, None)): + entity_types.add(str(o).split('/')[-1]) + + return { + "success": True, + "types": sorted(list(entity_types)) + } + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/entities/search") +async def search_entities( + property_name: str = Query(..., description="Property name to search (e.g., label, text)"), + property_value: str = Query(..., description="Property value to match (partial match)"), + limit: int = Query(100, ge=1, le=1000) +): + """Search entities by property value""" + global current_graph + + if current_graph is None: + raise HTTPException(status_code=400, detail="No graph available. 
Process content first.") + + try: + matching_entities = [] + + # Search through all triples + for subject in set(current_graph.subjects()): + entity_properties = {} + entity_type = None + + # Get entity type and properties + for p, o in current_graph.predicate_objects(subject): + prop_name = str(p).split('/')[-1].split('#')[-1] + if str(p) == str(RDF.type): + entity_type = str(o).split('/')[-1] + else: + entity_properties[prop_name] = str(o) + + # Check if property matches + if property_name in entity_properties: + if property_value.lower() in entity_properties[property_name].lower(): + matching_entities.append({ + "uri": str(subject), + "type": entity_type, + "properties": entity_properties + }) + + if len(matching_entities) >= limit: + break + + return { + "success": True, + "count": len(matching_entities), + "entities": matching_entities + } + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/graph/export") +async def export_graph(format: str = Query("turtle", regex="^(turtle|json-ld|xml)$")): + """Export the current graph in various formats""" + global current_graph + + if current_graph is None: + raise HTTPException(status_code=400, detail="No graph available. 
Process content first.") + + try: + # Serialize graph + serialized = current_graph.serialize(format=format) + + # Set appropriate content type + content_types = { + "turtle": "text/turtle", + "json-ld": "application/ld+json", + "xml": "application/rdf+xml" + } + + return JSONResponse( + content={"success": True, "data": serialized}, + media_type=content_types.get(format, "text/plain") + ) + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/api/stats") +async def get_stats(): + """Get statistics about the current graph and processing history""" + global current_graph, processing_history + + stats = { + "current_graph": None, + "processing_history": processing_history + } + + if current_graph: + entity_types = {} + for s, p, o in current_graph.triples((None, RDF.type, None)): + entity_type = str(o).split('/')[-1] + entity_types[entity_type] = entity_types.get(entity_type, 0) + 1 + + stats["current_graph"] = { + "triple_count": len(current_graph), + "entity_types": entity_types, + "unique_subjects": len(set(current_graph.subjects())), + "unique_predicates": len(set(current_graph.predicates())), + "unique_objects": len(set(current_graph.objects())) + } + + return stats + + +@app.delete("/api/graph") +async def clear_graph(): + """Clear the current graph""" + global current_graph + current_graph = None + return {"success": True, "message": "Graph cleared"} + + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/webapp/docker-compose.yml b/webapp/docker-compose.yml new file mode 100644 index 0000000..79c80b4 --- /dev/null +++ b/webapp/docker-compose.yml @@ -0,0 +1,37 @@ +version: '3.8' + +services: + webapp: + # Option 1: Build locally + build: + context: .. 
+ dockerfile: webapp/Dockerfile + + # Option 2: Use published image from GHCR (uncomment to use) + # image: ghcr.io/owner/repo/kb-processor-webapp:latest + + container_name: kb-processor-webapp + ports: + - "8000:8000" + environment: + - PYTHONUNBUFFERED=1 + restart: unless-stopped + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/stats')"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 5s + volumes: + # Optional: Mount local directory for development + # - ./backend:/app/webapp/backend + # - ./frontend:/app/webapp/frontend + - webapp-data:/data + +volumes: + webapp-data: + driver: local + +networks: + default: + name: kb-processor-network diff --git a/webapp/frontend/index.html b/webapp/frontend/index.html new file mode 100644 index 0000000..e648440 --- /dev/null +++ b/webapp/frontend/index.html @@ -0,0 +1,240 @@ + + + + + + Knowledge Base Processor - Test Webapp + + + + + +
+ +
+

Knowledge Base Processor

+

Test Webapp - Process Markdown and Query RDF Graph

+
+ + +
+
+
+
+
Triples
+ 0 +
+
+
+
+
+
+
Entities
+ 0 +
+
+
+
+
+
+
Entity Types
+ 0 +
+
+
+
+
+
+
Processed Docs
+ 0 +
+
+
+
+ +
+ +
+ +
+
+
Process Markdown
+
+
+ +
+ + +
+ +
+ - OR - +
+ + +
+ + +
+ + +
+ + +
+ + + + +
+
+ Processing... +
+

Processing markdown...

+
+ + +
+
+
+ + +
+
+
Example Content
+
+
+ + + Loads sample markdown with todos, headings, lists, tables, and wiki links. + +
+
+
+ + +
+ +
+
+
Query Entities
+
+
+ +
+ + +
+ + +
+
+ + +
+
+ + +
+
+ + +
+ + +
+ + +
+ + + + +
+
+
+
+
+ + +
+
+
+
+
Results 0
+
+
+

Process content and query entities to see results here.

+
+
+
+
+
+ + + + + diff --git a/webapp/frontend/static/app.js b/webapp/frontend/static/app.js new file mode 100644 index 0000000..a5e8093 --- /dev/null +++ b/webapp/frontend/static/app.js @@ -0,0 +1,435 @@ +// Knowledge Base Processor - Frontend Application + +const API_BASE = ''; + +// Example markdown content +const EXAMPLE_MARKDOWN = `# Project Planning Document + +## Overview +This is a sample document to demonstrate the Knowledge Base Processor. + +## Tasks +- [ ] Design the database schema @john +- [x] Set up development environment @jane !high +- [ ] Implement user authentication !critical +- [ ] Write unit tests @team +- [x] Create API documentation + +## Team Members +Contact [[John Smith]] or [[Jane Doe]] for more information. + +## Features +1. User authentication with JWT +2. RESTful API endpoints +3. Real-time data synchronization +4. Advanced search capabilities + +## Technical Specs + +| Component | Technology | Status | +|-----------|-----------|--------| +| Backend | Python/FastAPI | In Progress | +| Frontend | React | Planned | +| Database | PostgreSQL | Completed | + +## Code Example + +\`\`\`python +def process_document(content: str): + """Process markdown content""" + processor = Processor() + return processor.process(content) +\`\`\` + +## Important Dates +- Project kickoff: 2024-01-15 +- Alpha release: 2024-03-01 +- Beta testing: 2024-04-15 + +> Note: This is a high-priority project. All deadlines are firm. 
+ +## Related Documents +- [[Architecture Design]] +- [[API Specification]] +- [[Testing Strategy]] +`; + +// DOM Elements +let processBtn, queryBtn, searchBtn, exportBtn, clearBtn, loadExampleBtn; +let markdownInput, fileInput, docIdInput, entityTypeFilter, propertyName, propertyValue, resultLimit; +let resultsContainer, processResult, processingSpinner; +let tripleCount, entityCount, typeCount, docCount, resultCount; + +// Initialize on page load +document.addEventListener('DOMContentLoaded', () => { + initializeElements(); + attachEventListeners(); + loadEntityTypes(); + updateStats(); +}); + +function initializeElements() { + // Buttons + processBtn = document.getElementById('processBtn'); + queryBtn = document.getElementById('queryBtn'); + searchBtn = document.getElementById('searchBtn'); + exportBtn = document.getElementById('exportBtn'); + clearBtn = document.getElementById('clearBtn'); + loadExampleBtn = document.getElementById('loadExample'); + + // Inputs + markdownInput = document.getElementById('markdownInput'); + fileInput = document.getElementById('fileInput'); + docIdInput = document.getElementById('docId'); + entityTypeFilter = document.getElementById('entityTypeFilter'); + propertyName = document.getElementById('propertyName'); + propertyValue = document.getElementById('propertyValue'); + resultLimit = document.getElementById('resultLimit'); + + // Display elements + resultsContainer = document.getElementById('resultsContainer'); + processResult = document.getElementById('processResult'); + processingSpinner = document.getElementById('processingSpinner'); + + // Stats + tripleCount = document.getElementById('tripleCount'); + entityCount = document.getElementById('entityCount'); + typeCount = document.getElementById('typeCount'); + docCount = document.getElementById('docCount'); + resultCount = document.getElementById('resultCount'); +} + +function attachEventListeners() { + processBtn.addEventListener('click', processContent); + 
queryBtn.addEventListener('click', queryEntities); + searchBtn.addEventListener('click', searchEntities); + exportBtn.addEventListener('click', exportGraph); + clearBtn.addEventListener('click', clearGraph); + loadExampleBtn.addEventListener('click', loadExample); + fileInput.addEventListener('change', handleFileUpload); +} + +async function processContent() { + const content = markdownInput.value.trim(); + + if (!content) { + showAlert('Please enter some markdown content.', 'warning'); + return; + } + + try { + // Show spinner + processingSpinner.style.display = 'block'; + processBtn.disabled = true; + + const response = await fetch(`${API_BASE}/api/process`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + content: content, + document_id: docIdInput.value || null + }) + }); + + const data = await response.json(); + + if (data.success) { + showAlert(data.message, 'success'); + displayProcessingResult(data); + await loadEntityTypes(); + await updateStats(); + } else { + showAlert('Processing failed: ' + (data.detail || 'Unknown error'), 'danger'); + } + } catch (error) { + showAlert('Error: ' + error.message, 'danger'); + } finally { + processingSpinner.style.display = 'none'; + processBtn.disabled = false; + } +} + +async function handleFileUpload(event) { + const file = event.target.files[0]; + if (!file) return; + + try { + processingSpinner.style.display = 'block'; + processBtn.disabled = true; + + const formData = new FormData(); + formData.append('file', file); + + const response = await fetch(`${API_BASE}/api/process/file`, { + method: 'POST', + body: formData + }); + + const data = await response.json(); + + if (data.success) { + showAlert(`File processed: ${data.message}`, 'success'); + displayProcessingResult(data); + await loadEntityTypes(); + await updateStats(); + } else { + showAlert('File processing failed: ' + (data.detail || 'Unknown error'), 'danger'); + } + } catch (error) { + 
showAlert('Error: ' + error.message, 'danger'); + } finally { + processingSpinner.style.display = 'none'; + processBtn.disabled = false; + fileInput.value = ''; + } +} + +async function queryEntities() { + const entityType = entityTypeFilter.value; + const limit = parseInt(resultLimit.value) || 100; + + try { + queryBtn.disabled = true; + const params = new URLSearchParams(); + if (entityType) params.append('entity_type', entityType); + params.append('limit', limit); + + const response = await fetch(`${API_BASE}/api/entities?${params}`); + const data = await response.json(); + + if (data.success) { + displayEntities(data.entities, data.count, data.total_in_graph); + } else { + showAlert('Query failed: ' + (data.detail || 'Unknown error'), 'danger'); + } + } catch (error) { + showAlert('Error: ' + error.message, 'danger'); + } finally { + queryBtn.disabled = false; + } +} + +async function searchEntities() { + const propName = propertyName.value.trim(); + const propValue = propertyValue.value.trim(); + const limit = parseInt(resultLimit.value) || 100; + + if (!propName || !propValue) { + showAlert('Please enter both property name and value to search.', 'warning'); + return; + } + + try { + searchBtn.disabled = true; + const params = new URLSearchParams({ + property_name: propName, + property_value: propValue, + limit: limit + }); + + const response = await fetch(`${API_BASE}/api/entities/search?${params}`); + const data = await response.json(); + + if (data.success) { + displayEntities(data.entities, data.count); + } else { + showAlert('Search failed: ' + (data.detail || 'Unknown error'), 'danger'); + } + } catch (error) { + showAlert('Error: ' + error.message, 'danger'); + } finally { + searchBtn.disabled = false; + } +} + +async function loadEntityTypes() { + try { + const response = await fetch(`${API_BASE}/api/entities/types`); + const data = await response.json(); + + if (data.success) { + // Update dropdown + entityTypeFilter.innerHTML = ''; + 
data.types.forEach(type => { + const option = document.createElement('option'); + option.value = type; + option.textContent = type; + entityTypeFilter.appendChild(option); + }); + } + } catch (error) { + console.error('Failed to load entity types:', error); + } +} + +async function updateStats() { + try { + const response = await fetch(`${API_BASE}/api/stats`); + const data = await response.json(); + + if (data.current_graph) { + tripleCount.textContent = data.current_graph.triple_count; + entityCount.textContent = data.current_graph.unique_subjects; + typeCount.textContent = Object.keys(data.current_graph.entity_types).length; + } + + if (data.processing_history) { + docCount.textContent = data.processing_history.length; + } + } catch (error) { + console.error('Failed to update stats:', error); + } +} + +async function exportGraph() { + try { + exportBtn.disabled = true; + const response = await fetch(`${API_BASE}/api/graph/export?format=turtle`); + const data = await response.json(); + + if (data.success) { + // Download the RDF data + const blob = new Blob([data.data], { type: 'text/turtle' }); + const url = window.URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = `kb_graph_${new Date().toISOString()}.ttl`; + a.click(); + window.URL.revokeObjectURL(url); + showAlert('Graph exported successfully!', 'success'); + } else { + showAlert('Export failed: ' + (data.detail || 'Unknown error'), 'danger'); + } + } catch (error) { + showAlert('Error: ' + error.message, 'danger'); + } finally { + exportBtn.disabled = false; + } +} + +async function clearGraph() { + if (!confirm('Are you sure you want to clear the current graph?')) { + return; + } + + try { + clearBtn.disabled = true; + const response = await fetch(`${API_BASE}/api/graph`, { + method: 'DELETE' + }); + const data = await response.json(); + + if (data.success) { + showAlert('Graph cleared successfully!', 'success'); + resultsContainer.innerHTML = '

Graph cleared. Process content to create a new graph.

'; + resultCount.textContent = '0'; + await updateStats(); + await loadEntityTypes(); + } + } catch (error) { + showAlert('Error: ' + error.message, 'danger'); + } finally { + clearBtn.disabled = false; + } +} + +function loadExample() { + markdownInput.value = EXAMPLE_MARKDOWN; + showAlert('Example markdown loaded! Click "Process Content" to generate the graph.', 'info'); +} + +function displayProcessingResult(data) { + let html = ` +
+
Processing Complete
+
    +
  • Document ID: ${data.document_id}
  • +
  • Total Triples: ${data.triple_count}
  • +
  • Entity Types: ${Object.keys(data.entity_counts).length}
  • +
+
+ `; + + if (data.entity_counts && Object.keys(data.entity_counts).length > 0) { + html += '
Entity Breakdown:
'; + for (const [type, count] of Object.entries(data.entity_counts)) { + html += `${type}: ${count} `; + } + html += '
'; + } + + processResult.innerHTML = html; +} + +function displayEntities(entities, count, total = null) { + if (!entities || entities.length === 0) { + resultsContainer.innerHTML = '

No entities found.

'; + resultCount.textContent = '0'; + return; + } + + let html = ''; + + if (total !== null) { + html += `
Showing ${count} of ${total} entities
`; + } else { + html += `
Found ${count} matching entities
`; + } + + entities.forEach(entity => { + html += ` +
+
+
+ ${entity.type} + ${entity.uri} +
+
+ `; + + // Display properties + for (const [key, value] of Object.entries(entity.properties)) { + const displayValue = value.length > 100 ? value.substring(0, 100) + '...' : value; + html += ` +
+ ${key}: ${escapeHtml(displayValue)} +
+ `; + } + + html += ` +
+
+
+ `; + }); + + resultsContainer.innerHTML = html; + resultCount.textContent = count; +} + +function showAlert(message, type = 'info') { + const alertDiv = document.createElement('div'); + alertDiv.className = `alert alert-${type} alert-dismissible fade show`; + alertDiv.innerHTML = ` + ${message} + + `; + + processResult.innerHTML = ''; + processResult.appendChild(alertDiv); + + // Auto-dismiss after 5 seconds + setTimeout(() => { + alertDiv.classList.remove('show'); + setTimeout(() => alertDiv.remove(), 300); + }, 5000); +} + +function escapeHtml(text) { + const div = document.createElement('div'); + div.textContent = text; + return div.innerHTML; +} diff --git a/webapp/requirements.txt b/webapp/requirements.txt new file mode 100644 index 0000000..a0fde3c --- /dev/null +++ b/webapp/requirements.txt @@ -0,0 +1,4 @@ +fastapi>=0.104.0 +uvicorn[standard]>=0.24.0 +python-multipart>=0.0.6 +jinja2>=3.1.2 diff --git a/webapp/start.sh b/webapp/start.sh new file mode 100755 index 0000000..1bdede0 --- /dev/null +++ b/webapp/start.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Start the Knowledge Base Processor webapp + +echo "Knowledge Base Processor - Test Webapp" +echo "======================================" +echo "" + +# Check if dependencies are installed +if ! python -c "import fastapi" 2>/dev/null; then + echo "Installing dependencies..." + pip install -r requirements.txt +fi + +# Check if main package is installed +if ! python -c "import knowledgebase_processor" 2>/dev/null; then + echo "Installing knowledgebase-processor..." + cd .. && pip install -e . 
&& cd webapp +fi + +echo "" +echo "Starting server on http://localhost:8000" +echo "Press Ctrl+C to stop" +echo "" + +# Start the server +uvicorn backend.main:app --host 0.0.0.0 --port 8000 --reload diff --git a/webapp/test_webapp.py b/webapp/test_webapp.py new file mode 100644 index 0000000..39e699c --- /dev/null +++ b/webapp/test_webapp.py @@ -0,0 +1,85 @@ +""" +Quick test script for the webapp backend +""" +import sys +from pathlib import Path + +# Add parent directory to path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from knowledgebase_processor.processor import Processor +from knowledgebase_processor.utils import DocumentRegistry, EntityIdGenerator +from rdflib import RDF + +# Test markdown content +TEST_MARKDOWN = """# Test Document + +## Tasks +- [ ] Implement feature A +- [x] Complete feature B + +## People +Contact [[John Doe]] for questions. + +## Data Table +| Name | Status | +|------|--------| +| Task 1 | Done | +| Task 2 | Pending | + +```python +def hello(): + print("Hello, World!") +``` + +> Important: This is a test document +""" + +print("Testing Knowledge Base Processor...") +print("=" * 50) + +# Initialize processor +processor = Processor( + document_registry=DocumentRegistry(), + id_generator=EntityIdGenerator() +) + +# Process content +print("\n1. Processing markdown content...") +graph = processor.process_content_to_graph( + content=TEST_MARKDOWN, + document_id="test_doc" +) + +print(f"✓ Generated {len(graph)} triples") + +# Count entity types +print("\n2. Analyzing entity types...") +entity_types = {} +for s, p, o in graph.triples((None, RDF.type, None)): + entity_type = str(o).split('/')[-1] + entity_types[entity_type] = entity_types.get(entity_type, 0) + 1 + +print("✓ Entity breakdown:") +for entity_type, count in sorted(entity_types.items()): + print(f" - {entity_type}: {count}") + +# Test querying +print("\n3. 
Testing entity queries...") +todo_items = [] +for s, p, o in graph.triples((None, RDF.type, None)): + if 'TodoItem' in str(o): + todo_items.append(s) + +print(f"✓ Found {len(todo_items)} todo items") + +# Test export +print("\n4. Testing RDF export...") +ttl_output = graph.serialize(format='turtle') +print(f"✓ Exported {len(ttl_output)} bytes of Turtle RDF") + +print("\n" + "=" * 50) +print("✅ All tests passed!") +print("\nWebapp backend is ready to use.") +print("Run: python backend/main.py") +print("Or: uvicorn backend.main:app --reload")