diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..1ce7525 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,56 @@ +# Git files +.git +.gitignore +.github + +# Documentation +README.md +CLAUDE.md +LICENSE +CHANGELOG.md +*.md + +# Development files +*.pyc +__pycache__ +.pytest_cache +.coverage +htmlcov +.env +.env.* + +# IDE files +.vscode +.idea +*.swp +*.swo +*.swn +.DS_Store + +# Local data and logs +data/ +logs/ +*.db +*.log + +# Config files (except template) +config.json + +# Test files +test_*.py +tests/ +debug_*.py + +# Build artifacts +build/ +dist/ +*.egg-info/ + +# Systemd files +systemd/ +*.service + +# Docker files (not needed in build context) +docker-compose.yml +docker-compose.*.yml +Dockerfile.* diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..44d4351 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,55 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# top-most EditorConfig file +root = true + +# Unix-style newlines with a newline ending every file +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +# Python files +[*.py] +indent_style = space +indent_size = 4 + +# JSON files +[*.json] +indent_style = space +indent_size = 2 + +# YAML files +[*.{yml,yaml}] +indent_style = space +indent_size = 2 + +# Markdown files +[*.md] +trim_trailing_whitespace = false +max_line_length = off + +# Shell scripts +[*.sh] +indent_style = space +indent_size = 2 + +# Dockerfile +[Dockerfile*] +indent_style = space +indent_size = 2 + +# Docker Compose +[docker-compose*.yml] +indent_style = space +indent_size = 2 + +# Makefile +[Makefile] +indent_style = tab + +# Configuration files +[*.{cfg,ini,toml}] +indent_style = space +indent_size = 2 diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 0000000..c1c8da8 --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,123 @@ +name: Docker Build and Publish + +on: + push: + branches: + - main + - develop + - 'feature/docker-*' + tags: + - 'v*' + pull_request: + branches: + - main + workflow_dispatch: + inputs: + push: + description: 'Push images to registry' + required: false + default: 'false' + type: choice + options: + - 'true' + - 'false' + +env: + REGISTRY: ghcr.io + IMAGE_NAME: bakerboy448/redditmodlog + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + outputs: + image-tag: ${{ steps.meta.outputs.version }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to GitHub Container Registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=raw,value=latest,enable={{is_default_branch}} + type=sha + labels: | + org.opencontainers.image.title=Reddit ModLog Wiki Publisher + org.opencontainers.image.description=Automated Reddit moderation log publisher to wiki pages + org.opencontainers.image.vendor=bakerboy448 + org.opencontainers.image.licenses=GPL-3.0 + + - name: Build and push 
Docker image + uses: docker/build-push-action@v5 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name != 'pull_request' || github.event.inputs.push == 'true' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + build-args: | + BUILD_DATE=${{ github.event.head_commit.timestamp }} + VCS_REF=${{ github.sha }} + VERSION=${{ steps.meta.outputs.version }} + + - name: Generate SBOM + if: github.event_name != 'pull_request' + uses: anchore/sbom-action@v0 + with: + image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }} + format: spdx-json + output-file: sbom.spdx.json + + - name: Upload SBOM + if: github.event_name != 'pull_request' + uses: actions/upload-artifact@v4 + with: + name: sbom + path: sbom.spdx.json + + security-scan: + needs: build + runs-on: ubuntu-latest + if: github.event_name != 'pull_request' + permissions: + contents: read + security-events: write + + steps: + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ needs.build.outputs.image-tag }} + format: 'sarif' + output: 'trivy-results.sarif' + + - name: Upload Trivy results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + if: always() + with: + sarif_file: 'trivy-results.sarif' diff --git a/.gitignore b/.gitignore index fe7dfdd..e85be1c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,12 +2,16 @@ config.json *.config.json config.*.json +/etc/redditmodlog/*.json +!config_template.json +!*.json.example # Database files *.db *.db-journal *.sqlite *.sqlite3 +data/ # Python __pycache__/ @@ -38,7 +42,13 @@ venv/ ENV/ env/ .venv + +# Environment files with credentials .env +.env.* +*.env +!example.env +!*.env.example # IDE .vscode/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..2eca111 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,60 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-merge-conflict + - id: debug-statements + - id: check-executables-have-shebangs + + - repo: https://github.com/psf/black + rev: 23.7.0 + hooks: + - id: black + language_version: python3 + args: [--line-length=180] + + - repo: https://github.com/pycqa/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + args: ['--max-line-length=180', '--ignore=E203,W503,E231,E226,E241,E722,F401,F403,F405,F541,F811,E402'] + + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: [--profile=black] + + - repo: https://github.com/Yelp/detect-secrets + rev: v1.4.0 + hooks: + - id: detect-secrets + args: ['--baseline', '.secrets.baseline'] + exclude: | + (?x)^( + config_template\.json| + \.secrets\.baseline| + systemd/.*| + README\.md| + CLAUDE\.md + )$ + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.4.1 + hooks: + - id: mypy + additional_dependencies: [types-requests] + args: [--ignore-missing-imports] + + - repo: local + hooks: + - id: reddit-config-check + name: Check Reddit config safety + entry: python -c "import json; config = json.load(open('config.json')) if __import__('os').path.exists('config.json') else {}; exit(1) if not config.get('anonymize_moderators', True) else exit(0)" + language: system + files: config\.json$ + pass_filenames: false diff --git 
a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 0000000..4c62e11 --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,6 @@ +{ + "version": "1.4.0", + "plugins_used": [], + "filters_used": [], + "results": {} +} diff --git a/CLAUDE.md b/CLAUDE.md index f688b4d..047c11b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -40,7 +40,7 @@ cp config_template.json config.json /opt/.venv/redditbot/bin/python modlog_wiki_publisher.py --source-subreddit SUBREDDIT_NAME --force-wiki # Debug authentication issues -/opt/.venv/redditbot/bin/python debug_auth.py +/opt/.venv/redditbot/bin/python scripts/debug_auth.py ``` ### Database Operations @@ -51,7 +51,7 @@ sqlite3 modlog.db "SELECT action_id, action_type, moderator, removal_reason, sub # View actions by subreddit sqlite3 modlog.db "SELECT action_type, moderator, target_author, removal_reason FROM processed_actions WHERE subreddit = 'usenet' ORDER BY created_at DESC LIMIT 5;" -# Track content lifecycle by target ID +# Track content lifecycle by target ID sqlite3 modlog.db "SELECT target_id, action_type, moderator, removal_reason, datetime(created_at, 'unixepoch') FROM processed_actions WHERE target_id LIKE '%1mkz4jm%' ORDER BY created_at;" # Manual cleanup of old entries @@ -62,7 +62,7 @@ sqlite3 modlog.db "DELETE FROM processed_actions WHERE created_at < date('now', The application supports multiple configuration methods with the following priority (highest to lowest): 1. **Command line arguments** (highest priority) -2. **Environment variables** (override config file) +2. **Environment variables** (override config file) 3. **JSON config file** (base configuration) ### Environment Variables @@ -71,7 +71,7 @@ All configuration options can be set via environment variables: #### Reddit Credentials - `REDDIT_CLIENT_ID`: Reddit app client ID -- `REDDIT_CLIENT_SECRET`: Reddit app client secret +- `REDDIT_CLIENT_SECRET`: Reddit app client secret - `REDDIT_USERNAME`: Reddit bot username - `REDDIT_PASSWORD`: Reddit bot password @@ -132,7 +132,7 @@ python modlog_wiki_publisher.py --debug --batch-size 25 # CLI takes priority ### Display Options - `anonymize_moderators`: **REQUIRED** to be `true` for security (default: true) - - `true` (ENFORCED): Shows "AutoModerator", "Reddit", or "HumanModerator" + - `true` (ENFORCED): Shows "AutoModerator", "Reddit", or "HumanModerator" - `false`: **BLOCKED** - Would expose moderator identities publicly **SECURITY NOTE**: Setting `anonymize_moderators=false` is permanently disabled to protect moderator privacy. The application will refuse to start if this is attempted. 
@@ -143,7 +143,7 @@ The application uses configurable action type variables for flexibility: #### Default Configuration - **REMOVAL_ACTIONS**: `removelink`, `removecomment`, `spamlink`, `spamcomment` -- **APPROVAL_ACTIONS**: `approvelink`, `approvecomment` +- **APPROVAL_ACTIONS**: `approvelink`, `approvecomment` - **REASON_ACTIONS**: `addremovalreason` - **DEFAULT_WIKI_ACTIONS**: All above combined @@ -171,9 +171,10 @@ The bot account needs: ## File Structure - `modlog_wiki_publisher.py`: Main application -- `debug_auth.py`: Authentication debugging utility +- `scripts/debug_auth.py`: Authentication debugging utility +- `tests/test_removal_reasons.py`: Test suite for removal reason processing - `config.json`: Runtime configuration (created from template) -- `modlog.db`: SQLite database for processed actions +- `data/`: Runtime data directory (database files) - `logs/`: Per-subreddit log files - `requirements.txt`: Python dependencies @@ -184,7 +185,7 @@ Use `--test` flag to verify configuration and Reddit API connectivity without ma ## Content Link Guidelines **CRITICAL**: Content links in the modlog should NEVER point to user profiles (`/u/username`). Links should only point to: -- Actual removed posts (`/comments/postid/`) +- Actual removed posts (`/comments/postid/`) - Actual removed comments (`/comments/postid/_/commentid/`) - No link at all if no actual content is available @@ -231,14 +232,14 @@ User profile links are a privacy concern and not useful for modlog purposes. ### Content Linking and Display - ✅ Content links point to actual Reddit posts/comments, never user profiles for privacy -- ✅ Fixed target authors showing as [deleted] - now displays actual usernames +- ✅ Fixed target authors showing as [deleted] - now displays actual usernames - ✅ Proper content titles extracted from Reddit API data - ✅ AutoModerator displays as "AutoModerator" (not anonymized) - ✅ Configurable anonymization for human moderators ### Data Integrity - ✅ Pipe character escaping for markdown table compatibility -- ✅ Robust error handling for mixed subreddit scenarios +- ✅ Robust error handling for mixed subreddit scenarios - ✅ Database schema at version 5 with all required columns - ✅ Consistent Reddit API field usage (action.details vs action.description) @@ -266,4 +267,4 @@ User profile links are a privacy concern and not useful for modlog purposes. 
- **401 errors**: Check app type is "script" and verify client_id/client_secret - **Wiki permission denied**: Ensure bot has moderator or wiki contributor access - **Rate limiting**: Increase `--interval` and/or reduce `--batch-size` -- **Module not found**: Always use `/opt/.venv/redditbot/bin/python` instead of system python \ No newline at end of file +- **Module not found**: Always use `/opt/.venv/redditbot/bin/python` instead of system python diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..9417b12 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,145 @@ +# Build stage +FROM python:3.11-slim AS builder + +# Install build dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +# Create virtual environment +RUN python -m venv /opt/venv +ENV PATH="/opt/venv/bin:$PATH" + +# Install Python dependencies +COPY requirements.txt /tmp/ +RUN pip install --no-cache-dir -r /tmp/requirements.txt + +# Runtime stage +FROM python:3.11-slim + +# OCI Labels +LABEL org.opencontainers.image.title="Reddit ModLog Wiki Publisher" \ + org.opencontainers.image.description="Automated Reddit moderation log publisher to wiki pages" \ + org.opencontainers.image.authors="bakerboy448" \ + org.opencontainers.image.source="https://github.com/bakerboy448/RedditModLog" \ + org.opencontainers.image.documentation="https://github.com/bakerboy448/RedditModLog/blob/main/README.md" \ + org.opencontainers.image.licenses="GPL-3.0" \ + org.opencontainers.image.vendor="bakerboy448" \ + org.opencontainers.image.base.name="python:3.11-slim" + +# Install runtime dependencies and s6-overlay for user management +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + xz-utils \ + && rm -rf /var/lib/apt/lists/* + +# Install s6-overlay for proper init and user management +ARG S6_OVERLAY_VERSION=3.1.6.2 +ARG TARGETARCH +ADD https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-noarch.tar.xz /tmp +RUN case ${TARGETARCH} in \ + "amd64") S6_ARCH=x86_64 ;; \ + "arm64") S6_ARCH=aarch64 ;; \ + "arm/v7") S6_ARCH=arm ;; \ + *) echo "Unsupported architecture: ${TARGETARCH}" && exit 1 ;; \ + esac && \ + curl -L "https://github.com/just-containers/s6-overlay/releases/download/v${S6_OVERLAY_VERSION}/s6-overlay-${S6_ARCH}.tar.xz" -o /tmp/s6-overlay-arch.tar.xz && \ + tar -C / -Jxpf /tmp/s6-overlay-noarch.tar.xz && \ + tar -C / -Jxpf /tmp/s6-overlay-arch.tar.xz && \ + rm /tmp/s6-overlay-*.tar.xz + +# Create default user and group +RUN groupadd -g 1000 modlogbot && \ + useradd -u 1000 -g modlogbot -d /config -s /bin/bash modlogbot + +# Copy virtual environment from builder +COPY --from=builder /opt/venv /opt/venv + +# Set environment variables +ENV PATH="/opt/venv/bin:$PATH" \ + PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PUID=1000 \ + PGID=1000 \ + S6_CMD_WAIT_FOR_SERVICES_MAXTIME=0 \ + DATABASE_PATH=/config/data/modlog.db \ + LOGS_DIR=/config/logs + +# Create application directories +RUN mkdir -p /config /config/data /config/logs /app /etc/s6-overlay/s6-rc.d/modlog-bot /etc/s6-overlay/s6-rc.d/init-modlogbot /etc/s6-overlay/scripts + +# Create s6 init script for user/group management +RUN echo '#!/command/with-contenv bash\n\ +set -e\n\ +\n\ +# Validate critical environment variables\n\ +echo "Validating required environment variables..."\n\ +\n\ +missing_vars=()\n\ +\n\ +[ -z "$REDDIT_CLIENT_ID" ] && missing_vars+=("REDDIT_CLIENT_ID")\n\ +[ -z "$REDDIT_CLIENT_SECRET" ] && 
missing_vars+=("REDDIT_CLIENT_SECRET")\n\ +[ -z "$REDDIT_USERNAME" ] && missing_vars+=("REDDIT_USERNAME")\n\ +[ -z "$REDDIT_PASSWORD" ] && missing_vars+=("REDDIT_PASSWORD")\n\ +[ -z "$SOURCE_SUBREDDIT" ] && missing_vars+=("SOURCE_SUBREDDIT")\n\ +\n\ +if [ ${#missing_vars[@]} -ne 0 ]; then\n\ + echo "ERROR: Missing required environment variables:" >&2\n\ + printf " - %s\n" "${missing_vars[@]}" >&2\n\ + echo "" >&2\n\ + echo "Please set all required environment variables and restart the container." >&2\n\ + exit 1\n\ +fi\n\ +\n\ +echo "All required environment variables are set."\n\ +\n\ +PUID=${PUID:-1000}\n\ +PGID=${PGID:-1000}\n\ +\n\ +echo "Setting UID:GID to ${PUID}:${PGID}"\n\ +\n\ +# Update user and group IDs\n\ +groupmod -o -g "$PGID" modlogbot\n\ +usermod -o -u "$PUID" modlogbot\n\ +\n\ +# Fix ownership\n\ +echo "Fixing ownership of /config and /app"\n\ +chown -R modlogbot:modlogbot /config /app\n\ +\n\ +# Ensure data directory has correct permissions\n\ +if [ ! -f /config/data/modlog.db ]; then\n\ + echo "Initializing database directory"\n\ + touch /config/data/modlog.db\n\ + chown modlogbot:modlogbot /config/data/modlog.db\n\ +fi' > /etc/s6-overlay/scripts/init-modlogbot-run && \ + chmod +x /etc/s6-overlay/scripts/init-modlogbot-run + +# Create s6 service run script +RUN echo '#!/command/with-contenv bash\n\ +cd /app\n\ +exec s6-setuidgid modlogbot python modlog_wiki_publisher.py --continuous' > /etc/s6-overlay/scripts/modlog-bot-run && \ + chmod +x /etc/s6-overlay/scripts/modlog-bot-run + +# Setup s6 service definitions +RUN echo 'oneshot' > /etc/s6-overlay/s6-rc.d/init-modlogbot/type && \ + echo '/etc/s6-overlay/scripts/init-modlogbot-run' > /etc/s6-overlay/s6-rc.d/init-modlogbot/up && \ + echo 'longrun' > /etc/s6-overlay/s6-rc.d/modlog-bot/type && \ + echo '/etc/s6-overlay/scripts/modlog-bot-run' > /etc/s6-overlay/s6-rc.d/modlog-bot/run && \ + echo 'init-modlogbot' > /etc/s6-overlay/s6-rc.d/modlog-bot/dependencies && \ + touch /etc/s6-overlay/s6-rc.d/user/contents.d/init-modlogbot && \ + touch /etc/s6-overlay/s6-rc.d/user/contents.d/modlog-bot + +# Set working directory +WORKDIR /app + +# Copy application files +COPY --chown=modlogbot:modlogbot modlog_wiki_publisher.py /app/ +COPY --chown=modlogbot:modlogbot config_template.json /app/ + +# Health check +HEALTHCHECK --interval=5m --timeout=10s --start-period=30s --retries=3 \ + CMD python -c "import os, sys; sys.exit(0 if os.path.exists(os.getenv('DATABASE_PATH', '/config/data/modlog.db')) else 1)" + +# Use s6-overlay as entrypoint +ENTRYPOINT ["/init"] diff --git a/README.md b/README.md index b62e0f8..59074ff 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Automatically publishes Reddit moderation logs to a subreddit wiki page with mod * 🗄️ SQLite database for deduplication and retention with **multi-subreddit support** * ⏰ Configurable update intervals with continuous daemon mode * 🔒 Automatic cleanup of old entries with configurable retention -* ⚡ Handles Reddit's 524KB wiki size limit automatically +* ⚡ Handles Reddit's 524KB wiki size limit automatically * 🧩 Fully CLI-configurable (no need to edit `config.json`) * 📁 Per-subreddit log files for debugging and monitoring * 🔒 Configurable moderator anonymization (AutoModerator/HumanModerator) @@ -18,7 +18,15 @@ Automatically publishes Reddit moderation logs to a subreddit wiki page with mod * 🆔 **Unique content IDs** - comments show comment IDs, posts show post IDs for precise tracking * ✅ **Multi-subreddit database support** - single database handles multiple subreddits 
safely
-## Quick Start
+## Deployment Options
+
+Choose your preferred deployment method:
+
+- **🐳 Docker** (Recommended) - Containerized deployment with s6-overlay init system. See [Docker Deployment](#docker-deployment)
+- **⚙️ Systemd** (Production) - Native Linux service with automatic restart and log rotation. See [Systemd Service](#systemd-service-production)
+- **🐍 Python Native** (Development/Testing) - Direct Python execution. See [Quick Start](#quick-start-python-native) below
+
+## Quick Start (Python Native)
 1. **Install dependencies**
@@ -154,7 +162,7 @@ Options:
 ### Force Commands Explained
 **--force-modlog**: Complete rebuild from Reddit
-- Fetches ALL recent modlog actions from Reddit API 
+- Fetches ALL recent modlog actions from Reddit API
 - Stores them in database
 - Rebuilds entire wiki page from database
 - Use when: Starting fresh, major updates, or troubleshooting
@@ -173,7 +181,7 @@ Options:
 # Complete rebuild from Reddit API
 python modlog_wiki_publisher.py --source-subreddit usenet --force-modlog
-# Update wiki with current database data (bypass cache) 
+# Update wiki with current database data (bypass cache)
 python modlog_wiki_publisher.py --source-subreddit usenet --force-wiki
 # Do both (equivalent to old --force)
@@ -216,29 +224,105 @@ The database includes comprehensive moderation data with full transparency:
 - **`subreddit` column**: Multi-subreddit support with proper data separation
 - **Unique content IDs**: Comments show comment IDs (e.g., n7ravg2), posts show post IDs
-## Systemd Service (Optional)
+## Docker Deployment
-```ini
-[Unit]
-Description=Reddit Modlog Wiki Publisher
-After=network.target
+### Quick Start with Docker
-[Service]
-Type=simple
-User=YOUR_USER
-WorkingDirectory=/opt/RedditModLog
-ExecStart=/usr/bin/python3 modlog_wiki_publisher.py --source-subreddit yoursubreddit --continuous
-Restart=always
+```bash
+# Using Docker Compose (recommended)
+docker-compose up -d
+
+# Using Docker directly
+docker run -d \
+  --name reddit-modlog \
+  -e REDDIT_CLIENT_ID=your_client_id \
+  -e REDDIT_CLIENT_SECRET=your_client_secret \
+  -e REDDIT_USERNAME=your_username \
+  -e REDDIT_PASSWORD=your_password \
+  -e SOURCE_SUBREDDIT=yoursubreddit \
+  -e PUID=1000 \
+  -e PGID=1000 \
+  -v ./data:/config/data \
+  -v ./logs:/config/logs \
+  ghcr.io/bakerboy448/redditmodlog:latest
+```
-[Install]
-WantedBy=multi-user.target
+### Docker Environment Variables
+
+```env
+# User/Group IDs for file permissions
+PUID=1000
+PGID=1000
+
+# Reddit API credentials (REQUIRED)
+REDDIT_CLIENT_ID=your_client_id
+REDDIT_CLIENT_SECRET=your_client_secret
+REDDIT_USERNAME=your_bot_username
+REDDIT_PASSWORD=your_bot_password
+
+# Application settings
+SOURCE_SUBREDDIT=yoursubreddit
+WIKI_PAGE=modlog
+RETENTION_DAYS=30
+BATCH_SIZE=100
+UPDATE_INTERVAL=300
+ANONYMIZE_MODERATORS=true
+```
+
+### Docker Image
+
+Pre-built images available at:
+- `ghcr.io/bakerboy448/redditmodlog:latest`
+- Multi-architecture: `linux/amd64`, `linux/arm64`
+
+## Systemd Service (Production)
+
+### Installation
+
 ```bash
-sudo systemctl enable modlog-wiki
-sudo systemctl start modlog-wiki
+# Run the installation script
+cd systemd
+sudo ./install.sh
+
+# Copy and edit configs for your subreddits
+sudo cp /etc/redditmodlog/opensignups.json.example /etc/redditmodlog/opensignups.json
+sudo nano /etc/redditmodlog/opensignups.json
+
+# Start services
+sudo systemctl start modlog@opensignups
+sudo systemctl enable modlog@opensignups
+
+# Check logs
+tail -f /var/log/redditmodlog/opensignups.log
+```
+
+### Service Template
+
+The
systemd template (`modlog@.service`) supports multiple instances: + +```bash +# Start multiple subreddit services +sudo systemctl start modlog@subreddit1 +sudo systemctl start modlog@subreddit2 + +# Each service uses its own config file +# /etc/redditmodlog/subreddit1.json +# /etc/redditmodlog/subreddit2.json + +# Logs go to separate files +# /var/log/redditmodlog/subreddit1.log +# /var/log/redditmodlog/subreddit2.log ``` +### Features + +- ✅ Per-subreddit configuration files +- ✅ Automatic log rotation (30 days retention, 100MB max size) +- ✅ Security hardening (read-only filesystem, private /tmp) +- ✅ Resource limits (256MB RAM, 25% CPU) +- ✅ Automatic restart on failure +- ✅ Proper user/group management + ## Permissions Required Your bot account needs: diff --git a/config_template.json b/config_template.json index 600fa16..38cd89f 100644 --- a/config_template.json +++ b/config_template.json @@ -20,4 +20,4 @@ "show_full_ids": false, "id_format": "prefixed" } -} \ No newline at end of file +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..08b1065 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,64 @@ +services: + modlog-bot: + build: . + image: ghcr.io/bakerboy448/redditmodlog:latest + container_name: reddit-modlog-bot + restart: unless-stopped + environment: + # User/Group IDs for file permissions + - PUID=${PUID:-1000} + - PGID=${PGID:-1000} + + # Reddit API credentials - REQUIRED + - REDDIT_CLIENT_ID=${REDDIT_CLIENT_ID} + - REDDIT_CLIENT_SECRET=${REDDIT_CLIENT_SECRET} + - REDDIT_USERNAME=${REDDIT_USERNAME} + - REDDIT_PASSWORD=${REDDIT_PASSWORD} + + # Application settings + - SOURCE_SUBREDDIT=${SOURCE_SUBREDDIT} + - WIKI_PAGE=${WIKI_PAGE:-modlog} + - RETENTION_DAYS=${RETENTION_DAYS:-30} + - BATCH_SIZE=${BATCH_SIZE:-100} + - UPDATE_INTERVAL=${UPDATE_INTERVAL:-300} + - ANONYMIZE_MODERATORS=${ANONYMIZE_MODERATORS:-true} + + # Advanced settings (optional) + - WIKI_ACTIONS=${WIKI_ACTIONS:-removelink,removecomment,addremovalreason,spamlink,spamcomment,approvelink,approvecomment} + - IGNORED_MODERATORS=${IGNORED_MODERATORS:-} + + # Note: MAX_WIKI_ENTRIES_PER_PAGE and MAX_CONTINUOUS_ERRORS require config.json mount + # These settings are not supported via environment variables + + volumes: + # Persistent data storage + - ./data:/config/data + - ./logs:/config/logs + + # Optional: Mount config file instead of using env vars + # - ./config.json:/app/config.json:ro + + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "5" + + # Resource limits (Compose v2 syntax) + mem_limit: 256m + mem_reservation: 128m + cpus: 0.5 + + # Optional: Multiple bot instances for different subreddits + # modlog-bot-2: + # extends: modlog-bot + # container_name: reddit-modlog-bot-2 + # environment: + # - REDDIT_CLIENT_ID=${REDDIT_CLIENT_ID_2} + # - REDDIT_CLIENT_SECRET=${REDDIT_CLIENT_SECRET_2} + # - REDDIT_USERNAME=${REDDIT_USERNAME_2} + # - REDDIT_PASSWORD=${REDDIT_PASSWORD_2} + # - SOURCE_SUBREDDIT=${SOURCE_SUBREDDIT_2} + # volumes: + # - ./data2:/config/data + # - ./logs2:/config/logs diff --git a/modlog_wiki_publisher.py b/modlog_wiki_publisher.py index 4a426c8..0a06def 100644 --- a/modlog_wiki_publisher.py +++ b/modlog_wiki_publisher.py @@ -3,112 +3,151 @@ Reddit Modlog Wiki Publisher Scrapes moderation logs and publishes them to a subreddit wiki page """ -import os -import sys -import json -import sqlite3 -import time import argparse +import hashlib +import json import logging +import os import re -import hashlib +import sqlite3 +import sys +import 
time from datetime import datetime, timezone -from typing import Dict, List, Optional, Any +from typing import Any, Dict, List, Optional import praw -DB_PATH = os.getenv('DATABASE_PATH', "modlog.db") -LOGS_DIR = os.getenv('LOGS_DIR', "logs") +DB_PATH = os.getenv("DATABASE_PATH", "modlog.db") +LOGS_DIR = os.getenv("LOGS_DIR", "logs") BASE_BACKOFF_WAIT = 30 MAX_BACKOFF_WAIT = 300 logger = logging.getLogger(__name__) # Action type configurations -REMOVAL_ACTIONS = ['removelink', 'removecomment', 'spamlink', 'spamcomment'] -APPROVAL_ACTIONS = ['approvelink', 'approvecomment'] -REASON_ACTIONS = ['addremovalreason'] +REMOVAL_ACTIONS = ["removelink", "removecomment", "spamlink", "spamcomment"] +APPROVAL_ACTIONS = ["approvelink", "approvecomment"] +REASON_ACTIONS = ["addremovalreason"] DEFAULT_WIKI_ACTIONS = REMOVAL_ACTIONS + REASON_ACTIONS + APPROVAL_ACTIONS # Valid Reddit modlog actions (hardcoded for validation) VALID_MODLOG_ACTIONS = [ # Content removal - 'removelink', 'removecomment', 'spamlink', 'spamcomment', - # Content approval - 'approvelink', 'approvecomment', + "removelink", + "removecomment", + "spamlink", + "spamcomment", + # Content approval + "approvelink", + "approvecomment", # Moderation reasons and notes - 'addremovalreason', 'addnote', 'deletenote', + "addremovalreason", + "addnote", + "deletenote", # User actions - 'banuser', 'unbanuser', 'muteuser', 'unmuteuser', 'invitemoderator', 'acceptmoderatorinvite', + "banuser", + "unbanuser", + "muteuser", + "unmuteuser", + "invitemoderator", + "acceptmoderatorinvite", # Post management - 'distinguish', 'undistinguish', 'sticky', 'unsticky', 'lock', 'unlock', 'marknsfw', 'unmarknsfw', + "distinguish", + "undistinguish", + "sticky", + "unsticky", + "lock", + "unlock", + "marknsfw", + "unmarknsfw", # Wiki actions - 'wikirevise', 'wikipagelisted', 'wikipermlevel', 'wikibanned', 'wikicontributor', + "wikirevise", + "wikipagelisted", + "wikipermlevel", + "wikibanned", + "wikicontributor", # Settings and rules - 'editsettings', 'editflair', 'createrule', 'editrule', 'deleterule', 'reorderrules', + "editsettings", + "editflair", + "createrule", + "editrule", + "deleterule", + "reorderrules", # Reports and modmail - 'ignorereports', 'unignorereports', 'request_assistance', + "ignorereports", + "unignorereports", + "request_assistance", # Community features - 'community_widgets', 'community_welcome_page', 'edit_post_requirements', 'edit_comment_requirements', + "community_widgets", + "community_welcome_page", + "edit_post_requirements", + "edit_comment_requirements", # Saved responses - 'edit_saved_response', + "edit_saved_response", # Collections - 'addtocollection', 'removefromcollection' + "addtocollection", + "removefromcollection", ] # Configuration limits and defaults CONFIG_LIMITS = { - 'retention_days': {'min': 1, 'max': 365, 'default': 90}, - 'batch_size': {'min': 10, 'max': 500, 'default': 50}, - 'update_interval': {'min': 60, 'max': 3600, 'default': 600}, - 'max_wiki_entries_per_page': {'min': 100, 'max': 2000, 'default': 1000}, - 'max_continuous_errors': {'min': 1, 'max': 50, 'default': 5}, - 'rate_limit_buffer': {'min': 30, 'max': 300, 'default': 60}, - 'max_batch_retries': {'min': 1, 'max': 10, 'default': 3}, - 'archive_threshold_days': {'min': 1, 'max': 30, 'default': 7} + "retention_days": {"min": 1, "max": 365, "default": 90}, + "batch_size": {"min": 10, "max": 500, "default": 50}, + "update_interval": {"min": 60, "max": 3600, "default": 600}, + "max_wiki_entries_per_page": {"min": 100, "max": 2000, "default": 1000}, + 
"max_continuous_errors": {"min": 1, "max": 50, "default": 5}, + "rate_limit_buffer": {"min": 30, "max": 300, "default": 60}, + "max_batch_retries": {"min": 1, "max": 10, "default": 3}, + "archive_threshold_days": {"min": 1, "max": 30, "default": 7}, } # Database schema version CURRENT_DB_VERSION = 5 + def get_db_version(): """Get current database schema version""" try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - + # Check if version table exists - cursor.execute(""" - SELECT name FROM sqlite_master + cursor.execute( + """ + SELECT name FROM sqlite_master WHERE type='table' AND name='schema_version' - """) - + """ + ) + if not cursor.fetchone(): conn.close() return 0 - + cursor.execute("SELECT version FROM schema_version ORDER BY id DESC LIMIT 1") result = cursor.fetchone() conn.close() - + return result[0] if result else 0 except Exception as e: logger.warning(f"Could not determine database version: {e}") return 0 + def set_db_version(version): """Set database schema version""" try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - - cursor.execute(""" + + cursor.execute( + """ CREATE TABLE IF NOT EXISTS schema_version ( id INTEGER PRIMARY KEY AUTOINCREMENT, version INTEGER NOT NULL, applied_at INTEGER DEFAULT (strftime('%s', 'now')) ) - """) - + """ + ) + cursor.execute("INSERT INTO schema_version (version) VALUES (?)", (version,)) conn.commit() conn.close() @@ -117,120 +156,126 @@ def set_db_version(version): logger.error(f"Failed to set database version: {e}") raise + def validate_config_value(key, value, config_limits): """Validate and enforce configuration limits""" if key not in config_limits: return value - + limits = config_limits[key] - if value < limits['min']: + if value < limits["min"]: logger.warning(f"{key} value {value} below minimum {limits['min']}, using minimum") - return limits['min'] - elif value > limits['max']: + return limits["min"] + elif value > limits["max"]: logger.warning(f"{key} value {value} above maximum {limits['max']}, using maximum") - return limits['max'] - + return limits["max"] + return value + def validate_wiki_actions(wiki_actions): """Validate wiki_actions against known Reddit modlog actions""" if not isinstance(wiki_actions, list): raise ValueError("wiki_actions must be a list") - + if not wiki_actions: logger.info("Empty wiki_actions, using defaults") return DEFAULT_WIKI_ACTIONS - + invalid_actions = [action for action in wiki_actions if action not in VALID_MODLOG_ACTIONS] - + if invalid_actions: raise ValueError(f"Invalid modlog actions: {invalid_actions}. 
Valid actions: {sorted(VALID_MODLOG_ACTIONS)}") - + logger.info(f"Validated {len(wiki_actions)} wiki_actions: {wiki_actions}") return wiki_actions + def apply_config_defaults_and_limits(config): """Apply default values and enforce limits on configuration""" for key, limits in CONFIG_LIMITS.items(): if key not in config: - config[key] = limits['default'] + config[key] = limits["default"] logger.info(f"Using default value for {key}: {limits['default']}") else: config[key] = validate_config_value(key, config[key], CONFIG_LIMITS) - + # Set default wiki actions if not specified - if 'wiki_actions' not in config: - config['wiki_actions'] = DEFAULT_WIKI_ACTIONS + if "wiki_actions" not in config: + config["wiki_actions"] = DEFAULT_WIKI_ACTIONS logger.info("Using default wiki_actions: removals, removal reasons, and approvals") else: - config['wiki_actions'] = validate_wiki_actions(config['wiki_actions']) - + config["wiki_actions"] = validate_wiki_actions(config["wiki_actions"]) + # Validate required fields - required_fields = ['reddit', 'source_subreddit'] + required_fields = ["reddit", "source_subreddit"] for field in required_fields: if field not in config: raise ValueError(f"Missing required configuration field: {field}") - + # Validate reddit credentials - reddit_config = config.get('reddit', {}) - required_reddit_fields = ['client_id', 'client_secret', 'username', 'password'] + reddit_config = config.get("reddit", {}) + required_reddit_fields = ["client_id", "client_secret", "username", "password"] for field in required_reddit_fields: if field not in reddit_config or not reddit_config[field]: raise ValueError(f"Missing required reddit configuration field: {field}") - + # CRITICAL SECURITY CHECK: Never allow moderator de-anonymization on live Reddit - if not config.get('anonymize_moderators', True): + if not config.get("anonymize_moderators", True): raise ValueError("SECURITY: anonymize_moderators=false is not allowed. 
This would expose moderator identities publicly.") - + return config + def migrate_database(): """Run database migrations to current version""" current_version = get_db_version() target_version = CURRENT_DB_VERSION - + if current_version >= target_version: logger.info(f"Database already at version {current_version}, no migration needed") return - + logger.info(f"Migrating database from version {current_version} to {target_version}") - + try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - + # Migration from version 0 to 1: Initial schema if current_version < 1: logger.info("Applying migration: Initial schema (v0 -> v1)") - cursor.execute(""" + cursor.execute( + """ CREATE TABLE IF NOT EXISTS processed_actions ( id INTEGER PRIMARY KEY AUTOINCREMENT, action_id TEXT UNIQUE NOT NULL, created_at INTEGER NOT NULL, processed_at INTEGER DEFAULT (strftime('%s', 'now')) ) - """) + """ + ) cursor.execute("CREATE INDEX IF NOT EXISTS idx_action_id ON processed_actions(action_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_created_at ON processed_actions(created_at)") set_db_version(1) - + # Migration from version 1 to 2: Add tracking columns if current_version < 2: logger.info("Applying migration: Add tracking columns (v1 -> v2)") - + # Check if columns already exist to handle partial migrations cursor.execute("PRAGMA table_info(processed_actions)") existing_columns = [row[1] for row in cursor.fetchall()] - + columns_to_add = [ - ('action_type', 'TEXT'), - ('moderator', 'TEXT'), - ('target_id', 'TEXT'), - ('target_type', 'TEXT'), - ('display_id', 'TEXT'), - ('target_permalink', 'TEXT') + ("action_type", "TEXT"), + ("moderator", "TEXT"), + ("target_id", "TEXT"), + ("target_type", "TEXT"), + ("display_id", "TEXT"), + ("target_permalink", "TEXT"), ] - + for column_name, column_type in columns_to_add: if column_name not in existing_columns: try: @@ -239,38 +284,39 @@ def migrate_database(): except sqlite3.OperationalError as e: if "duplicate column name" not in str(e): raise - + # Add new indexes cursor.execute("CREATE INDEX IF NOT EXISTS idx_display_id ON processed_actions(display_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_target_id ON processed_actions(target_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_target_type ON processed_actions(target_type)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_moderator ON processed_actions(moderator)") - + set_db_version(2) - + # Migration from version 2 to 3: Add removal reason column if current_version < 3: logger.info("Applying migration: Add removal reason column (v2 -> v3)") - + # Check if column already exists cursor.execute("PRAGMA table_info(processed_actions)") existing_columns = [row[1] for row in cursor.fetchall()] - - if 'removal_reason' not in existing_columns: + + if "removal_reason" not in existing_columns: try: cursor.execute("ALTER TABLE processed_actions ADD COLUMN removal_reason TEXT") logger.info("Added column: removal_reason") except sqlite3.OperationalError as e: if "duplicate column name" not in str(e): raise - + set_db_version(3) - + # Migration from version 3 to 4: Add wiki hash caching table if current_version < 4: logger.info("Applying migration: Add wiki hash caching table (v3 -> v4)") - - cursor.execute(""" + + cursor.execute( + """ CREATE TABLE IF NOT EXISTS wiki_hash_cache ( id INTEGER PRIMARY KEY AUTOINCREMENT, subreddit TEXT NOT NULL, @@ -279,40 +325,42 @@ def migrate_database(): last_updated INTEGER DEFAULT (strftime('%s', 'now')), UNIQUE(subreddit, wiki_page) ) - """) + """ + ) cursor.execute("CREATE 
INDEX IF NOT EXISTS idx_subreddit_page ON wiki_hash_cache(subreddit, wiki_page)") logger.info("Created wiki_hash_cache table") - + set_db_version(4) - + # Migration from version 4 to 5: Add subreddit column if current_version < 5: logger.info("Applying migration: Add subreddit column (v4 -> v5)") - + # Check if column already exists cursor.execute("PRAGMA table_info(processed_actions)") existing_columns = [row[1] for row in cursor.fetchall()] - - if 'subreddit' not in existing_columns: + + if "subreddit" not in existing_columns: try: cursor.execute("ALTER TABLE processed_actions ADD COLUMN subreddit TEXT") logger.info("Added column: subreddit") except sqlite3.OperationalError as e: if "duplicate column name" not in str(e): raise - + cursor.execute("CREATE INDEX IF NOT EXISTS idx_subreddit ON processed_actions(subreddit)") - + set_db_version(5) - + conn.commit() conn.close() logger.info(f"Database migration completed successfully to version {target_version}") - + except Exception as e: logger.error(f"Database migration failed: {e}") raise + def setup_database(): """Initialize and migrate database""" try: @@ -323,19 +371,18 @@ def setup_database(): logger.error(f"Database setup failed: {e}") raise + def get_content_hash(content: str) -> str: """Calculate SHA-256 hash of content""" - return hashlib.sha256(content.encode('utf-8')).hexdigest() + return hashlib.sha256(content.encode("utf-8")).hexdigest() + def get_cached_wiki_hash(subreddit: str, wiki_page: str) -> Optional[str]: """Get cached wiki content hash for subreddit/page""" try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - cursor.execute( - "SELECT content_hash FROM wiki_hash_cache WHERE subreddit = ? AND wiki_page = ?", - (subreddit, wiki_page) - ) + cursor.execute("SELECT content_hash FROM wiki_hash_cache WHERE subreddit = ? 
AND wiki_page = ?", (subreddit, wiki_page)) result = cursor.fetchone() conn.close() return result[0] if result else None @@ -343,28 +390,35 @@ def get_cached_wiki_hash(subreddit: str, wiki_page: str) -> Optional[str]: logger.warning(f"Failed to get cached wiki hash: {e}") return None + def update_cached_wiki_hash(subreddit: str, wiki_page: str, content_hash: str): """Update cached wiki content hash for subreddit/page""" try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - cursor.execute(""" + cursor.execute( + """ INSERT OR REPLACE INTO wiki_hash_cache (subreddit, wiki_page, content_hash, last_updated) VALUES (?, ?, ?, strftime('%s', 'now')) - """, (subreddit, wiki_page, content_hash)) + """, + (subreddit, wiki_page, content_hash), + ) conn.commit() conn.close() logger.debug(f"Updated cached hash for /r/{subreddit}/wiki/{wiki_page}") except Exception as e: logger.warning(f"Failed to update cached wiki hash: {e}") + def censor_email_addresses(text): """Censor email addresses in removal reasons""" if not text: return text import re + # Replace email addresses with [EMAIL] - return re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '[EMAIL]', text) + return re.sub(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", "[EMAIL]", text) + def sanitize_for_markdown(text: str) -> str: """Sanitize text for use in markdown tables by escaping pipe characters""" @@ -372,11 +426,13 @@ def sanitize_for_markdown(text: str) -> str: return "" return str(text).replace("|", " ") + def get_config_with_default(config: Dict[str, Any], key: str) -> Any: """Get config value with fallback to CONFIG_LIMITS default""" if key not in CONFIG_LIMITS: raise ValueError(f"Unknown config key: {key}") - return config.get(key, CONFIG_LIMITS[key]['default']) + return config.get(key, CONFIG_LIMITS[key]["default"]) + def get_action_datetime(action): """Convert action.created_utc to datetime object regardless of input type""" @@ -385,134 +441,133 @@ def get_action_datetime(action): else: return action.created_utc + def get_moderator_name(action, anonymize=True): """Get moderator name with optional anonymization for human moderators""" if not action.mod: return None - + # Extract the actual moderator name if isinstance(action.mod, str): mod_name = action.mod else: mod_name = action.mod.name - + # Handle special cases - don't censor these, match main branch exactly - if mod_name.lower() in ['automoderator', 'reddit']: - if mod_name.lower() == 'automoderator': - return 'AutoModerator' # Match main branch exactly + if mod_name.lower() in ["automoderator", "reddit"]: + if mod_name.lower() == "automoderator": + return "AutoModerator" # Match main branch exactly else: - return 'Reddit' - + return "Reddit" + # For human moderators, show generic label or actual name based on config if anonymize: - return 'HumanModerator' + return "HumanModerator" else: return mod_name + def extract_target_id(action): """Extract Reddit ID from action target - NEVER return user ID""" # Priority order: get actual post/comment ID first - if hasattr(action, 'target_submission') and action.target_submission: - if hasattr(action.target_submission, 'id'): + if hasattr(action, "target_submission") and action.target_submission: + if hasattr(action.target_submission, "id"): return action.target_submission.id else: # Extract ID from submission object string representation target_str = str(action.target_submission) - if target_str.startswith('t3_'): + if target_str.startswith("t3_"): return target_str[3:] # Remove t3_ prefix return target_str - 
elif hasattr(action, 'target_comment') and action.target_comment: - if hasattr(action.target_comment, 'id'): + elif hasattr(action, "target_comment") and action.target_comment: + if hasattr(action.target_comment, "id"): return action.target_comment.id else: # Extract ID from comment object string representation target_str = str(action.target_comment) - if target_str.startswith('t1_'): + if target_str.startswith("t1_"): return target_str[3:] # Remove t1_ prefix return target_str else: # For user-related actions, use action ID instead of user ID return action.id + def get_target_type(action): """Determine target type for ID prefix""" - if hasattr(action, 'target_submission') and action.target_submission: - return 'post' - elif hasattr(action, 'target_comment') and action.target_comment: - return 'comment' - elif hasattr(action, 'target_author'): - return 'user' + if hasattr(action, "target_submission") and action.target_submission: + return "post" + elif hasattr(action, "target_comment") and action.target_comment: + return "comment" + elif hasattr(action, "target_author"): + return "user" else: - return 'action' + return "action" + def generate_display_id(action): """Generate human-readable display ID - NEVER use user ID""" target_id = extract_target_id(action) target_type = get_target_type(action) - - prefixes = { - 'post': 'P', - 'comment': 'C', - 'user': 'U', # Use 'A' for action ID when dealing with user actions - 'action': 'A' - } - - prefix = prefixes.get(target_type, 'ZZU') - + + # Prefix mapping: P=post, C=comment, U=user, A=action + prefixes = {"post": "P", "comment": "C", "user": "U", "action": "A"} + + prefix = prefixes.get(target_type, "ZZU") + # Shorten long IDs for display - if len(str(target_id)) > 8 and target_type in ['post', 'comment']: + if len(str(target_id)) > 8 and target_type in ["post", "comment"]: short_id = str(target_id)[:6] return f"{prefix}{short_id}" else: return f"{prefix}{target_id}" + def get_target_permalink(action): """Get permalink for the target content - prioritize actual content over user profiles""" # Check if we have a cached permalink from database - if hasattr(action, 'target_permalink_cached') and action.target_permalink_cached: + if hasattr(action, "target_permalink_cached") and action.target_permalink_cached: return action.target_permalink_cached - + try: # Priority 1: get actual post/comment permalinks from Reddit API - if hasattr(action, 'target_submission') and action.target_submission: - if hasattr(action.target_submission, 'permalink'): + if hasattr(action, "target_submission") and action.target_submission: + if hasattr(action.target_submission, "permalink"): return f"https://reddit.com{action.target_submission.permalink}" - elif hasattr(action.target_submission, 'id'): + elif hasattr(action.target_submission, "id"): # Construct permalink from submission ID return f"https://reddit.com/comments/{action.target_submission.id}/" - elif hasattr(action, 'target_comment') and action.target_comment: - if hasattr(action.target_comment, 'permalink'): + elif hasattr(action, "target_comment") and action.target_comment: + if hasattr(action.target_comment, "permalink"): return f"https://reddit.com{action.target_comment.permalink}" - elif hasattr(action.target_comment, 'id') and hasattr(action.target_comment, 'submission'): + elif hasattr(action.target_comment, "id") and hasattr(action.target_comment, "submission"): # For comments, construct proper permalink with submission ID return 
f"https://reddit.com/comments/{action.target_comment.submission.id}/_/{action.target_comment.id}/" - elif hasattr(action.target_comment, 'id'): + elif hasattr(action.target_comment, "id"): # Fallback for comment without submission info return f"https://reddit.com/comments/{action.target_comment.id}/" - + # Priority 2: Try to get content permalink from action.target_permalink if it's not a user profile - if hasattr(action, 'target_permalink') and action.target_permalink: + if hasattr(action, "target_permalink") and action.target_permalink: permalink = action.target_permalink # Only use if it's actual content (contains /comments/) not user profile (/u/) - if '/comments/' in permalink and '/u/' not in permalink: - return f"https://reddit.com{permalink}" if not permalink.startswith('http') else permalink - + if "/comments/" in permalink and "/u/" not in permalink: + return f"https://reddit.com{permalink}" if not permalink.startswith("http") else permalink + # NEVER fall back to user profiles - only link to actual content except: pass return None + def is_duplicate_action(action_id: str) -> bool: """Check if action has already been processed""" try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - - cursor.execute( - "SELECT 1 FROM processed_actions WHERE action_id = ? LIMIT 1", - (action_id,) - ) - + + cursor.execute("SELECT 1 FROM processed_actions WHERE action_id = ? LIMIT 1", (action_id,)) + result = cursor.fetchone() is not None conn.close() return result @@ -520,146 +575,151 @@ def is_duplicate_action(action_id: str) -> bool: logger.error(f"Error checking duplicate action: {e}") return False + def extract_subreddit_from_permalink(permalink): """Extract subreddit name from Reddit permalink URL""" if not permalink: return None - + import re + # Match patterns like /r/subreddit/ or https://reddit.com/r/subreddit/ - match = re.search(r'/r/([^/]+)/', permalink) + match = re.search(r"/r/([^/]+)/", permalink) return match.group(1) if match else None + def store_processed_action(action, subreddit_name=None): """Store processed action to prevent duplicates""" try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - + # Process removal reason properly - ALWAYS prefer actual text over numeric details removal_reason = None - + # For addremovalreason actions, use description field (contains actual text) - if action.action == 'addremovalreason' and hasattr(action, 'description') and action.description: + if action.action == "addremovalreason" and hasattr(action, "description") and action.description: removal_reason = censor_email_addresses(str(action.description).strip()) # First priority: mod_note (actual removal reason text) - elif hasattr(action, 'mod_note') and action.mod_note: + elif hasattr(action, "mod_note") and action.mod_note: removal_reason = censor_email_addresses(str(action.mod_note).strip()) # Second priority: details (accept ALL details text, including numbers) - elif hasattr(action, 'details') and action.details: + elif hasattr(action, "details") and action.details: details_str = str(action.details).strip() removal_reason = censor_email_addresses(details_str) - + # Extract subreddit from URL if not provided target_permalink = get_target_permalink(action) if not subreddit_name and target_permalink: subreddit_name = extract_subreddit_from_permalink(target_permalink) - + # Add subreddit column if it doesn't exist cursor.execute("PRAGMA table_info(processed_actions)") columns = [row[1] for row in cursor.fetchall()] - if 'subreddit' not in columns: + if "subreddit" not in 
columns: cursor.execute("ALTER TABLE processed_actions ADD COLUMN subreddit TEXT") - + # Add target_author column if it doesn't exist - if 'target_author' not in columns: + if "target_author" not in columns: cursor.execute("ALTER TABLE processed_actions ADD COLUMN target_author TEXT") - + # Extract target author target_author = None - if hasattr(action, 'target_author') and action.target_author: - if hasattr(action.target_author, 'name'): + if hasattr(action, "target_author") and action.target_author: + if hasattr(action.target_author, "name"): target_author = action.target_author.name else: target_author = str(action.target_author) - - cursor.execute(""" - INSERT OR REPLACE INTO processed_actions - (action_id, action_type, moderator, target_id, target_type, + + cursor.execute( + """ + INSERT OR REPLACE INTO processed_actions + (action_id, action_type, moderator, target_id, target_type, display_id, target_permalink, removal_reason, target_author, created_at, subreddit) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, ( - action.id, - action.action, - get_moderator_name(action, False), # Store actual name in database - extract_target_id(action), - get_target_type(action), - generate_display_id(action), - target_permalink, - sanitize_for_markdown(removal_reason), # Store properly processed removal reason - target_author, - int(action.created_utc) if isinstance(action.created_utc, (int, float)) else int(action.created_utc.timestamp()), - subreddit_name or 'unknown' - )) - + """, + ( + action.id, + action.action, + get_moderator_name(action, False), # Store actual name in database + extract_target_id(action), + get_target_type(action), + generate_display_id(action), + target_permalink, + sanitize_for_markdown(removal_reason), # Store properly processed removal reason + target_author, + int(action.created_utc) if isinstance(action.created_utc, (int, float)) else int(action.created_utc.timestamp()), + subreddit_name or "unknown", + ), + ) + conn.commit() conn.close() except Exception as e: logger.error(f"Error storing processed action: {e}") raise + def update_missing_subreddits(): """Update NULL subreddit entries by extracting from permalinks""" try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - + # Get entries with NULL subreddit but valid permalink - cursor.execute(""" - SELECT id, target_permalink FROM processed_actions + cursor.execute( + """ + SELECT id, target_permalink FROM processed_actions WHERE subreddit IS NULL AND target_permalink IS NOT NULL - """) - + """ + ) + updates = [] for row_id, permalink in cursor.fetchall(): subreddit = extract_subreddit_from_permalink(permalink) if subreddit: updates.append((subreddit, row_id)) - + # Update entries in batches if updates: - cursor.executemany( - "UPDATE processed_actions SET subreddit = ? WHERE id = ?", - updates - ) + cursor.executemany("UPDATE processed_actions SET subreddit = ? 
WHERE id = ?", updates) logger.info(f"Updated {len(updates)} entries with extracted subreddit names") - + conn.commit() conn.close() - + except Exception as e: logger.error(f"Error updating missing subreddits: {e}") + def cleanup_old_entries(retention_days: int): """Remove entries older than retention_days""" if retention_days <= 0: - retention_days = CONFIG_LIMITS['retention_days']['default'] # No config object available here - + retention_days = CONFIG_LIMITS["retention_days"]["default"] # No config object available here + try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - + cutoff_timestamp = int((datetime.now() - datetime.fromtimestamp(0)).total_seconds()) - (retention_days * 86400) - - cursor.execute( - "DELETE FROM processed_actions WHERE created_at < ?", - (cutoff_timestamp,) - ) - + + cursor.execute("DELETE FROM processed_actions WHERE created_at < ?", (cutoff_timestamp,)) + deleted_count = cursor.rowcount conn.commit() conn.close() - + if deleted_count > 0: logger.info(f"Cleaned up {deleted_count} old entries") except Exception as e: logger.error(f"Error during cleanup: {e}") + def get_recent_actions_from_db(config: Dict[str, Any], force_all_actions: bool = False, show_only_removals: bool = True) -> List: """Fetch recent actions from database for force refresh""" try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - + # For force refresh, get ALL actions, not just wiki_actions filter if force_all_actions: # Get all unique action types in database @@ -670,71 +730,79 @@ def get_recent_actions_from_db(config: Dict[str, Any], force_all_actions: bool = wiki_actions = set(DEFAULT_WIKI_ACTIONS) else: # Get configurable list of actions to show in wiki - wiki_actions = set(config.get('wiki_actions', DEFAULT_WIKI_ACTIONS)) - + wiki_actions = set(config.get("wiki_actions", DEFAULT_WIKI_ACTIONS)) + # Get recent actions within retention period - retention_days = get_config_with_default(config, 'retention_days') + retention_days = get_config_with_default(config, "retention_days") cutoff_timestamp = int((datetime.now() - datetime.fromtimestamp(0)).total_seconds()) - (retention_days * 86400) - + # Limit to max wiki entries - max_entries = get_config_with_default(config, 'max_wiki_entries_per_page') - - placeholders = ','.join(['?'] * len(wiki_actions)) + max_entries = get_config_with_default(config, "max_wiki_entries_per_page") + + placeholders = ",".join(["?"] * len(wiki_actions)) # STRICT subreddit filtering - only exact matches, no nulls - subreddit_name = config.get('source_subreddit', '') - + subreddit_name = config.get("source_subreddit", "") + logger.debug(f"Query parameters - cutoff: {cutoff_timestamp}, wiki_actions: {wiki_actions}, subreddit: '{subreddit_name}', max_entries: {max_entries}") - + # Check if actions exist for the requested subreddit - cursor.execute(""" - SELECT COUNT(*) FROM processed_actions - WHERE created_at >= ? AND action_type IN ({}) + cursor.execute( + """ + SELECT COUNT(*) FROM processed_actions + WHERE created_at >= ? AND action_type IN ({}) AND LOWER(subreddit) = LOWER(?) 
- """.format(placeholders), [cutoff_timestamp] + list(wiki_actions) + [subreddit_name]) - + """.format( + placeholders + ), + [cutoff_timestamp] + list(wiki_actions) + [subreddit_name], + ) + action_count = cursor.fetchone()[0] - + # If no actions exist for this subreddit, return empty list if action_count == 0: logger.info(f"No actions found for subreddit '{subreddit_name}' in the specified time range") conn.close() return [] - + logger.debug(f"Found {action_count} actions for subreddit '{subreddit_name}'") - + # Get list of all subreddits for informational purposes - cursor.execute(""" - SELECT DISTINCT LOWER(subreddit) FROM processed_actions + cursor.execute( + """ + SELECT DISTINCT LOWER(subreddit) FROM processed_actions WHERE created_at >= ? AND subreddit IS NOT NULL - """, [cutoff_timestamp]) - + """, + [cutoff_timestamp], + ) + all_subreddits = [row[0] for row in cursor.fetchall() if row[0]] if len(all_subreddits) > 1: logger.info(f"Multi-subreddit database contains data for: {sorted(all_subreddits)}") logger.info(f"Retrieving actions for subreddit: '{subreddit_name}'") - + query = f""" - SELECT action_id, action_type, moderator, target_id, target_type, - display_id, target_permalink, removal_reason, target_author, created_at - FROM processed_actions + SELECT action_id, action_type, moderator, target_id, target_type, + display_id, target_permalink, removal_reason, target_author, created_at + FROM processed_actions WHERE created_at >= ? AND action_type IN ({placeholders}) AND LOWER(subreddit) = LOWER(?) - ORDER BY created_at DESC + ORDER BY created_at DESC LIMIT ? """ - + cursor.execute(query, [cutoff_timestamp] + list(wiki_actions) + [subreddit_name, max_entries]) rows = cursor.fetchall() conn.close() - + logger.debug(f"Database query returned {len(rows)} rows") - + # Convert database rows to mock action objects for compatibility with existing functions mock_actions = [] for row in rows: action_id, action_type, moderator, target_id, target_type, display_id, target_permalink, removal_reason, target_author, created_at = row logger.debug(f"Processing cached action: {action_type} by {moderator} at {created_at}") - + # Create a mock action object with the data we have class MockAction: def __init__(self, action_id, action_type, moderator, target_id, target_type, display_id, target_permalink, removal_reason, target_author, created_at): @@ -745,161 +813,167 @@ def __init__(self, action_id, action_type, moderator, target_id, target_type, di self.created_utc = created_at self.details = removal_reason self.display_id = display_id - self.target_permalink = target_permalink.replace('https://reddit.com', '') if target_permalink and target_permalink.startswith('https://reddit.com') else target_permalink + self.target_permalink = ( + target_permalink.replace("https://reddit.com", "") if target_permalink and target_permalink.startswith("https://reddit.com") else target_permalink + ) self.target_permalink_cached = target_permalink - + # Use actual target_author from database self.target_title = None self.target_author = target_author # Use actual target_author from database - + mock_actions.append(MockAction(action_id, action_type, moderator, target_id, target_type, display_id, target_permalink, removal_reason, target_author, created_at)) - + logger.info(f"Retrieved {len(mock_actions)} actions from database for force refresh") return mock_actions - + except Exception as e: logger.error(f"Error fetching actions from database: {e}") return [] + def format_content_link(action) -> str: """Format content 
link for wiki table - matches main branch approach exactly""" - + # Use actual Reddit API data like main branch does - formatted_link = '' - if hasattr(action, 'target_permalink') and action.target_permalink: + formatted_link = "" + if hasattr(action, "target_permalink") and action.target_permalink: formatted_link = f"https://www.reddit.com{action.target_permalink}" - elif hasattr(action, 'target_permalink_cached') and action.target_permalink_cached: + elif hasattr(action, "target_permalink_cached") and action.target_permalink_cached: formatted_link = action.target_permalink_cached - + # Check if comment using main branch logic - is_comment = bool(hasattr(action, 'target_permalink') and action.target_permalink - and '/comments/' in action.target_permalink and action.target_permalink.count('/') > 6) - + is_comment = bool(hasattr(action, "target_permalink") and action.target_permalink and "/comments/" in action.target_permalink and action.target_permalink.count("/") > 6) + # Determine title using main branch approach - formatted_title = '' - if is_comment and hasattr(action, 'target_title') and action.target_title: + formatted_title = "" + if is_comment and hasattr(action, "target_title") and action.target_title: formatted_title = action.target_title - elif is_comment and (not hasattr(action, 'target_title') or not action.target_title): - target_author = action.target_author if hasattr(action, 'target_author') and action.target_author else '[deleted]' + elif is_comment and (not hasattr(action, "target_title") or not action.target_title): + target_author = action.target_author if hasattr(action, "target_author") and action.target_author else "[deleted]" formatted_title = f"Comment by u/{target_author}" - elif not is_comment and hasattr(action, 'target_title') and action.target_title: + elif not is_comment and hasattr(action, "target_title") and action.target_title: formatted_title = action.target_title - elif not is_comment and (not hasattr(action, 'target_title') or not action.target_title): - target_author = action.target_author if hasattr(action, 'target_author') and action.target_author else '[deleted]' + elif not is_comment and (not hasattr(action, "target_title") or not action.target_title): + target_author = action.target_author if hasattr(action, "target_author") and action.target_author else "[deleted]" formatted_title = f"Post by u/{target_author}" else: - formatted_title = 'Unknown content' - + formatted_title = "Unknown content" + # Format with link like main branch if formatted_link: formatted_title = f"[{formatted_title}]({formatted_link})" return sanitize_for_markdown(formatted_title) + def extract_content_id_from_permalink(permalink): """Extract the actual post/comment ID from Reddit permalink URL""" if not permalink: return None - + import re + # Check for comment ID first - URLs like /comments/abc123/title/def456/ - comment_match = re.search(r'/comments/[a-zA-Z0-9]+/[^/]*/([a-zA-Z0-9]+)/?', permalink) + comment_match = re.search(r"/comments/[a-zA-Z0-9]+/[^/]*/([a-zA-Z0-9]+)/?", permalink) if comment_match: return f"t1_{comment_match.group(1)}" - + # Extract post ID from URLs like /comments/abc123/ (only if no comment ID found) - post_match = re.search(r'/comments/([a-zA-Z0-9]+)/', permalink) + post_match = re.search(r"/comments/([a-zA-Z0-9]+)/", permalink) if post_match: return f"t3_{post_match.group(1)}" - + return None + def format_modlog_entry(action, config: Dict[str, Any]) -> Dict[str, str]: """Format modlog entry - matches main branch approach exactly""" - + 
reason_text = "-" - - if hasattr(action, 'combined_reason') and action.combined_reason: + + if hasattr(action, "combined_reason") and action.combined_reason: reason_text = str(action.combined_reason).strip() - elif hasattr(action, 'approval_context') and action.approval_context: + elif hasattr(action, "approval_context") and action.approval_context: reason_text = str(action.approval_context).strip() else: - parsed_mod_note = '' - if hasattr(action, 'mod_note') and action.mod_note: + parsed_mod_note = "" + if hasattr(action, "mod_note") and action.mod_note: parsed_mod_note = str(action.mod_note).strip() - elif hasattr(action, 'details') and action.details: + elif hasattr(action, "details") and action.details: parsed_mod_note = str(action.details).strip() - - if hasattr(action, 'details') and action.details: + + if hasattr(action, "details") and action.details: reason_text = str(action.details).strip() - if action.action in ['addremovalreason']: + if action.action in ["addremovalreason"]: reason_text = parsed_mod_note if parsed_mod_note else reason_text elif parsed_mod_note: reason_text = parsed_mod_note - + content_id = "-" - if hasattr(action, 'target_permalink') and action.target_permalink: + if hasattr(action, "target_permalink") and action.target_permalink: extracted_id = extract_content_id_from_permalink(action.target_permalink) if extracted_id: - content_id = extracted_id.replace('t3_', '').replace('t1_', '')[:8] - + content_id = extracted_id.replace("t3_", "").replace("t1_", "")[:8] + display_action = action.action - if action.action in REMOVAL_ACTIONS and get_moderator_name(action, False) == 'AutoModerator': + if action.action in REMOVAL_ACTIONS and get_moderator_name(action, False) == "AutoModerator": display_action = f"filter-{action.action}" - + return { - 'time': get_action_datetime(action).strftime('%H:%M:%S UTC'), - 'action': display_action, - 'id': content_id, - 'moderator': get_moderator_name(action, config.get('anonymize_moderators', True)) or 'Unknown', - 'content': format_content_link(action), - 'reason': sanitize_for_markdown(str(reason_text)), - 'inquire': generate_modmail_link(config['source_subreddit'], action) + "time": get_action_datetime(action).strftime("%H:%M:%S UTC"), + "action": display_action, + "id": content_id, + "moderator": get_moderator_name(action, config.get("anonymize_moderators", True)) or "Unknown", + "content": format_content_link(action), + "reason": sanitize_for_markdown(str(reason_text)), + "inquire": generate_modmail_link(config["source_subreddit"], action), } + def generate_modmail_link(subreddit: str, action) -> str: """Generate modmail link for user inquiries with content ID for tracking""" from urllib.parse import quote - + # Determine removal type like main branch type_map = { - 'removelink': 'Post', - 'removepost': 'Post', - 'removecomment': 'Comment', - 'spamlink': 'Spam Post', - 'spamcomment': 'Spam Comment', - 'removecontent': 'Content', - 'addremovalreason': 'Removal Reason', + "removelink": "Post", + "removepost": "Post", + "removecomment": "Comment", + "spamlink": "Spam Post", + "spamcomment": "Spam Comment", + "removecontent": "Content", + "addremovalreason": "Removal Reason", } - removal_type = type_map.get(action.action, 'Content') - + removal_type = type_map.get(action.action, "Content") + # Get content ID for tracking content_id = "-" - if hasattr(action, 'target_permalink') and action.target_permalink: + if hasattr(action, "target_permalink") and action.target_permalink: extracted_id = 
extract_content_id_from_permalink(action.target_permalink) if extracted_id: - content_id = extracted_id.replace('t3_', '').replace('t1_', '')[:8] - + content_id = extracted_id.replace("t3_", "").replace("t1_", "")[:8] + # Get title and truncate if needed - if hasattr(action, 'target_title') and action.target_title: + if hasattr(action, "target_title") and action.target_title: title = action.target_title else: - title = f"Content by u/{action.target_author}" if hasattr(action, 'target_author') and action.target_author else "Unknown content" - + title = f"Content by u/{action.target_author}" if hasattr(action, "target_author") and action.target_author else "Unknown content" + # Truncate title if too long max_title_length = 50 if len(title) > max_title_length: - title = title[:max_title_length-3] + "..." - + title = title[: max_title_length - 3] + "..." + # Get URL url = "" - if hasattr(action, 'target_permalink_cached') and action.target_permalink_cached: + if hasattr(action, "target_permalink_cached") and action.target_permalink_cached: url = action.target_permalink_cached - elif hasattr(action, 'target_permalink') and action.target_permalink: - url = f"https://www.reddit.com{action.target_permalink}" if not action.target_permalink.startswith('http') else action.target_permalink - + elif hasattr(action, "target_permalink") and action.target_permalink: + url = f"https://www.reddit.com{action.target_permalink}" if not action.target_permalink.startswith("http") else action.target_permalink + # Create subject line with content ID for tracking subject = f"{removal_type} Removal Inquiry - {title} [ID: {content_id}]" - + # Create body with content ID for easier modmail tracking body = ( f"Hello Moderators of /r/{subreddit},\n\n" @@ -911,100 +985,104 @@ def generate_modmail_link(subreddit: str, action) -> str: "Please provide details regarding this action.\n\n" "Thank you!" ) - + modmail_url = f"https://www.reddit.com/message/compose?to=/r/{subreddit}&subject={quote(subject)}&message={quote(body)}" return f"[Contact Mods]({modmail_url})" + def build_wiki_content(actions: List, config: Dict[str, Any]) -> str: """Build wiki page content from actions""" # Add timestamp header at the top current_time = datetime.now(timezone.utc) timestamp_header = f"**Last Updated:** {current_time.strftime('%Y-%m-%d %H:%M:%S UTC')}\n\n---\n\n" - + if not actions: return timestamp_header + "No recent moderation actions found." 
- + # CRITICAL: Validate all actions belong to the same subreddit before building content - target_subreddit = config.get('source_subreddit', '') + target_subreddit = config.get("source_subreddit", "") mixed_subreddits = set() - + for action in actions: # Check if action has subreddit info and if it matches (case-insensitive) - if hasattr(action, 'subreddit') and action.subreddit: + if hasattr(action, "subreddit") and action.subreddit: if action.subreddit.lower() != target_subreddit.lower(): mixed_subreddits.add(action.subreddit) - + if mixed_subreddits: logger.error(f"CRITICAL: Mixed subreddit data in actions for {target_subreddit}: {mixed_subreddits}") raise ValueError(f"Cannot build wiki content - mixed subreddit data detected: {mixed_subreddits}") - + filtered_actions = [] for action in actions: if action.action in APPROVAL_ACTIONS: should_include = False content_id = extract_content_id_from_permalink(get_target_permalink(action)) if content_id: - content_id = content_id.replace('t3_', '').replace('t1_', '') - + content_id = content_id.replace("t3_", "").replace("t1_", "") + if content_id: try: conn = sqlite3.connect(DB_PATH) cursor = conn.cursor() - - removal_placeholders = ','.join(['?'] * len(REMOVAL_ACTIONS)) - cursor.execute(f""" - SELECT moderator, removal_reason FROM processed_actions + + removal_placeholders = ",".join(["?"] * len(REMOVAL_ACTIONS)) + cursor.execute( + f""" + SELECT moderator, removal_reason FROM processed_actions WHERE target_permalink LIKE ? AND action_type IN ({removal_placeholders}) AND LOWER(moderator) IN ('reddit', 'automoderator') ORDER BY created_at DESC LIMIT 1 - """, [f'%{content_id}%'] + REMOVAL_ACTIONS) - + """, + [f"%{content_id}%"] + REMOVAL_ACTIONS, + ) + prior_removal = cursor.fetchone() conn.close() - + if prior_removal: should_include = True original_moderator, original_reason = prior_removal - + approval_reason = f"Approved {original_moderator} removal" if original_reason and original_reason.strip() and original_reason != "-": approval_reason += f": {original_reason.strip()}" - + action.approval_context = approval_reason logger.debug(f"Including approval {action.id} - content {content_id} was previously removed by {original_moderator}") else: logger.debug(f"Excluding approval {action.id} - no prior Reddit/AutoMod removal found for content {content_id}") - + except Exception as e: logger.warning(f"Error checking prior removals for approval {action.id}: {e}") should_include = False - + if should_include: filtered_actions.append(action) else: filtered_actions.append(action) - + actions = filtered_actions - + combined_actions = [] - actions_by_target = {} - + actions_by_target: Dict[str, List[Any]] = {} + for action in actions: content_id = extract_content_id_from_permalink(get_target_permalink(action)) if content_id: - content_id = content_id.replace('t3_', '').replace('t1_', '') + content_id = content_id.replace("t3_", "").replace("t1_", "") else: content_id = extract_target_id(action) - + if content_id not in actions_by_target: actions_by_target[content_id] = [] actions_by_target[content_id].append(action) - + for target_id, target_actions in actions_by_target.items(): removal_action = None removal_reason_action = None other_actions = [] - + for action in target_actions: if action.action in REMOVAL_ACTIONS: if not removal_action: @@ -1018,38 +1096,38 @@ def build_wiki_content(actions: List, config: Dict[str, Any]) -> str: other_actions.append(action) else: other_actions.append(action) - + if removal_action and removal_reason_action: - if 
hasattr(removal_reason_action, 'details') and removal_reason_action.details: + if hasattr(removal_reason_action, "details") and removal_reason_action.details: removal_action.combined_reason = removal_reason_action.details - elif hasattr(removal_reason_action, 'mod_note') and removal_reason_action.mod_note: + elif hasattr(removal_reason_action, "mod_note") and removal_reason_action.mod_note: removal_action.combined_reason = removal_reason_action.mod_note - + combined_actions.append(removal_action) else: if removal_action: combined_actions.append(removal_action) if removal_reason_action: combined_actions.append(removal_reason_action) - + combined_actions.extend(other_actions) - + actions = combined_actions - + # Enforce wiki entry limits - max_entries = get_config_with_default(config, 'max_wiki_entries_per_page') + max_entries = get_config_with_default(config, "max_wiki_entries_per_page") if len(actions) > max_entries: logger.warning(f"Truncating wiki content to {max_entries} entries (was {len(actions)})") actions = actions[:max_entries] - + # Group actions by date - actions_by_date = {} + actions_by_date: Dict[str, List[Any]] = {} for action in actions: - date_str = get_action_datetime(action).strftime('%Y-%m-%d') + date_str = get_action_datetime(action).strftime("%Y-%m-%d") if date_str not in actions_by_date: actions_by_date[date_str] = [] actions_by_date[date_str].append(action) - + # Build content - include ID column for tracking actions across the table # Reddit wiki page size limit (512 KB) REDDIT_WIKI_LIMIT = 524288 @@ -1075,11 +1153,11 @@ def build_wiki_content(actions: List, config: Dict[str, Any]) -> str: # Check size and trim if necessary included_dates = [] - current_size = len((timestamp_header + "\n".join(footer_parts)).encode('utf-8')) + current_size = len((timestamp_header + "\n".join(footer_parts)).encode("utf-8")) skipped_days = 0 for i, date_content in enumerate(full_content_parts): - test_size = current_size + len(date_content.encode('utf-8')) + test_size = current_size + len(date_content.encode("utf-8")) # If adding this date would exceed the warning threshold, stop adding dates if test_size > WARNING_THRESHOLD: @@ -1101,7 +1179,7 @@ def build_wiki_content(actions: List, config: Dict[str, Any]) -> str: content_parts.extend(footer_parts) final_content = "\n".join(content_parts) - final_size = len(final_content.encode('utf-8')) + final_size = len(final_content.encode("utf-8")) if skipped_days > 0: logger.info(f"Wiki content size after trimming: {final_size:,} bytes ({(final_size/REDDIT_WIKI_LIMIT)*100:.1f}% of limit)") @@ -1109,17 +1187,18 @@ def build_wiki_content(actions: List, config: Dict[str, Any]) -> str: return final_content + def setup_reddit_client(config: Dict[str, Any]): """Initialize Reddit API client""" try: reddit = praw.Reddit( - client_id=config['reddit']['client_id'], - client_secret=config['reddit']['client_secret'], - username=config['reddit']['username'], - password=config['reddit']['password'], - user_agent=f"ModlogWikiPublisher/2.0 by /u/{config['reddit']['username']}" + client_id=config["reddit"]["client_id"], + client_secret=config["reddit"]["client_secret"], + username=config["reddit"]["username"], + password=config["reddit"]["password"], + user_agent=f"ModlogWikiPublisher/2.0 by /u/{config['reddit']['username']}", ) - + # Test authentication me = reddit.user.me() logger.info(f"Successfully authenticated as: /u/{me.name}") @@ -1128,6 +1207,7 @@ def setup_reddit_client(config: Dict[str, Any]): logger.error(f"Failed to authenticate with Reddit: {e}") 
raise + def update_wiki_page(reddit, subreddit_name: str, wiki_page: str, content: str, force: bool = False): """Update wiki page with content, using hash caching to avoid unnecessary updates""" try: @@ -1135,7 +1215,7 @@ def update_wiki_page(reddit, subreddit_name: str, wiki_page: str, content: str, REDDIT_WIKI_LIMIT = 524288 # Check content size - content_size = len(content.encode('utf-8')) + content_size = len(content.encode("utf-8")) if content_size > REDDIT_WIKI_LIMIT: logger.error(f"Wiki content size ({content_size:,} bytes) exceeds Reddit's limit ({REDDIT_WIKI_LIMIT:,} bytes)") logger.error(f"Content is {content_size - REDDIT_WIKI_LIMIT:,} bytes over the limit") @@ -1164,7 +1244,7 @@ def update_wiki_page(reddit, subreddit_name: str, wiki_page: str, content: str, subreddit = reddit.subreddit(subreddit_name) try: existing_wiki = subreddit.wiki[wiki_page] - existing_size = len(existing_wiki.content_md.encode('utf-8')) + existing_size = len(existing_wiki.content_md.encode("utf-8")) logger.debug(f"Existing wiki page size: {existing_size:,} bytes") # If new content would make page exceed limit, we need to handle it @@ -1183,10 +1263,7 @@ def update_wiki_page(reddit, subreddit_name: str, wiki_page: str, content: str, # Update the wiki page logger.info(f"Attempting to update wiki page with {content_size:,} bytes of content") - subreddit.wiki[wiki_page].edit( - content=content, - reason="Automated modlog update" - ) + subreddit.wiki[wiki_page].edit(content=content, reason="Automated modlog update") # Update the cached hash update_cached_wiki_hash(subreddit_name, wiki_page, content_hash) @@ -1205,7 +1282,7 @@ def update_wiki_page(reddit, subreddit_name: str, wiki_page: str, content: str, logger.error(f"Reddit API error updating wiki page: {', '.join(error_messages)}") # Check if it's a size-related error - if any('too long' in msg.lower() or 'size' in msg.lower() for msg in error_messages): + if any("too long" in msg.lower() or "size" in msg.lower() for msg in error_messages): logger.error(f"Wiki content size ({content_size:,} bytes) likely exceeds Reddit's limit") logger.error("Try reducing retention_days or max_wiki_entries_per_page in config") @@ -1230,7 +1307,7 @@ def update_wiki_page(reddit, subreddit_name: str, wiki_page: str, content: str, # Try to check existing page size for context try: existing_wiki = subreddit.wiki[wiki_page] - existing_size = len(existing_wiki.content_md.encode('utf-8')) + existing_size = len(existing_wiki.content_md.encode("utf-8")) logger.error(f"Current wiki page size: {existing_size:,} bytes") if existing_size > REDDIT_WIKI_LIMIT * 0.95: logger.error("Wiki page is already near Reddit's size limit!") @@ -1246,152 +1323,164 @@ def update_wiki_page(reddit, subreddit_name: str, wiki_page: str, content: str, raise + def process_modlog_actions(reddit, config: Dict[str, Any]) -> List: """Fetch and process new modlog actions""" try: # Validate batch size - batch_size = validate_config_value('batch_size', config.get('batch_size', 50), CONFIG_LIMITS) - if batch_size != config.get('batch_size'): - config['batch_size'] = batch_size - - subreddit = reddit.subreddit(config['source_subreddit']) - ignored_mods = set(config.get('ignored_moderators', [])) - + batch_size = validate_config_value("batch_size", config.get("batch_size", 50), CONFIG_LIMITS) + if batch_size != config.get("batch_size"): + config["batch_size"] = batch_size + + subreddit = reddit.subreddit(config["source_subreddit"]) + ignored_mods = set(config.get("ignored_moderators", [])) + new_actions = [] 
processed_count = 0 - + logger.info(f"Fetching modlog entries from /r/{config['source_subreddit']}") - + # Get configurable list of actions to show in wiki - wiki_actions = set(config.get('wiki_actions', DEFAULT_WIKI_ACTIONS)) - + wiki_actions = set(config.get("wiki_actions", DEFAULT_WIKI_ACTIONS)) + for action in subreddit.mod.log(limit=batch_size): mod_name = get_moderator_name(action, False) # Use actual name for ignore check if mod_name and mod_name in ignored_mods: continue - + if is_duplicate_action(action.id): continue - + # Store ALL actions to database to prevent duplicates - store_processed_action(action, config['source_subreddit']) + store_processed_action(action, config["source_subreddit"]) processed_count += 1 - + # Only include specific action types in the wiki display if action.action in wiki_actions: new_actions.append(action) - + if processed_count >= batch_size: break - + logger.info(f"Processed {processed_count} new modlog actions") return new_actions except Exception as e: logger.error(f"Error processing modlog actions: {e}") raise + def load_env_config() -> Dict[str, Any]: """Load configuration from environment variables""" - env_config = {} - + env_config: Dict[str, Any] = {} + # Reddit credentials - reddit_config = {} - if os.getenv('REDDIT_CLIENT_ID'): - reddit_config['client_id'] = os.getenv('REDDIT_CLIENT_ID') - if os.getenv('REDDIT_CLIENT_SECRET'): - reddit_config['client_secret'] = os.getenv('REDDIT_CLIENT_SECRET') - if os.getenv('REDDIT_USERNAME'): - reddit_config['username'] = os.getenv('REDDIT_USERNAME') - if os.getenv('REDDIT_PASSWORD'): - reddit_config['password'] = os.getenv('REDDIT_PASSWORD') - + reddit_config: Dict[str, Any] = {} + if os.getenv("REDDIT_CLIENT_ID"): + reddit_config["client_id"] = os.getenv("REDDIT_CLIENT_ID") + if os.getenv("REDDIT_CLIENT_SECRET"): + reddit_config["client_secret"] = os.getenv("REDDIT_CLIENT_SECRET") + if os.getenv("REDDIT_USERNAME"): + reddit_config["username"] = os.getenv("REDDIT_USERNAME") + if os.getenv("REDDIT_PASSWORD"): + reddit_config["password"] = os.getenv("REDDIT_PASSWORD") + if reddit_config: - env_config['reddit'] = reddit_config - + env_config["reddit"] = reddit_config + # Application settings - if os.getenv('SOURCE_SUBREDDIT'): - env_config['source_subreddit'] = os.getenv('SOURCE_SUBREDDIT') - if os.getenv('WIKI_PAGE'): - env_config['wiki_page'] = os.getenv('WIKI_PAGE') - if os.getenv('RETENTION_DAYS'): - env_config['retention_days'] = int(os.getenv('RETENTION_DAYS')) - if os.getenv('BATCH_SIZE'): - env_config['batch_size'] = int(os.getenv('BATCH_SIZE')) - if os.getenv('UPDATE_INTERVAL'): - env_config['update_interval'] = int(os.getenv('UPDATE_INTERVAL')) - if os.getenv('ANONYMIZE_MODERATORS'): - env_config['anonymize_moderators'] = os.getenv('ANONYMIZE_MODERATORS').lower() == 'true' - + source_subreddit = os.getenv("SOURCE_SUBREDDIT") + if source_subreddit: + env_config["source_subreddit"] = source_subreddit + wiki_page = os.getenv("WIKI_PAGE") + if wiki_page: + env_config["wiki_page"] = wiki_page + retention_days = os.getenv("RETENTION_DAYS") + if retention_days: + env_config["retention_days"] = int(retention_days) + batch_size = os.getenv("BATCH_SIZE") + if batch_size: + env_config["batch_size"] = int(batch_size) + update_interval = os.getenv("UPDATE_INTERVAL") + if update_interval: + env_config["update_interval"] = int(update_interval) + anonymize_moderators = os.getenv("ANONYMIZE_MODERATORS") + if anonymize_moderators: + env_config["anonymize_moderators"] = anonymize_moderators.lower() == "true" + # 
Wiki actions (comma-separated list) - if os.getenv('WIKI_ACTIONS'): + wiki_actions = os.getenv("WIKI_ACTIONS") + if wiki_actions: try: - raw_actions = [action.strip() for action in os.getenv('WIKI_ACTIONS').split(',')] - env_config['wiki_actions'] = validate_wiki_actions(raw_actions) + raw_actions = [action.strip() for action in wiki_actions.split(",")] + env_config["wiki_actions"] = validate_wiki_actions(raw_actions) except ValueError as e: logger.error(f"WIKI_ACTIONS environment variable invalid: {e}") raise - + # Ignored moderators (comma-separated list) - if os.getenv('IGNORED_MODERATORS'): - env_config['ignored_moderators'] = [mod.strip() for mod in os.getenv('IGNORED_MODERATORS').split(',')] - + ignored_moderators = os.getenv("IGNORED_MODERATORS") + if ignored_moderators: + env_config["ignored_moderators"] = [mod.strip() for mod in ignored_moderators.split(",")] + return env_config + def load_config(config_path: str, auto_update: bool = True) -> Dict[str, Any]: """Load and validate configuration from file and environment variables""" try: # Load existing config from file original_config = {} config_updated = False - + try: - with open(config_path, 'r') as f: + with open(config_path, "r") as f: original_config = json.load(f) except FileNotFoundError: logger.warning(f"Config file not found: {config_path}, using environment variables only") original_config = {} - + # Override with environment variables env_config = load_env_config() if env_config: logger.info("Using environment variable overrides for configuration") original_config.update(env_config) - + # Store original config for comparison config_before = original_config.copy() - + # Apply defaults and validate limits config = apply_config_defaults_and_limits(original_config) - + # Check if any new defaults were added for key, limits in CONFIG_LIMITS.items(): if key not in config_before: config_updated = True logger.info(f"Added new configuration field '{key}' with default value: {limits['default']}") - + # Auto-update config file if new defaults were added and auto_update is enabled if config_updated and auto_update: try: # Create backup of original config backup_path = f"{config_path}.backup" import shutil + shutil.copy2(config_path, backup_path) logger.info(f"Created backup of original config: {backup_path}") - + # Write updated config - with open(config_path, 'w') as f: + with open(config_path, "w") as f: json.dump(config, f, indent=2) logger.info(f"Auto-updated config file '{config_path}' with new defaults") - + except Exception as e: logger.warning(f"Could not auto-update config file: {e}") logger.info("Configuration will still work with in-memory defaults") elif config_updated and not auto_update: logger.info("Config file updates available but auto-update disabled. 
Run without --no-auto-update-config to update.") - + logger.info("Configuration loaded and validated successfully") return config - + except json.JSONDecodeError as e: logger.error(f"Invalid JSON in config file: {e}") raise @@ -1400,91 +1489,43 @@ def load_config(config_path: str, auto_update: bool = True) -> Dict[str, Any]: logger.error("Please check your configuration file format and required fields") raise + def create_argument_parser(): """Create command line argument parser""" - parser = argparse.ArgumentParser( - description='Reddit Modlog Wiki Publisher', - formatter_class=argparse.RawDescriptionHelpFormatter - ) - - parser.add_argument( - '--config', default='config.json', - help='Path to configuration file' - ) - parser.add_argument( - '--source-subreddit', - help='Source subreddit name' - ) - parser.add_argument( - '--wiki-page', default='modlog', - help='Wiki page name' - ) - parser.add_argument( - '--retention-days', type=int, - help='Database retention period in days' - ) - parser.add_argument( - '--batch-size', type=int, - help='Number of entries to fetch per run' - ) - parser.add_argument( - '--interval', type=int, - help='Update interval in seconds for continuous mode' - ) - parser.add_argument( - '--continuous', action='store_true', - help='Run continuously with interval updates' - ) - parser.add_argument( - '--test', action='store_true', - help='Test configuration and Reddit API access' - ) - parser.add_argument( - '--debug', action='store_true', - help='Enable debug logging' - ) - parser.add_argument( - '--show-config-limits', action='store_true', - help='Show configuration limits and defaults' - ) - parser.add_argument( - '--force-migrate', action='store_true', - help='Force database migration (use with caution)' - ) - parser.add_argument( - '--no-auto-update-config', action='store_true', - help='Disable automatic config file updates' - ) - parser.add_argument( - '--force-modlog', action='store_true', - help='Fetch ALL modlog actions from Reddit API and completely rebuild wiki from database' - ) - parser.add_argument( - '--force-wiki', action='store_true', - help='Force wiki page update even if content appears unchanged (bypasses hash check)' - ) - parser.add_argument( - '--force-all', action='store_true', - help='Equivalent to --force-modlog + --force-wiki (complete rebuild and force update)' - ) - + parser = argparse.ArgumentParser(description="Reddit Modlog Wiki Publisher", formatter_class=argparse.RawDescriptionHelpFormatter) + + parser.add_argument("--config", default="config.json", help="Path to configuration file") + parser.add_argument("--source-subreddit", help="Source subreddit name") + parser.add_argument("--wiki-page", default="modlog", help="Wiki page name") + parser.add_argument("--retention-days", type=int, help="Database retention period in days") + parser.add_argument("--batch-size", type=int, help="Number of entries to fetch per run") + parser.add_argument("--interval", type=int, help="Update interval in seconds for continuous mode") + parser.add_argument("--continuous", action="store_true", help="Run continuously with interval updates") + parser.add_argument("--test", action="store_true", help="Test configuration and Reddit API access") + parser.add_argument("--debug", action="store_true", help="Enable debug logging") + parser.add_argument("--show-config-limits", action="store_true", help="Show configuration limits and defaults") + parser.add_argument("--force-migrate", action="store_true", help="Force database migration (use with caution)") + 
parser.add_argument("--no-auto-update-config", action="store_true", help="Disable automatic config file updates") + parser.add_argument("--force-modlog", action="store_true", help="Fetch ALL modlog actions from Reddit API and completely rebuild wiki from database") + parser.add_argument("--force-wiki", action="store_true", help="Force wiki page update even if content appears unchanged (bypasses hash check)") + parser.add_argument("--force-all", action="store_true", help="Equivalent to --force-modlog + --force-wiki (complete rebuild and force update)") + return parser + def setup_logging(debug: bool = False): """Setup logging configuration""" os.makedirs(LOGS_DIR, exist_ok=True) - + level = logging.DEBUG if debug else logging.INFO - logging.basicConfig( - level=level, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' - ) - + logging.basicConfig(level=level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + # Set prawcore and urllib3 to TRACE level for Reddit API debugging when debug is enabled if debug: logging.getLogger("prawcore").setLevel(5) # TRACE level (below DEBUG) logging.getLogger("urllib3.connectionpool").setLevel(5) # TRACE level + def show_config_limits(): """Display configuration limits and defaults""" print("Configuration Limits and Defaults:") @@ -1495,7 +1536,7 @@ def show_config_limits(): print(f" Minimum: {limits['min']}") print(f" Maximum: {limits['max']}") print() - + print("Required Configuration Fields:") print("- reddit.client_id") print("- reddit.client_secret") @@ -1503,166 +1544,166 @@ def show_config_limits(): print("- reddit.password") print("- source_subreddit") + def run_continuous_mode(reddit, config: Dict[str, Any], force: bool = False): """Run in continuous monitoring mode""" logger.info("Starting continuous mode...") - + error_count = 0 - max_errors = get_config_with_default(config, 'max_continuous_errors') + max_errors = get_config_with_default(config, "max_continuous_errors") first_run_force = force - + while True: try: error_count = 0 # Reset on successful run new_actions = process_modlog_actions(reddit, config) - + if new_actions: logger.info(f"Processed {len(new_actions)} new modlog actions") - + # Always rebuild wiki from ALL relevant actions in database (within retention period) # This matches the behavior of single-run mode all_actions = get_recent_actions_from_db(config, force_all_actions=False, show_only_removals=True) if all_actions: logger.info(f"Found {len(all_actions)} total actions in database for wiki update") content = build_wiki_content(all_actions, config) - wiki_page = config.get('wiki_page', 'modlog') - update_wiki_page(reddit, config['source_subreddit'], wiki_page, content, force=first_run_force) + wiki_page = config.get("wiki_page", "modlog") + update_wiki_page(reddit, config["source_subreddit"], wiki_page, content, force=first_run_force) first_run_force = False else: logger.warning("No actions found in database for wiki update") - - cleanup_old_entries(get_config_with_default(config, 'retention_days')) - - interval = validate_config_value('update_interval', - get_config_with_default(config, 'update_interval'), - CONFIG_LIMITS) + + cleanup_old_entries(get_config_with_default(config, "retention_days")) + + interval = validate_config_value("update_interval", get_config_with_default(config, "update_interval"), CONFIG_LIMITS) logger.info(f"Waiting {interval} seconds until next update...") time.sleep(interval) - + except KeyboardInterrupt: logger.info("Received interrupt signal, shutting down...") break except 
Exception as e: error_count += 1 logger.error(f"Error in continuous mode (attempt {error_count}/{max_errors}): {e}") - + if error_count >= max_errors: logger.error(f"Maximum error count ({max_errors}) reached, shutting down") break - + # Exponential backoff for errors wait_time = min(BASE_BACKOFF_WAIT * (2 ** (error_count - 1)), MAX_BACKOFF_WAIT) # Max 5 minutes logger.info(f"Waiting {wait_time} seconds before retry...") time.sleep(wait_time) + def main(): parser = create_argument_parser() args = parser.parse_args() - + setup_logging(args.debug) - + try: # Show configuration limits if requested if args.show_config_limits: show_config_limits() return - + # Force migration if requested if args.force_migrate: logger.info("Forcing database migration...") migrate_database() logger.info("Database migration completed") return - + setup_database() - + config = load_config(args.config, auto_update=not args.no_auto_update_config) - + # Override config with CLI args if args.source_subreddit: - config['source_subreddit'] = args.source_subreddit + config["source_subreddit"] = args.source_subreddit if args.wiki_page: - config['wiki_page'] = args.wiki_page + config["wiki_page"] = args.wiki_page if args.retention_days is not None: - config['retention_days'] = args.retention_days + config["retention_days"] = args.retention_days if args.batch_size is not None: - config['batch_size'] = args.batch_size + config["batch_size"] = args.batch_size if args.interval is not None: - config['update_interval'] = args.interval - + config["update_interval"] = args.interval + reddit = setup_reddit_client(config) - + if args.test: logger.info("Running connection test...") # Basic test - try to fetch one modlog entry - subreddit = reddit.subreddit(config['source_subreddit']) + subreddit = reddit.subreddit(config["source_subreddit"]) test_entry = next(subreddit.mod.log(limit=1), None) if test_entry: logger.info("✓ Successfully connected and can read modlog") else: logger.warning("⚠ Connected but no modlog entries found") return - + # Handle force commands if args.force_all: args.force_modlog = True args.force_wiki = True logger.info("Force all requested - will fetch from Reddit AND force wiki update") - + if args.force_modlog: logger.info("Force modlog requested - fetching ALL modlog actions from Reddit and rebuilding wiki...") # First, fetch all recent modlog actions to populate database logger.info("Fetching all modlog actions from Reddit...") process_modlog_actions(reddit, config) - + # Then rebuild wiki from database (showing only removal actions) logger.info("Rebuilding wiki from database...") - actions = get_recent_actions_from_db(config, force_all_actions=False,show_only_removals=True) + actions = get_recent_actions_from_db(config, force_all_actions=False, show_only_removals=True) if actions: logger.info(f"Found {len(actions)} removal actions in database for wiki") content = build_wiki_content(actions, config) - wiki_page = config.get('wiki_page', 'modlog') - update_wiki_page(reddit, config['source_subreddit'], wiki_page, content, force=args.force_wiki) + wiki_page = config.get("wiki_page", "modlog") + update_wiki_page(reddit, config["source_subreddit"], wiki_page, content, force=args.force_wiki) else: logger.warning("No removal actions found in database for wiki refresh") return - - # Handle force-wiki: rebuild from database without hitting modlog API + + # Handle force-wiki: rebuild from database without hitting modlog API if args.force_wiki and not args.force_modlog: logger.info("Force wiki requested - rebuilding 
from database without API calls") actions = get_recent_actions_from_db(config, force_all_actions=False) if actions: logger.info(f"Found {len(actions)} actions in database for wiki rebuild") content = build_wiki_content(actions, config) - wiki_page = config.get('wiki_page', 'modlog') - update_wiki_page(reddit, config['source_subreddit'], wiki_page, content, force=True) + wiki_page = config.get("wiki_page", "modlog") + update_wiki_page(reddit, config["source_subreddit"], wiki_page, content, force=True) else: logger.warning("No actions found in database for wiki rebuild") return - + # Process modlog actions (normal operation) new_actions = process_modlog_actions(reddit, config) - + if new_actions: logger.info(f"Processed {len(new_actions)} new modlog actions") - + # Always rebuild wiki from ALL relevant actions in database (within retention period) all_actions = get_recent_actions_from_db(config, force_all_actions=False, show_only_removals=True) if all_actions: logger.info(f"Found {len(all_actions)} total actions in database for wiki update") content = build_wiki_content(all_actions, config) - wiki_page = config.get('wiki_page', 'modlog') - update_wiki_page(reddit, config['source_subreddit'], wiki_page, content, force=args.force_wiki) + wiki_page = config.get("wiki_page", "modlog") + update_wiki_page(reddit, config["source_subreddit"], wiki_page, content, force=args.force_wiki) else: logger.warning("No actions found in database for wiki update") - - cleanup_old_entries(get_config_with_default(config, 'retention_days')) - + + cleanup_old_entries(get_config_with_default(config, "retention_days")) + if args.continuous: run_continuous_mode(reddit, config, force=args.force_wiki) else: logger.info("Single run completed") - + except KeyboardInterrupt: logger.info("Received interrupt signal, shutting down...") sys.exit(0) @@ -1670,5 +1711,6 @@ def main(): logger.error(f"Fatal error: {e}") sys.exit(1) + if __name__ == "__main__": main() diff --git a/renovate.json b/renovate.json index 379b007..dc165c7 100644 --- a/renovate.json +++ b/renovate.json @@ -8,7 +8,7 @@ "labels": ["dependencies"], "assigneesFromCodeOwners": true, "reviewersFromCodeOwners": true, - + "packageRules": [ { "description": "Python dependencies - group minor and patch updates", @@ -48,14 +48,14 @@ "prConcurrentLimit": 3, "prHourlyLimit": 2, - + "commitMessageTopic": "{{depName}}", "commitMessageAction": "update", "commitMessageExtra": "to {{#if isPinDigest}}{{{newDigestShort}}}{{else}}{{#if isMajor}}{{prettyNewMajor}}{{else}}{{#if isSingleVersion}}{{prettyNewVersion}}{{else}}{{#if newValue}}{{{newValue}}}{{else}}{{{newDigestShort}}}{{/if}}{{/if}}{{/if}}{{/if}}", - + "prTitle": "{{#if isPin}}Pin{{else}}Update{{/if}} {{depName}} {{#if isMajor}}(major){{else}}{{#if isMinor}}(minor){{else}}{{#if isPatch}}(patch){{/if}}{{/if}}{{/if}}", "prBodyTemplate": "{{{header}}}{{{table}}}{{{notes}}}{{{changelogs}}}{{{configDescription}}}{{{controls}}}{{{footer}}}", - + "enabledManagers": [ "dockerfile", "docker-compose", @@ -72,4 +72,4 @@ "platformAutomerge": false, "requiredStatusChecks": null -} \ No newline at end of file +} diff --git a/requirements.txt b/requirements.txt index 7710485..1b71b61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -praw>=7.7.0 \ No newline at end of file +praw>=7.7.0 diff --git a/debug_auth.py b/scripts/debug_auth.py similarity index 74% rename from debug_auth.py rename to scripts/debug_auth.py index f4104fd..cc6d17c 100644 --- a/debug_auth.py +++ b/scripts/debug_auth.py @@ -1,13 +1,14 @@ 
 #!/usr/bin/env python3
-import json
 import base64
+import json
+
 import requests
 
 # Load config
-with open('config.json') as f:
+with open("config.json") as f:
     config = json.load(f)
 
-reddit = config['reddit']
+reddit = config["reddit"]
 
 print("=" * 50)
 print("Reddit Auth Debug")
@@ -19,29 +20,18 @@
 print(f"Client Secret first 4: {reddit['client_secret'][:4]}...")
 
 # Check for common issues
-if len(reddit['client_id']) > 20:
+if len(reddit["client_id"]) > 20:
     print("⚠️ Client ID seems too long - might be using secret as ID?")
-if len(reddit['client_secret']) < 20:
+if len(reddit["client_secret"]) < 20:
     print("⚠️ Client Secret seems too short - might be using ID as secret?")
 
 # Test manual auth
 print("\nTesting manual authentication...")
 auth = base64.b64encode(f"{reddit['client_id']}:{reddit['client_secret']}".encode()).decode()
-headers = {
-    "Authorization": f"Basic {auth}",
-    "User-Agent": f"ModlogWikiPublisher/1.0 by /u/{reddit['username']}"
-}
-data = {
-    "grant_type": "password",
-    "username": reddit['username'],
-    "password": reddit['password']
-}
-
-response = requests.post(
-    "https://www.reddit.com/api/v1/access_token",
-    headers=headers,
-    data=data
-)
+headers = {"Authorization": f"Basic {auth}", "User-Agent": f"ModlogWikiPublisher/1.0 by /u/{reddit['username']}"}
+data = {"grant_type": "password", "username": reddit["username"], "password": reddit["password"]}
+
+response = requests.post("https://www.reddit.com/api/v1/access_token", headers=headers, data=data)
 
 print(f"\nResponse Status: {response.status_code}")
 print(f"Response Headers: {dict(response.headers)}")
@@ -53,4 +43,4 @@
     print("2. Client ID = the string under 'personal use script' (shorter)")
     print("3. Client Secret = the 'secret' field (longer)")
     print("4. Make sure the app type is 'script' not 'web app'")
-    print("5. Username should be just 'Bakerboy448' not 'u/Bakerboy448'")
\ No newline at end of file
+    print("5. Username should be just 'Bakerboy448' not 'u/Bakerboy448'")
diff --git a/systemd/install.sh b/systemd/install.sh
index 3c961f4..ab2e14e 100755
--- a/systemd/install.sh
+++ b/systemd/install.sh
@@ -134,4 +134,4 @@ echo " tail -f /var/log/redditmodlog/usenet.log"
 echo ""
 echo "5. Check service status:"
 echo " systemctl status modlog@opensignups.service"
-echo " systemctl status modlog@usenet.service"
\ No newline at end of file
+echo " systemctl status modlog@usenet.service"
diff --git a/systemd/modlog@.service b/systemd/modlog@.service
index 8833bb7..bdf5841 100644
--- a/systemd/modlog@.service
+++ b/systemd/modlog@.service
@@ -47,4 +47,4 @@ KillSignal=SIGTERM
 TimeoutStopSec=30
 
 [Install]
-WantedBy=multi-user.target
\ No newline at end of file
+WantedBy=multi-user.target
diff --git a/systemd/redditmodlog.logrotate b/systemd/redditmodlog.logrotate
index 66130b7..986bfaa 100644
--- a/systemd/redditmodlog.logrotate
+++ b/systemd/redditmodlog.logrotate
@@ -32,4 +32,4 @@
         # Signal systemd to reopen log files
         systemctl reload-or-restart 'modlog-*.service' 2>/dev/null || true
     endscript
-}
\ No newline at end of file
+}
diff --git a/test_removal_reasons.py b/tests/test_removal_reasons.py
similarity index 84%
rename from test_removal_reasons.py
rename to tests/test_removal_reasons.py
index 3050621..df5e2ee 100644
--- a/test_removal_reasons.py
+++ b/tests/test_removal_reasons.py
@@ -3,71 +3,75 @@
 Test script to verify removal reason processing without Reddit API calls
 Creates a local markdown file to demonstrate the functionality
 """
-import sqlite3
-from datetime import datetime, timezone
 import os
+import sqlite3
 import sys
+from datetime import datetime
 
 # Add the current directory to path to import our module
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 
 from modlog_wiki_publisher import *
 
+
 # Mock Reddit action objects for testing
 class MockRedditAction:
-    def __init__(self, action_id, action_type, details, mod_name, target_type='post', target_id='abc123'):
+    def __init__(self, action_id, action_type, details, mod_name, target_type="post", target_id="abc123"):
         self.id = action_id
         self.action = action_type
         self.details = details
         self.created_utc = int(datetime.now().timestamp())
-
+
         # Mock moderator
         class MockMod:
             def __init__(self, name):
                 self.name = name
+
         self.mod = MockMod(mod_name)
-
+
        # Mock targets based on type
-        if target_type == 'post':
+        if target_type == "post":
             self.target_submission = target_id
             self.target_comment = None
-            self.target_author = 'testuser'
-            self.target_title = 'Test Post Title'
-            self.target_permalink = f'/r/test/comments/{target_id}/test_post/'
-        elif target_type == 'comment':
+            self.target_author = "testuser"
+            self.target_title = "Test Post Title"
+            self.target_permalink = f"/r/test/comments/{target_id}/test_post/"
+        elif target_type == "comment":
             self.target_submission = None
             self.target_comment = target_id
-            self.target_author = 'testuser'
+            self.target_author = "testuser"
             self.target_title = None
-            self.target_permalink = f'/r/test/comments/parent123/test_post/{target_id}/'
+            self.target_permalink = f"/r/test/comments/parent123/test_post/{target_id}/"
+
 
 def test_removal_reasons():
     """Test removal reason processing and storage"""
     print("Testing Removal Reason Processing")
     print("=" * 50)
-
+
     # Clean up any existing test database
     test_db = "test_modlog.db"
     if os.path.exists(test_db):
         os.remove(test_db)
-
+
     # Override the global DB_PATH for testing
     global DB_PATH
    original_db_path = DB_PATH
     DB_PATH = test_db
-
+
     try:
         # Initialize test database
         print(" Setting up test database...")
         setup_database()
-
+
         # Verify table was created
         conn = sqlite3.connect(DB_PATH)
         cursor = conn.cursor()
         cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='processed_actions'")
         if not cursor.fetchone():
             print(" Database table not found, creating manually...")
-            cursor.execute("""
+            cursor.execute(
+                """
                 CREATE TABLE processed_actions (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
                     action_id TEXT UNIQUE NOT NULL,
@@ -81,10 +85,11 @@ def test_removal_reasons():
                     created_at INTEGER NOT NULL,
                     processed_at INTEGER DEFAULT (strftime('%s', 'now'))
                 )
-            """)
+            """
+            )
             conn.commit()
         conn.close()
-
+
         # Test cases with different removal reasons
         test_actions = [
             MockRedditAction("test1", "removelink", "Rule 1: No spam", "HumanMod1", "post", "post123"),
@@ -94,85 +99,86 @@ def test_removal_reasons():
             MockRedditAction("test5", "removelink", None, "HumanMod3", "post", "post111"), # No removal reason
             MockRedditAction("test6", "removecomment", " Rule 3: No off-topic ", "HumanMod2", "comment", "comment222"), # Test whitespace stripping
         ]
-
+
         print("\n1. Storing test actions...")
         for action in test_actions:
             print(f" Storing: {action.action} - '{action.details}'")
             store_processed_action(action)
-
+
         print("\n2. Verifying database storage...")
         conn = sqlite3.connect(DB_PATH)
         cursor = conn.cursor()
         cursor.execute("SELECT action_id, action_type, removal_reason FROM processed_actions ORDER BY action_id")
         results = cursor.fetchall()
         conn.close()
-
+
         for action_id, action_type, removal_reason in results:
             print(f" {action_id}: {action_type} -> '{removal_reason}'")
-
+
         print("\n3. Testing wiki content generation...")
-
+
         # Create a mock config for testing
         mock_config = {
-            'wiki_actions': ['removelink', 'removecomment', 'addremovalreason', 'spamlink'],
-            'anonymize_moderators': True,
-            'source_subreddit': 'test',
-            'max_wiki_entries_per_page': 1000,
-            'retention_days': 30
+            "wiki_actions": ["removelink", "removecomment", "addremovalreason", "spamlink"],
+            "anonymize_moderators": True,
+            "source_subreddit": "test",
+            "max_wiki_entries_per_page": 1000,
+            "retention_days": 30,
         }
-
+
         # Get actions from database (simulating force refresh)
         actions = get_recent_actions_from_db(mock_config)
         print(f" Retrieved {len(actions)} actions from database")
-
+
         # Generate wiki content
         wiki_content = build_wiki_content(actions, mock_config)
-
+
         # Write to local markdown file
         output_file = "test_modlog_output.md"
-        with open(output_file, 'w', encoding='utf-8') as f:
+        with open(output_file, "w", encoding="utf-8") as f:
             f.write(wiki_content)
-
+
         print(f"\n4. Wiki content written to: {output_file}")
         print("\nFirst few lines of generated content:")
         print("-" * 40)
-        lines = wiki_content.split('\n')
+        lines = wiki_content.split("\n")
         for i, line in enumerate(lines[:15]):
             print(f"{i+1:2d}: {line}")
         if len(lines) > 15:
             print(" ... (truncated)")
-
+
         print("\n5. Checking removal reasons in wiki content...")
         if "Rule 1: No spam" in wiki_content:
             print(" ✓ Found 'Rule 1: No spam' in wiki content")
         else:
             print(" ❌ Missing 'Rule 1: No spam' in wiki content")
-
+
         if "Rule 2: Be civil" in wiki_content:
             print(" ✓ Found 'Rule 2: Be civil' in wiki content")
         else:
             print(" ❌ Missing 'Rule 2: Be civil' in wiki content")
-
+
         if "Rule 3: No off-topic" in wiki_content:
             print(" ✓ Found 'Rule 3: No off-topic' (whitespace stripped)")
         else:
             print(" ❌ Missing 'Rule 3: No off-topic' in wiki content")
-
+
         if "No reason" in wiki_content:
             print(" ✓ Found 'No reason' for action without details")
         else:
             print(" ❌ Missing 'No reason' fallback in wiki content")
-
+
         print(f"\nTest completed successfully!")
         print(f"Check '{output_file}' to see the full generated wiki content.")
-
+
     finally:
         # Restore original DB path
         DB_PATH = original_db_path
-
+
         # Clean up test database
         if os.path.exists(test_db):
             os.remove(test_db)
 
+
 if __name__ == "__main__":
-    test_removal_reasons()
\ No newline at end of file
+    test_removal_reasons()