diff --git a/ANALYSIS_README.md b/ANALYSIS_README.md new file mode 100644 index 000000000..eb78e4abc --- /dev/null +++ b/ANALYSIS_README.md @@ -0,0 +1,254 @@ +# Mass Repository CI/CD Analysis System + +Automated system to analyze all 956 repositories in the Zeeeepa organization for Enterprise CI/CD compatibility using Codegen agents and Repomix. + +## 🎯 Overview + +This system creates 956 independent Codegen agent runs that: +1. Use Repomix to analyze each repository's full codebase structure +2. Rate the repository on 8 Enterprise CI/CD criteria (1-10 scale) +3. Generate detailed ratings with recommendations +4. Commit all ratings to a single branch → create one PR + +## 📊 Rating Criteria + +Each repository is rated on: + +1. **Build System Maturity** - Build configuration and dependency management +2. **CI/CD Integration Readiness** - Automation and deployment capabilities +3. **Code Quality & Standards** - Linting, formatting, static analysis +4. **Documentation Quality** - README, API docs, setup instructions +5. **Containerization** - Docker configuration and practices +6. **Testing Infrastructure** - Unit, integration tests, coverage +7. **Security Practices** - Scanning, vulnerability checks, secrets management +8. 
**Enterprise Compatibility** - Licensing, scalability, enterprise features + +## 🚀 Quick Start + +### Prerequisites + +```bash +# Install Codegen SDK +pip install codegen + +# Set API key +export CODEGEN_API_KEY='your-api-key-here' +``` + +### Step 1: Populate Repository List + +```bash +# Create repos_list.txt with all 956 repo names +# One repository name per line + +# Example content: +# -Linux- +# 1Panel +# 3x-ui +# ...etc +``` + +### Step 2: Run Analysis + +**Option A: Python Script (Recommended)** + +```bash +python3 mass_repo_analysis.py +``` + +**Option B: Bash Script** + +```bash +chmod +x create_analysis_agents.sh +./create_analysis_agents.sh +``` + +### Step 3: Monitor Progress + +- Agent runs process at ~30/minute +- Total time: ~30 minutes for 956 repos +- Monitor in Codegen dashboard +- All ratings pushed to single branch + +### Step 4: Create PR + +```bash +# Once all agents complete, create PR from the analysis branch +# Branch name format: analysis/cicd-ratings-<timestamp> +``` + +## 📁 Output Structure + +Each repository gets a rating file: + +``` +ratings/ +├── -Linux-.json +├── 1Panel.json +├── 3x-ui.json +└── ...956 files total +``` + +### Rating File Format + +```json +{ + "repo_name": "example-repo", + "analyzed_at": "2025-12-26T01:30:00Z", + "overall_score": 7.5, + "ratings": { + "build_system": 8, + "cicd_readiness": 7, + "code_quality": 8, + "documentation": 7, + "containerization": 9, + "testing": 6, + "security": 7, + "enterprise": 8 + }, + "strengths": [ + "Excellent Docker configuration with multi-stage builds", + "Comprehensive CI/CD pipeline with automated testing", + "Well-documented API with examples" + ], + "weaknesses": [ + "Limited test coverage in backend modules", + "Missing security scanning in CI pipeline" + ], + "summary": "Solid enterprise-ready application with strong containerization and CI/CD practices. 
Main improvement area is expanding test coverage.", + "recommendations": [ + "Add security scanning (e.g., Trivy, Snyk) to CI pipeline", + "Increase backend test coverage to >80%", + "Document deployment process for production environments" + ], + "notable_files": [ + "Dockerfile", + ".github/workflows/ci.yml", + "docker-compose.yml" + ] +} +``` + +## ⚙️ Configuration + +### Rate Limiting + +- **Default**: 30 agent runs per minute +- **Delay**: 2 seconds between requests +- **Pause**: 60 seconds after every 30 requests + +Modify in scripts: +- Python: `RATE_LIMIT = 30`, `DELAY = 2` +- Bash: `MAX_PER_MINUTE=30`, `DELAY_BETWEEN_REQUESTS=2` + +### Branch Naming + +Default format: `analysis/cicd-ratings-<timestamp>` + +Change by setting `BRANCH_NAME` variable in scripts. + +## 🔧 How It Works + +1. **Fetch Repository List** + - Loads all 956 repository names from `repos_list.txt` + +2. **Create Agent Runs** + - For each repository, creates a Codegen agent run + - Passes detailed analysis instructions + - Sets target branch for all commits + +3. **Agent Execution** + - Each agent independently: + - Uses Repomix to analyze the repository + - Evaluates against 8 criteria + - Generates structured JSON rating + - Commits rating to analysis branch + +4. **Consolidation** + - All 956 ratings collected in `ratings/` directory + - All changes in single branch + - Create one PR with complete analysis + +## 📝 Analysis Instructions Template + +Each agent receives: +- Repository name to analyze +- Target branch for commits +- 8 rating criteria with detailed descriptions +- Output format specification +- Instructions to use Repomix for analysis + +See `mass_repo_analysis.py` for full template. 
+ +## 🎯 Success Criteria + +- ✅ All 956 repositories analyzed +- ✅ Each gets objective 1-10 ratings on 8 criteria +- ✅ Actionable recommendations provided +- ✅ All ratings in structured JSON format +- ✅ Single branch with all changes +- ✅ One PR for review + +## 📊 Results Tracking + +Analysis results saved to: `analysis_results_<timestamp>.json` + +Contains: +- Start/completion timestamps +- Target branch name +- Successful agent runs (with run IDs) +- Failed runs (with error details) +- Summary statistics + +## 🚨 Troubleshooting + +### API Key Issues +```bash +# Verify API key is set +echo $CODEGEN_API_KEY + +# Test API access +curl -H "Authorization: Bearer $CODEGEN_API_KEY" \ + https://api.codegen.com/v1/user +``` + +### Rate Limiting +- Script automatically handles 30/minute limit +- If hitting issues, increase `DELAY` in scripts + +### Failed Runs +- Check `analysis_results_*.json` for failed repositories +- Retry failed repos individually +- Common causes: repo access, API errors, timeout + +## 📚 Related Files + +- `mass_repo_analysis.py` - Main Python implementation +- `create_analysis_agents.sh` - Bash alternative +- `analyze_all_repos.py` - Backup implementation +- `repos_list.txt` - Repository names (one per line) +- `ANALYSIS_README.md` - This file + +## 🤝 Contributing + +To add new rating criteria: +1. Update `ANALYSIS_INSTRUCTIONS` template +2. Add criteria to rating schema +3. Update output format in instructions +4. Document in this README + +## 📄 License + +Same as parent repository. 
#!/bin/bash
# Mass Repository CI/CD Analysis using Codegen Agent API
#
# Reads repository names from repos_list.txt and creates one agent run per
# repository, sequentially, sleeping between requests. Every successfully
# created run is appended to a JSON array in a timestamped results file.
#
# Requires: curl, jq.

set -euo pipefail

# Configuration
CODEGEN_API_URL="${CODEGEN_API_URL:-https://api.codegen.com}"
CODEGEN_API_KEY="${CODEGEN_API_KEY:-}"
ORG_NAME="Zeeeepa"
BRANCH_NAME="analysis/cicd-ratings-$(date +%s)"
MAX_PER_MINUTE=30          # requests between two long pauses
DELAY_BETWEEN_REQUESTS=2   # seconds
RATE_LIMIT_PAUSE=60        # seconds, taken after every MAX_PER_MINUTE requests

# Colors
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
BLUE='\033[0;34m'
NC='\033[0m'

log_info() { echo -e "${GREEN}[INFO]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
log_progress() { echo -e "${BLUE}[PROGRESS]${NC} $1"; }

# Fail early with a clear message instead of a "command not found" mid-run.
for tool in curl jq; do
    if ! command -v "$tool" > /dev/null 2>&1; then
        log_error "Required tool not found: $tool"
        exit 1
    fi
done

# Check API key
if [ -z "$CODEGEN_API_KEY" ]; then
    log_error "CODEGEN_API_KEY environment variable not set!"
    echo "Please set it with: export CODEGEN_API_KEY='your-key'"
    exit 1
fi

log_info "Starting Mass Repository Analysis"
echo "=================================="
log_info "Organization: $ORG_NAME"
log_info "Target Branch: $BRANCH_NAME"
log_info "Rate Limit: $MAX_PER_MINUTE agents/minute"
echo ""

# Load repository names from a local file (no API call is made for this).
log_info "Fetching repository list..."

REPOS_FILE="repos_list.txt"

if [ ! -f "$REPOS_FILE" ]; then
    log_warn "Repository list not found. Please create $REPOS_FILE with one repo name per line."
    echo "Example:"
    echo "  -Linux-"
    echo "  1Panel"
    echo "  3x-ui"
    exit 1
fi

# Read repos into array, skipping blank lines and '#' comments. The shipped
# repos_list.txt starts with a comment header; without this filter every
# comment line would be submitted as a repository name.
REPOS=()
while IFS= read -r line || [ -n "$line" ]; do
    # Trim leading and trailing whitespace (also drops a trailing CR).
    line="${line#"${line%%[![:space:]]*}"}"
    line="${line%"${line##*[![:space:]]}"}"
    if [ -z "$line" ]; then
        continue
    fi
    case "$line" in
        \#*) continue ;;
    esac
    REPOS+=("$line")
done < "$REPOS_FILE"
TOTAL_REPOS=${#REPOS[@]}

if [ "$TOTAL_REPOS" -eq 0 ]; then
    log_error "No repository names found in $REPOS_FILE (only blank or comment lines)."
    exit 1
fi

# The estimate mirrors the sleep schedule of the loop below: one wait after
# every request except the last, and every MAX_PER_MINUTE-th wait is the long
# pause. Time spent inside the HTTP requests themselves is not included.
WAITS=$((TOTAL_REPOS - 1))
LONG_PAUSES=$((WAITS / MAX_PER_MINUTE))
EST_SECONDS=$((LONG_PAUSES * RATE_LIMIT_PAUSE + (WAITS - LONG_PAUSES) * DELAY_BETWEEN_REQUESTS))

log_info "Found $TOTAL_REPOS repositories to analyze"
log_info "Estimated time: ~$(((EST_SECONDS + 59) / 60)) minutes"
echo ""

# Analysis instructions template. `read -d ''` returns non-zero at EOF, hence
# the `|| true` under `set -e`.
read -r -d '' ANALYSIS_TEMPLATE << 'TEMPLATE_END' || true
Analyze repository {{REPO_NAME}} using Repomix for Enterprise CI/CD Compatibility.

Rate on these criteria (1-10 scale):
1. Build System Maturity
2. CI/CD Integration Readiness
3. Code Quality & Standards
4. Documentation Quality
5. Containerization
6. Testing Infrastructure
7. Security Practices
8. Enterprise Compatibility

Create file: ratings/{{REPO_NAME}}.json with:
{
  "repo_name": "{{REPO_NAME}}",
  "overall_score": <average of the 8 ratings>,
  "ratings": {
    "build_system": <1-10>,
    "cicd_readiness": <1-10>,
    "code_quality": <1-10>,
    "documentation": <1-10>,
    "containerization": <1-10>,
    "testing": <1-10>,
    "security": <1-10>,
    "enterprise": <1-10>
  },
  "summary": "<short summary>",
  "recommendations": ["<recommendation 1>", "<recommendation 2>"]
}

Push to branch: {{BRANCH}}
TEMPLATE_END

# Results tracking
SUCCESS_COUNT=0
FAILED_COUNT=0
RESULTS_FILE="analysis_runs_$(date +%s).json"
FIRST_ENTRY=1   # 1 until the first array element has been written

echo "[" > "$RESULTS_FILE"

# Create agent runs
log_info "Creating agent runs..."
echo ""

for i in "${!REPOS[@]}"; do
    REPO_NAME="${REPOS[$i]}"
    INDEX=$((i + 1))

    log_progress "[$INDEX/$TOTAL_REPOS] Processing: $REPO_NAME"

    # Prepare instructions
    INSTRUCTIONS="${ANALYSIS_TEMPLATE//\{\{REPO_NAME\}\}/$REPO_NAME}"
    INSTRUCTIONS="${INSTRUCTIONS//\{\{BRANCH\}\}/$BRANCH_NAME}"

    # Create API payload
    PAYLOAD=$(jq -n \
        --arg repo "$ORG_NAME/$REPO_NAME" \
        --arg branch "$BRANCH_NAME" \
        --arg msg "Analyze $REPO_NAME for CI/CD compatibility" \
        --arg inst "$INSTRUCTIONS" \
        '{
            repository: $repo,
            branch: $branch,
            message: $msg,
            instructions: $inst,
            create_branch: true
        }')

    # Make API request.
    # NOTE(review): the endpoint path and payload fields are taken as-is from
    # this script; confirm them against the Codegen API reference.
    # A transport error makes curl exit non-zero; it is caught here so that
    # `set -e` does not abort the whole batch and leave the results array open.
    if ! RESPONSE=$(curl -s -w "\n%{http_code}" \
        -X POST "$CODEGEN_API_URL/v1/agent-runs" \
        -H "Authorization: Bearer $CODEGEN_API_KEY" \
        -H "Content-Type: application/json" \
        -d "$PAYLOAD"); then
        log_warn "  curl reported a transport error for $REPO_NAME"
    fi

    HTTP_CODE=$(printf '%s\n' "$RESPONSE" | tail -n1)
    BODY=$(printf '%s\n' "$RESPONSE" | sed '$d')

    if [ "$HTTP_CODE" = "201" ] || [ "$HTTP_CODE" = "200" ]; then
        # A non-JSON body must not kill the script (pipefail + set -e).
        RUN_ID=$(printf '%s' "$BODY" | jq -r '.id // .run_id // "unknown"' 2> /dev/null) || RUN_ID="unknown"
        log_info "  ✓ Created run: $RUN_ID"

        SUCCESS_COUNT=$((SUCCESS_COUNT + 1))

        # Build the entry with --arg so the repo name is data, not jq program
        # text. Fall back to wrapping the raw body if it is not a JSON object.
        ENTRY=$(printf '%s' "$BODY" | jq -c --arg repo_name "$REPO_NAME" '. + {repo_name: $repo_name}' 2> /dev/null) \
            || ENTRY=$(jq -cn --arg repo_name "$REPO_NAME" --arg raw "$BODY" '{repo_name: $repo_name, raw_response: $raw}')

        # Write the separator BEFORE every element but the first, so the array
        # stays valid JSON no matter which repositories fail.
        if [ "$FIRST_ENTRY" -eq 0 ]; then
            echo "," >> "$RESULTS_FILE"
        fi
        printf '%s\n' "$ENTRY" >> "$RESULTS_FILE"
        FIRST_ENTRY=0
    else
        log_error "  ✗ Failed: HTTP $HTTP_CODE"
        log_error "  Response: $BODY"
        FAILED_COUNT=$((FAILED_COUNT + 1))
    fi

    # Rate limiting: short delay after each request, long pause after every
    # MAX_PER_MINUTE-th request, nothing after the last one.
    if [ "$INDEX" -lt "$TOTAL_REPOS" ]; then
        if [ $((INDEX % MAX_PER_MINUTE)) -eq 0 ]; then
            log_warn "  ⏸️  Rate limit pause ($INDEX/$TOTAL_REPOS processed)..."
            sleep "$RATE_LIMIT_PAUSE"
        else
            sleep "$DELAY_BETWEEN_REQUESTS"
        fi
    fi
done

echo "]" >> "$RESULTS_FILE"

# Summary
echo ""
echo "=================================="
log_info "Analysis Complete!"
echo "=================================="
echo ""
echo "📊 Summary:"
echo "  ✓ Successful: $SUCCESS_COUNT"
echo "  ✗ Failed: $FAILED_COUNT"
echo "  📁 Results: $RESULTS_FILE"
echo "  🌿 Branch: $BRANCH_NAME"
echo ""
echo "⏳ Agent runs are now processing (~ 30 minutes total)"
echo "  Monitor progress in Codegen dashboard"
echo "  All ratings will be pushed to branch: $BRANCH_NAME"
echo ""
echo "📝 Next step: Create PR from $BRANCH_NAME when complete!"
echo ""
#!/usr/bin/env python3
"""
Mass Repository Analysis using Codegen Python SDK.

Reads repository names from ``repos_list.txt`` and creates one Codegen agent
run per repository, asking each agent to rate the repository for Enterprise
CI/CD compatibility. Requests are paced with a fixed delay plus a longer
pause after every ``RATE_LIMIT`` requests, and the outcome of every request
is written to a timestamped JSON results file.
"""
import json
import os
import sys
import time
from datetime import datetime, timezone
from typing import Dict, List

# Import Codegen SDK. A missing SDK is reported by main() rather than by
# exiting here, so that importing this module has no side effects.
try:
    from codegen import Codegen
except ImportError:
    Codegen = None

# Configuration
ORG_NAME = "Zeeeepa"
BRANCH_NAME = f"analysis/cicd-ratings-{int(time.time())}"
RATE_LIMIT = 30  # requests between two long pauses
DELAY = 2  # seconds between requests
RATE_LIMIT_PAUSE = 60  # seconds, taken after every RATE_LIMIT requests
REPOS_FILE = "repos_list.txt"

# Analysis instructions template. Rendered with str.format(), so literal
# braces of the JSON example are doubled.
ANALYSIS_INSTRUCTIONS = """
**Task: Analyze {repo_name} for Enterprise CI/CD Compatibility**

Use Repomix to perform a comprehensive codebase analysis, then rate the repository on these 8 criteria (1-10 scale):

1. **Build System Maturity** (1-10)
   - Build configuration presence (package.json, Makefile, CMakeLists.txt, etc.)
   - Build script completeness
   - Dependency management quality

2. **CI/CD Integration Readiness** (1-10)
   - CI configuration files (.github/workflows, .gitlab-ci.yml, Jenkinsfile, etc.)
   - Automated testing in CI
   - Deployment automation

3. **Code Quality & Standards** (1-10)
   - Linting configuration (.eslintrc, .pylintrc, etc.)
   - Code formatting setup (prettier, black, etc.)
   - Static analysis tools

4. **Documentation Quality** (1-10)
   - README completeness
   - API documentation
   - Setup/deployment instructions

5. **Containerization** (1-10)
   - Dockerfile presence and quality
   - Docker Compose configuration
   - Multi-stage builds

6. **Testing Infrastructure** (1-10)
   - Unit tests presence
   - Integration tests
   - Test coverage configuration

7. **Security Practices** (1-10)
   - Security scanning configuration
   - Dependency vulnerability checks
   - Secrets management

8. **Enterprise Compatibility** (1-10)
   - Clear licensing
   - Enterprise features
   - Scalability considerations

**Output Requirements:**

1. Create directory: `ratings/` if it doesn't exist
2. Create file: `ratings/{repo_name}.json` with this exact structure:

```json
{{
  "repo_name": "{repo_name}",
  "analyzed_at": "<ISO 8601 UTC timestamp>",
  "overall_score": <average of the 8 ratings>,
  "ratings": {{
    "build_system": <1-10>,
    "cicd_readiness": <1-10>,
    "code_quality": <1-10>,
    "documentation": <1-10>,
    "containerization": <1-10>,
    "testing": <1-10>,
    "security": <1-10>,
    "enterprise": <1-10>
  }},
  "strengths": ["<strength 1>", "<strength 2>", "<strength 3>"],
  "weaknesses": ["<weakness 1>", "<weakness 2>"],
  "summary": "<2-3 sentence summary>",
  "recommendations": [
    "<recommendation 1>",
    "<recommendation 2>",
    "<recommendation 3>"
  ],
  "notable_files": ["<file 1>", "<file 2>"]
}}
```

3. Commit the rating file to branch: `{branch_name}`
4. Use descriptive commit message: "Add CI/CD rating for {repo_name}"

**Important:**
- Use Repomix to analyze the full repository structure
- Be objective in ratings
- Provide specific, actionable recommendations
- Consider the repository type (library, application, tool, etc.)
"""


def _utc_now_iso() -> str:
    """Return the current time as a timezone-aware ISO 8601 string (UTC)."""
    return datetime.now(timezone.utc).isoformat()


def fetch_all_repos_from_api(codegen_client) -> List[str]:
    """Placeholder for fetching repository names from the organization.

    No API call is made yet: the function prints a status line and returns
    an empty list. main() reads the names from ``repos_list.txt`` instead.

    Args:
        codegen_client: Codegen client object; currently unused.

    Returns:
        An empty list.
    """
    print("📋 Fetching repository list from Codegen API...")
    # TODO: page through the organization's repositories once the SDK call
    # for listing them is known.
    return []


def load_repo_names(path: str) -> List[str]:
    """Return the repository names listed in *path*, one per line.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped, so a comment header in the list file is not mistaken for
    repository names.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    with open(path, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [name for name in stripped if name and not name.startswith("#")]


def estimate_creation_seconds(total_repos: int) -> int:
    """Return how many seconds main() sleeps while creating *total_repos* runs.

    Mirrors the pacing in main(): one wait after every request except the
    last, where every ``RATE_LIMIT``-th wait is ``RATE_LIMIT_PAUSE`` seconds
    and all others are ``DELAY`` seconds. Time spent inside the requests
    themselves is not included.
    """
    if total_repos <= 1:
        return 0
    waits = total_repos - 1
    long_pauses = waits // RATE_LIMIT
    return long_pauses * RATE_LIMIT_PAUSE + (waits - long_pauses) * DELAY


def create_agent_run(codegen_client, repo_name: str, branch_name: str) -> Dict:
    """Create a Codegen agent run for repository analysis.

    Args:
        codegen_client: object providing ``runs.create(...)``.
        repo_name: repository name without the organization prefix.
        branch_name: branch the agent is told to commit the rating to.

    Returns:
        On success ``{"success": True, "run_id", "repo", "created_at"}``;
        on any exception ``{"success": False, "repo", "error"}``. The
        function never raises for a failed request, so one bad repository
        does not stop the batch.
    """
    instructions = ANALYSIS_INSTRUCTIONS.format(
        repo_name=repo_name,
        branch_name=branch_name,
    )

    try:
        # NOTE(review): confirm that the installed SDK exposes
        # `runs.create` with these keyword arguments - not verifiable here.
        run = codegen_client.runs.create(
            repo=f"{ORG_NAME}/{repo_name}",
            message=f"Analyze {repo_name} for Enterprise CI/CD compatibility using Repomix",
            instructions=instructions,
            branch=branch_name,
            create_branch=True,
        )
    except Exception as e:  # deliberate boundary: report, don't abort the batch
        return {
            "success": False,
            "repo": repo_name,
            "error": str(e),
        }

    return {
        "success": True,
        "run_id": run.id,
        "repo": repo_name,
        "created_at": _utc_now_iso(),
    }


def _confirm(prompt: str) -> bool:
    """Ask a yes/no question; a closed stdin counts as "no"."""
    try:
        answer = input(prompt)
    except EOFError:
        return False
    return answer.strip().lower() in ("yes", "y")


def main() -> int:
    """Run the interactive batch and return the process exit code.

    Returns 1 for setup errors (missing SDK, API key, or repository list),
    130 when interrupted with Ctrl-C (partial results are still saved), and
    0 otherwise - including when individual runs failed; those are listed in
    the results file.
    """
    if Codegen is None:
        print("❌ ERROR: Codegen SDK not installed!")
        print("   Install with: pip install codegen")
        return 1

    print("=" * 70)
    print("  🤖 MASS REPOSITORY CI/CD ANALYSIS AUTOMATION")
    print("=" * 70)
    print()

    # Initialize Codegen client
    api_key = os.getenv("CODEGEN_API_KEY")
    if not api_key:
        print("❌ ERROR: CODEGEN_API_KEY environment variable not set!")
        print("   Set it with: export CODEGEN_API_KEY='your-api-key'")
        return 1

    print("🔑 Initializing Codegen client...")
    try:
        codegen = Codegen(api_key=api_key)
    except Exception as e:
        print(f"❌ Failed to initialize Codegen client: {e}")
        return 1

    # Load repository list
    print(f"📊 Loading repository list for organization: {ORG_NAME}")
    try:
        repos = load_repo_names(REPOS_FILE)
    except FileNotFoundError:
        print(f"❌ Repository list file not found: {REPOS_FILE}")
        print("   Create it with one repository name per line")
        return 1

    total_repos = len(repos)
    if total_repos == 0:
        print(f"❌ No repository names found in: {REPOS_FILE}")
        return 1

    print(f"✅ Found {total_repos} repositories to analyze")
    print(f"⏱️  Estimated time: ~{estimate_creation_seconds(total_repos) / 60:.1f} minutes")
    print(f"🌿 Target branch: {BRANCH_NAME}")
    print()

    # Confirm before proceeding
    if not _confirm(f"🚀 Create {total_repos} agent runs? (yes/no): "):
        print("❌ Cancelled by user")
        return 0

    print()
    print("=" * 70)
    print("  🏃 CREATING AGENT RUNS")
    print("=" * 70)
    print()

    # Track results
    results = {
        "started_at": _utc_now_iso(),
        "branch": BRANCH_NAME,
        "total_repos": total_repos,
        "successful": [],
        "failed": [],
    }

    # Create agent runs with rate limiting. Ctrl-C stops the loop but still
    # falls through to saving, so already-created run IDs are not lost.
    interrupted = False
    try:
        for idx, repo_name in enumerate(repos, 1):
            print(f"[{idx}/{total_repos}] Creating run for: {repo_name}")

            run_result = create_agent_run(codegen, repo_name, BRANCH_NAME)

            if run_result["success"]:
                print(f"   ✅ Created: {run_result['run_id']}")
                results["successful"].append(run_result)
            else:
                print(f"   ❌ Failed: {run_result['error']}")
                results["failed"].append(run_result)

            # Rate limiting: nothing to wait for after the last request.
            if idx < total_repos:
                if idx % RATE_LIMIT == 0:
                    print(f"   ⏸️  Rate limit pause ({idx}/{total_repos} processed)...")
                    time.sleep(RATE_LIMIT_PAUSE)
                else:
                    time.sleep(DELAY)
    except KeyboardInterrupt:
        interrupted = True
        print()
        print("⚠️  Interrupted - saving partial results")

    # Save results
    results["completed_at"] = _utc_now_iso()
    results_file = f"analysis_results_{int(time.time())}.json"

    with open(results_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print()
    print("=" * 70)
    if interrupted:
        print("  ⚠️  AGENT RUN CREATION INTERRUPTED")
    elif results["failed"]:
        print("  ⚠️  AGENT RUN CREATION FINISHED WITH FAILURES")
    else:
        print("  ✅ AGENT RUNS CREATED SUCCESSFULLY")
    print("=" * 70)
    print()
    print("📊 Summary:")
    print(f"   ✅ Successful: {len(results['successful'])}")
    print(f"   ❌ Failed: {len(results['failed'])}")
    print()
    print(f"📁 Results saved to: {results_file}")
    print(f"🌿 All analyses will push to branch: {BRANCH_NAME}")
    print()
    print("⏳ Agent runs are now processing (~30 minutes total)")
    print("   Monitor progress in Codegen dashboard")
    print()
    print(f"📝 Next step: Create PR from {BRANCH_NAME} when complete!")
    print()

    return 130 if interrupted else 0


if __name__ == "__main__":
    sys.exit(main())