v1.4: Enhanced GitHub Action with type hints and new features

soodoku · claude · soodoku · commit ab14392f3ccd · 2025-09-04T13:09:03.000-07:00
✨ New Features:
- Repository exclusion patterns (exclude_repos input)
- Configurable maximum repositories (max_repos input)
- Improved action metadata for better marketplace discoverability

🔧 Improvements:
- Full Python type hints for better code quality
- Pinned dependency versions in requirements.txt
- Enhanced README documentation with configuration table
- Better environment variable handling

🛠️ Technical:
- Updated workflows to use requirements.txt
- Fixed security vulnerability in requests dependency
- Improved error handling and validation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/.github/scripts/adjacent.py b/.github/scripts/adjacent.py
@@ -4,6 +4,7 @@
 import base64
 import re
 import time
+from typing import List, Tuple, Optional, Dict, Any
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 
@@ -15,14 +16,14 @@
 )
 logger = logging.getLogger(__name__)
 
-REPO = os.getenv("GITHUB_REPOSITORY")  # e.g., 'soodoku/bloomjoin'
-TOKEN = os.getenv("GITHUB_TOKEN")
-HEADERS = {
+REPO: Optional[str] = os.getenv("GITHUB_REPOSITORY")  # e.g., 'soodoku/bloomjoin'
+TOKEN: Optional[str] = os.getenv("GITHUB_TOKEN")
+HEADERS: Dict[str, str] = {
     "Accept": "application/vnd.github+json",
     "Authorization": f"Bearer {TOKEN}"
 }
 
-def get_topics(owner, repo):
+def get_topics(owner: str, repo: str) -> List[str]:
     logger.info(f"Fetching topics for {owner}/{repo}")
     url = f"https://api.github.com/repos/{owner}/{repo}/topics"
     r = requests.get(url, headers=HEADERS)
@@ -31,10 +32,10 @@ def get_topics(owner, repo):
     logger.info(f"Found {len(topics)} topics")
     return topics
 
-def get_user_repos(owner):
+def get_user_repos(owner: str) -> List[Dict[str, Any]]:
     logger.info(f"Fetching repositories for {owner}")
     url = f"https://api.github.com/users/{owner}/repos?per_page=100&type=owner"
-    repos = []
+    repos: List[Dict[str, Any]] = []
     while url:
         r = requests.get(url, headers=HEADERS)
         time.sleep(1)  # More cautious rate limit handling
@@ -44,7 +45,7 @@ def get_user_repos(owner):
         else:
             logger.warning(f"Unexpected response when fetching repos: {page_repos}")
             break
-        link_header = r.headers.get('Link', '')
+        link_header: str = r.headers.get('Link', '')
         url = None
         for link in link_header.split(','):
             if 'rel="next"' in link:
@@ -53,7 +54,7 @@ def get_user_repos(owner):
     logger.info(f"Total repositories found: {len(repos)}")
     return repos
 
-def get_readme_content(owner, repo):
+def get_readme_content(owner: str, repo: str) -> str:
     logger.info(f"Fetching README for {owner}/{repo}")
     url = f"https://api.github.com/repos/{owner}/{repo}/readme"
     r = requests.get(url, headers=HEADERS)
@@ -71,7 +72,7 @@ def get_readme_content(owner, repo):
     logger.info("No README content found")
     return ""
 
-def clean_markdown(text):
+def clean_markdown(text: str) -> str:
     text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)
     text = re.sub(r'`.*?`', '', text)
     text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
@@ -84,7 +85,7 @@ def clean_markdown(text):
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 
-def compute_readme_similarity(text1, text2):
+def compute_readme_similarity(text1: str, text2: str) -> float:
     if not text1 or not text2:
         return 0.0
 
@@ -98,69 +99,72 @@ def compute_readme_similarity(text1, text2):
         logger.warning(f"Error computing README similarity: {e}")
         return 0.0
 
-def find_adjacent_by_topics(owner, repo_name, topics):
+def find_adjacent_by_topics(owner: str, repo_name: str, topics: List[str], exclude_repos: Optional[List[str]] = None) -> List[Tuple[str, str, List[str], float]]:
     """Find adjacent repositories based on common topics"""
-    repos = get_user_repos(owner)
-    related = []
+    repos: List[Dict[str, Any]] = get_user_repos(owner)
+    related: List[Tuple[str, str, List[str], float]] = []
+    exclude_list = exclude_repos or []
     for r in repos:
-        if r["name"].lower() == repo_name.lower():
+        if r["name"].lower() == repo_name.lower() or r["name"] in exclude_list:
             continue
-        t = get_topics(r["owner"]["login"], r["name"])
-        common = set(t) & set(topics)
+        t: List[str] = get_topics(r["owner"]["login"], r["name"])
+        common: set = set(t) & set(topics)
         if common:
             related.append((r["full_name"], r.get("description", ""), list(common), len(common)/len(set(t) | set(topics))))
     return sorted(related, key=lambda x: -x[3])
 
-def find_adjacent_by_readme(owner, repo_name, readme_content):
+def find_adjacent_by_readme(owner: str, repo_name: str, readme_content: str, exclude_repos: Optional[List[str]] = None) -> List[Tuple[str, str, List[str], float]]:
     """Find adjacent repositories based on README content similarity"""
-    repos = get_user_repos(owner)
-    related = []
+    repos: List[Dict[str, Any]] = get_user_repos(owner)
+    related: List[Tuple[str, str, List[str], float]] = []
+    exclude_list = exclude_repos or []
     for r in repos:
-        if r["name"].lower() == repo_name.lower():
+        if r["name"].lower() == repo_name.lower() or r["name"] in exclude_list:
             continue
-        other_readme = get_readme_content(r["owner"]["login"], r["name"])
-        similarity = compute_readme_similarity(readme_content, other_readme)
+        other_readme: str = get_readme_content(r["owner"]["login"], r["name"])
+        similarity: float = compute_readme_similarity(readme_content, other_readme)
         if similarity > 0.1:  # Threshold for considering repositories as related
             related.append((r["full_name"], r.get("description", ""), [], similarity))
     return sorted(related, key=lambda x: -x[3])
 
-def find_adjacent_combined(owner, repo_name, topics, readme_content, weight_topics=0.5):
+def find_adjacent_combined(owner: str, repo_name: str, topics: List[str], readme_content: str, weight_topics: float = 0.5, exclude_repos: Optional[List[str]] = None) -> List[Tuple[str, str, List[str], float]]:
     """Find adjacent repositories using a weighted combination of topics and README similarity"""
-    repos = get_user_repos(owner)
-    related = []
+    repos: List[Dict[str, Any]] = get_user_repos(owner)
+    related: List[Tuple[str, str, List[str], float]] = []
+    exclude_list = exclude_repos or []
     
     # Check if we have topics and README content
-    has_topics = len(topics) > 0
-    has_readme = len(readme_content) > 0
+    has_topics: bool = len(topics) > 0
+    has_readme: bool = len(readme_content) > 0
     
     # Adjust weights if one source is missing
-    effective_weight_topics = weight_topics
+    effective_weight_topics: float = weight_topics
     if not has_topics:
         effective_weight_topics = 0
     if not has_readme:
         effective_weight_topics = 1
     
     # Collect similarity scores for normalization if needed
-    all_topic_sims = []
-    all_readme_sims = []
-    repo_data = []
+    all_topic_sims: List[float] = []
+    all_readme_sims: List[float] = []
+    repo_data: List[Tuple[str, str, List[str], float, float]] = []
     
     # First pass to collect all scores
     for r in repos:
-        if r["name"].lower() == repo_name.lower():
+        if r["name"].lower() == repo_name.lower() or r["name"] in exclude_list:
             continue
         
         # Get topic similarity
-        t = get_topics(r["owner"]["login"], r["name"])
-        common = set(t) & set(topics)
-        topic_sim = 0
+        t: List[str] = get_topics(r["owner"]["login"], r["name"])
+        common: set = set(t) & set(topics)
+        topic_sim: float = 0
         if has_topics and t:
             topic_sim = len(common)/max(1, len(set(t) | set(topics)))
         all_topic_sims.append(topic_sim)
         
         # Get README similarity
-        other_readme = ""
-        readme_sim = 0
+        other_readme: str = ""
+        readme_sim: float = 0
         if has_readme:
             other_readme = get_readme_content(r["owner"]["login"], r["name"])
             readme_sim = compute_readme_similarity(readme_content, other_readme)
@@ -169,17 +173,17 @@ def find_adjacent_combined(owner, repo_name, topics, readme_content, weight_topi
         repo_data.append((r["full_name"], r.get("description", ""), list(common), topic_sim, readme_sim))
     
     # Normalize scores if we have data
-    topic_max = max(all_topic_sims) if all_topic_sims else 1
-    readme_max = max(all_readme_sims) if all_readme_sims else 1
+    topic_max: float = max(all_topic_sims) if all_topic_sims else 1
+    readme_max: float = max(all_readme_sims) if all_readme_sims else 1
     
     # Second pass to calculate combined scores
     for full_name, desc, common, topic_sim, readme_sim in repo_data:
         # Normalize if we have non-zero maximums
-        norm_topic_sim = topic_sim / topic_max if topic_max > 0 else 0
-        norm_readme_sim = readme_sim / readme_max if readme_max > 0 else 0
+        norm_topic_sim: float = topic_sim / topic_max if topic_max > 0 else 0
+        norm_readme_sim: float = readme_sim / readme_max if readme_max > 0 else 0
         
         # Combined score
-        combined_score = (
+        combined_score: float = (
             effective_weight_topics * norm_topic_sim + 
             (1 - effective_weight_topics) * norm_readme_sim
         )
@@ -189,7 +193,7 @@ def find_adjacent_combined(owner, repo_name, topics, readme_content, weight_topi
     
     return sorted(related, key=lambda x: -x[3])
 
-def update_readme(related):
+def update_readme(related: List[Tuple[str, str, List[str], float]], max_repos: int = 5) -> None:
     logger.info("Updating README with adjacent repositories")
     
     try:
@@ -201,7 +205,7 @@ def update_readme(related):
     header = "## 🔗 Adjacent Repositories"
     block = [f"{header}\n\n"]
 
-    for full_name, desc, tags, score in related[:5]:
+    for full_name, desc, tags, score in related[:max_repos]:
         url = f"https://github.com/{full_name}"
         clean_desc = desc.strip() if desc else ""
         desc_str = f" — {clean_desc}" if clean_desc else ""
@@ -240,11 +244,18 @@ def update_readme(related):
     logger.info("README update complete")
 
 if __name__ == "__main__":
+    if not REPO:
+        raise ValueError("GITHUB_REPOSITORY environment variable not set")
     owner, repo = REPO.split("/")
-    topics = get_topics(owner, repo)
-    method = os.getenv("SIMILARITY_METHOD", "topics").lower()
+    topics: List[str] = get_topics(owner, repo)
+    method: str = os.getenv("SIMILARITY_METHOD", "topics").lower()
+    exclude_repos_str: str = os.getenv("EXCLUDE_REPOS", "")
+    exclude_repos: List[str] = [r.strip() for r in exclude_repos_str.split(",") if r.strip()]
+    max_repos: int = int(os.getenv("MAX_REPOS", "5"))
     
     print(f"Finding adjacent repositories using method: {method}")
+    if exclude_repos:
+        print(f"Excluding repositories: {', '.join(exclude_repos)}")
     
     # Check if we have topics
     has_topics = len(topics) > 0
@@ -258,47 +269,48 @@ def update_readme(related):
         print("Warning: No README content found or failed to parse")
     
     # Determine which method to use, with fallbacks if necessary
+    related: List[Tuple[str, str, List[str], float]] = []
     if method == "topics":
         if has_topics:
-            related = find_adjacent_by_topics(owner, repo, topics)
+            related = find_adjacent_by_topics(owner, repo, topics, exclude_repos)
         else:
             print("Falling back to README similarity since no topics are available")
             if has_readme:
-                related = find_adjacent_by_readme(owner, repo, readme_content)
+                related = find_adjacent_by_readme(owner, repo, readme_content, exclude_repos)
             else:
                 print("No viable similarity method available. Both topics and README are missing.")
                 related = []
     elif method == "readme":
         if has_readme:
-            related = find_adjacent_by_readme(owner, repo, readme_content)
+            related = find_adjacent_by_readme(owner, repo, readme_content, exclude_repos)
         else:
             print("Falling back to topic similarity since README is not available")
             if has_topics:
-                related = find_adjacent_by_topics(owner, repo, topics)
+                related = find_adjacent_by_topics(owner, repo, topics, exclude_repos)
             else:
                 print("No viable similarity method available. Both topics and README are missing.")
                 related = []
     elif method == "combined":
-        weight = float(os.getenv("TOPIC_WEIGHT", "0.5"))
-        related = find_adjacent_combined(owner, repo, topics, readme_content, weight)
+        weight: float = float(os.getenv("TOPIC_WEIGHT", "0.5"))
+        related = find_adjacent_combined(owner, repo, topics, readme_content, weight, exclude_repos)
     else:
         print(f"Unrecognized method '{method}', using best available method")
         if has_topics and has_readme:
             print("Using combined similarity")
             weight = float(os.getenv("TOPIC_WEIGHT", "0.5"))
-            related = find_adjacent_combined(owner, repo, topics, readme_content, weight)
+            related = find_adjacent_combined(owner, repo, topics, readme_content, weight, exclude_repos)
         elif has_topics:
             print("Using topic similarity")
-            related = find_adjacent_by_topics(owner, repo, topics)
+            related = find_adjacent_by_topics(owner, repo, topics, exclude_repos)
         elif has_readme:
             print("Using README similarity")
-            related = find_adjacent_by_readme(owner, repo, readme_content)
+            related = find_adjacent_by_readme(owner, repo, readme_content, exclude_repos)
         else:
             print("No viable similarity method available. Both topics and README are missing.")
             related = []
     
     if related:
-        update_readme(related)
-        print("README updated with adjacent repositories.")
+        update_readme(related, max_repos)
+        print(f"README updated with {min(len(related), max_repos)} adjacent repositories.")
     else:
         print("No adjacent repos found.")
diff --git a/.github/workflows/adjacent.yml b/.github/workflows/adjacent.yml
@@ -15,7 +15,7 @@ jobs:
           python-version: '3.x'
       - name: Install dependencies
         run: |
-          pip install requests scikit-learn numpy
+          pip install -r requirements.txt
       - name: Run adjacent recommender
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
@@ -173,3 +173,4 @@ cython_debug/
 # PyPI configuration file
 .pypirc
 .github/.DS_Store
+.DS_Store
diff --git a/README.md b/README.md
@@ -5,18 +5,20 @@
 ![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)
 [![Used By](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/gojiplus/adjacent/main/docs/adjacent.json)](https://github.com/search?q=gojiplus/adjacent+path%3A.github%2Fworkflows+language%3AYAML&type=code)
 
-**Adjacent** is a GitHub Action that discovers and inserts a list of **related repositories** into your README based on shared GitHub topics.
+**Adjacent** is a GitHub Action that discovers and inserts a list of **related repositories** into your README based on shared GitHub topics and README content similarity.
 
-Perfect for discovery, organization, and letting your users explore similar tools you’ve built.
+Perfect for discovery, organization, and letting your users explore similar tools you've built.
 
 ---
 
 ## 🚀 Features
 
-- 🔎 Finds related repositories by topic similarity
-- 🧠 Ranks and inserts up to 5 adjacent repos into your `README.md`
-- 🔄 Runs on a schedule or manual trigger
-- 💬 Ideal for portfolios, developer tools, and curated ecosystems
+- 🔎 **Multiple similarity methods**: GitHub topics, README content, or combined approach
+- 🧠 **Smart ranking**: Configurable weighting between topics and content similarity
+- 🚫 **Repository exclusions**: Skip specific repositories you don't want to include
+- 📊 **Customizable output**: Set maximum number of repositories to display
+- 🔄 **Automated updates**: Runs on a schedule or manual trigger
+- 💬 **Perfect for**: Portfolios, developer tools, and curated ecosystems
 
 ---
 
@@ -44,9 +46,13 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Adjacent Repositories Recommender
-        uses: gojiplus/adjacent@v1.3
+        uses: gojiplus/adjacent@v1.4
         with:
-          token: ${{ secrets.GITHUB_TOKEN }}  # ✅ Pass the required token
+          token: ${{ secrets.GITHUB_TOKEN }}  # ✅ Required: GitHub token
+          similarity_method: 'combined'        # Optional: topics, readme, or combined
+          topic_weight: '0.6'                  # Optional: weight for topics (0-1)
+          exclude_repos: 'template,archived'   # Optional: comma-separated exclusions
+          max_repos: '5'                       # Optional: max repositories to show
 
       - name: Commit and push changes
         run: |
@@ -58,6 +64,17 @@ jobs:
 
 ```
 
+## ⚙️ Configuration Options
+
+| Input | Description | Default | Example |
+|-------|-------------|---------|----------|
+| `token` | GitHub token for API access | **Required** | `${{ secrets.GITHUB_TOKEN }}` |
+| `repo` | Target repository | Current repo | `owner/repository` |
+| `similarity_method` | Method: `topics`, `readme`, or `combined` | `combined` | `topics` |
+| `topic_weight` | Weight for topics in combined method (0-1) | `0.6` | `0.8` |
+| `exclude_repos` | Comma-separated repository names to exclude (short names without the owner; matched exactly and case-sensitively) | _(none)_ | `template,archived,old-project` |
+| `max_repos` | Maximum repositories to display | `5` | `3` |
+
 ## 🔗 Adjacent Repositories
 
 - [gojiplus/reporoulette](https://github.com/gojiplus/reporoulette) — Sample Random GitHub Repositories
diff --git a/action.yml b/action.yml
diff --git a/requirements.txt b/requirements.txt