Added optional tags field and filtering support

thomas-bartlett · thomas-bartlett · commit c46399bb444f · 2025-11-17T13:04:06.000-05:00
diff --git a/sources/core/codeguard-0-api-web-services.md b/sources/core/codeguard-0-api-web-services.md
@@ -12,6 +12,10 @@ languages:
 - typescript
 - xml
 - yaml
+tags:
+- api
+- web-security
+- microservices
 alwaysApply: false
 ---
 
diff --git a/sources/core/codeguard-0-authentication-mfa.md b/sources/core/codeguard-0-authentication-mfa.md
@@ -13,6 +13,9 @@ languages:
 - ruby
 - swift
 - typescript
+tags:
+- authentication
+- web-security
 alwaysApply: false
 ---
 
diff --git a/src/convert_to_ide_formats.py b/src/convert_to_ide_formats.py
@@ -36,6 +36,23 @@ def sync_plugin_metadata(version: str) -> None:
     print(f"✅ Synced plugin metadata to {version}")
 
 
+def matches_tag_filter(rule_tags: list[str], filter_tags: list[str]) -> bool:
+    """
+    Check if rule has all required tags (case-insensitive AND logic).
+    
+    Args:
+        rule_tags: Tags from the rule; must already be lowercase (parse_rule lowercases them), since only filter_tags are lowercased here
+        filter_tags: Tags the rule must all have; each is lowercased before comparison
+    
+    Returns:
+        True if rule has all filter tags (or no filter), False otherwise
+    """
+    if not filter_tags:
+        return True  # No filter means all pass
+    
+    return all(tag.lower() in rule_tags for tag in filter_tags)
+
+
 def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) -> None:
     """
     Update SKILL.md with language-to-rules mapping table.
@@ -81,7 +98,7 @@ def update_skill_md(language_to_rules: dict[str, list[str]], skill_path: str) ->
     print(f"Updated SKILL.md with language mappings")
 
 
-def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None) -> dict[str, list[str]]:
+def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode: bool = True, version: str = None, filter_tags: list[str] = None) -> dict[str, list[str]]:
     """
     Convert rule file(s) to all supported IDE formats using RuleConverter.
 
@@ -90,6 +107,7 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
         output_dir: Output directory (default: 'dist/')
         include_claudecode: Whether to generate Claude Code plugin (default: True, only for core rules)
         version: Version string to use (default: read from pyproject.toml)
+        filter_tags: Optional list of tags to filter by (AND logic, case-insensitive)
 
     Returns:
         Dictionary with 'success' and 'errors' lists:
@@ -138,14 +156,19 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
     # Setup output directory
     output_base = Path(output_dir)
 
-    results = {"success": [], "errors": []}
+    results = {"success": [], "errors": [], "skipped": []}
     language_to_rules = defaultdict(list)
 
     # Process each file
     for md_file in md_files:
         try:
             # Convert the file (raises exceptions on error)
             result = converter.convert(md_file)
+            
+            # Apply tag filter if specified
+            if filter_tags and not matches_tag_filter(result.tags, filter_tags):
+                results["skipped"].append(result.filename)
+                continue
 
             # Write each format
             output_files = []
@@ -192,9 +215,14 @@ def convert_rules(input_path: str, output_dir: str = "dist", include_claudecode:
             results["errors"].append(error_msg)
 
     # Summary
-    print(
-        f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors"
-    )
+    if filter_tags:
+        print(
+            f"\nResults: {len(results['success'])} success, {len(results['skipped'])} skipped (tag filter), {len(results['errors'])} errors"
+        )
+    else:
+        print(
+            f"\nResults: {len(results['success'])} success, {len(results['errors'])} errors"
+        )
 
     # Generate SKILL.md with language mappings (only if Claude Code is included)
     if include_claudecode and language_to_rules:
@@ -256,6 +284,13 @@ def _resolve_source_paths(args) -> list[Path]:
         default="dist",
         help="Output directory for generated bundles (default: dist).",
     )
+    parser.add_argument(
+        "--tag",
+        "--tags",
+        dest="tags",
+        action="append",
+        help="Filter rules by tags (case-insensitive, AND logic). Can be specified multiple times.",
+    )
     
     cli_args = parser.parse_args()
     source_paths = _resolve_source_paths(cli_args)
@@ -316,7 +351,13 @@ def _resolve_source_paths(args) -> list[Path]:
         print()
     
     # Convert all sources
-    aggregated = {"success": [], "errors": []}
+    aggregated = {"success": [], "errors": [], "skipped": []}
+    filter_tags = cli_args.tags if cli_args.tags else None
+    
+    # Print tag filter info if active
+    if filter_tags:
+        print(f"Tag filter active: {', '.join(filter_tags)} (AND logic - rules must have all tags)\n")
+    
     for source_path in source_paths:
         is_core = source_path == Path("sources/core")
         
@@ -325,11 +366,14 @@ def _resolve_source_paths(args) -> list[Path]:
             str(source_path), 
             cli_args.output_dir, 
             include_claudecode=is_core,
-            version=version
+            version=version,
+            filter_tags=filter_tags
         )
         
         aggregated["success"].extend(results["success"])
         aggregated["errors"].extend(results["errors"])
+        if "skipped" in results:
+            aggregated["skipped"].extend(results["skipped"])
         print("")
     
     if aggregated["errors"]:
diff --git a/src/converter.py b/src/converter.py
@@ -45,6 +45,7 @@ class ConversionResult:
         basename: Filename without extension (e.g., 'my-rule')
         outputs: Dictionary mapping format names to their outputs
         languages: List of programming languages the rule applies to, empty list if always applies
+        tags: List of lowercased tags for categorizing and filtering rules, empty list if the rule declares none
     Example:
         result = ConversionResult(
             filename="my-rule.md",
@@ -56,14 +57,16 @@ class ConversionResult:
                     subpath=".cursor/rules"
                 )
             },
-            languages=["python", "javascript"]
+            languages=["python", "javascript"],
+            tags=["authentication", "web-security"]
         )
     """
 
     filename: str
     basename: str
     outputs: dict[str, FormatOutput]
     languages: list[str]
+    tags: list[str]
 
 
 class RuleConverter:
@@ -159,6 +162,28 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule:
                     f"'languages' must be a non-empty list in {filename} when alwaysApply is false"
                 )
 
+        # Parse and validate tags (optional field)
+        tags = []
+        if "tags" in frontmatter:
+            raw_tags = frontmatter["tags"]
+            if not isinstance(raw_tags, list):
+                raise ValueError(f"'tags' must be a list in {filename}")
+            
+            for tag in raw_tags:
+                if not isinstance(tag, str):
+                    raise ValueError(f"All tags must be strings in {filename}")
+                
+                # Check for whitespace characters
+                if any(c.isspace() for c in tag):
+                    raise ValueError(
+                        f"Tags cannot contain spaces or whitespace characters in {filename}: '{tag}'"
+                    )
+                
+                if not tag:
+                    raise ValueError(f"Empty tag found in {filename}")
+                
+                tags.append(tag.lower())
+
         # Adding rule_id to the beginning of the content
         rule_id = Path(filename).stem
         markdown_content = f"rule_id: {rule_id}\n\n{markdown_content}"
@@ -169,6 +194,7 @@ def parse_rule(self, content: str, filename: str) -> ProcessedRule:
             always_apply=always_apply,
             content=markdown_content,
             filename=filename,
+            tags=tags,
         )
 
     def generate_globs(self, languages: list[str]) -> str:
@@ -242,4 +268,5 @@ def convert(self, filepath: str) -> ConversionResult:
             basename=basename,
             outputs=outputs,
             languages=rule.languages,
+            tags=rule.tags,
         )
diff --git a/src/formats/base.py b/src/formats/base.py
@@ -25,13 +25,15 @@ class ProcessedRule:
         always_apply: Whether this rule should apply to all files
         content: The actual rule content in markdown format
         filename: Original filename of the rule
+        tags: List of lowercased tags for categorizing and filtering rules, empty list if the rule declares none
     """
 
     description: str
     languages: list[str]
     always_apply: bool
     content: str
     filename: str
+    tags: list[str]
 
 
 class BaseFormat(ABC):
diff --git a/src/validate_unified_rules.py b/src/validate_unified_rules.py
@@ -54,6 +54,21 @@ def validate_rule(file_path: Path) -> dict[str, list[str]]:
             if unknown:
                 warnings.append(f"Unknown languages: {', '.join(unknown)}")
 
+        # Validate tags if present
+        if "tags" in frontmatter:
+            tags = frontmatter["tags"]
+            if not isinstance(tags, list):
+                errors.append("'tags' must be a list")
+            elif tags:  # Only validate if not empty
+                for tag in tags:
+                    if not isinstance(tag, str):
+                        errors.append(f"All tags must be strings, found: {type(tag).__name__}")
+                        break
+                    elif any(c.isspace() for c in tag):
+                        errors.append(f"Tags cannot contain whitespace: '{tag}'")
+                    elif not tag:
+                        errors.append("Empty tag found")
+
         # Check content exists
         if not markdown_content.strip():
             errors.append("Rule content cannot be empty")