@@ -396,77 +396,118 @@ def get_issue_content(self, number, issue=None) -> str:
396396
def extract_related_issues(self, pr_data: Dict[str, Any]) -> List[int]:
    """
    Extract related issue numbers from PR data using a length-adaptive strategy.

    The length of ``pr_data['pr_metadata']['description']`` selects one of two
    pattern sets, which is then applied to *every* string found anywhere in
    ``pr_data`` (nested dicts, lists and tuples are walked; dict keys are not
    scanned):

    - Short descriptions (<200 chars): aggressive patterns, including bare
      ``#123`` references and keywords followed by a plain number.
    - Long descriptions (>=200 chars): strict patterns that require a full
      GitHub URL, a keyword, or an ``owner/repo#123`` reference, to avoid
      false positives in verbose text.

    A missing, ``None`` or non-string description counts as length 0 and
    therefore selects the aggressive strategy.

    Args:
        pr_data: Complete pull request data dictionary.

    Returns:
        Sorted list of unique issue numbers. Numbers longer than 6 digits
        and the number 0 are discarded as implausible.
    """
    short_description_limit = 200  # below this, use the aggressive patterns
    max_issue_digits = 6           # longer digit runs are unlikely to be issues

    # GitHub's official closing keywords plus looser "mention" words.
    closing_keywords = (
        'close', 'closes', 'closed',
        'fix', 'fixes', 'fixed',
        'resolve', 'resolves', 'resolved',
        'address', 'addresses', 'addressing',
        'relate', 'relates', 'related',
        'see',
        'issue', 'bug', 'ticket', 'todo', 'task',
    )
    keyword_alternation = "|".join(closing_keywords)
    repo = re.escape(self.full_name)
    # Full issue/PR URL of this repository (reliable in both strategies).
    url_pattern = rf'(?:https?://)?github\.com/{repo}/(?:issues|pull)/(\d+)'

    # Determine the description length defensively: malformed or missing
    # metadata simply yields 0 instead of raising (and instead of hiding
    # unrelated errors behind a bare ``except``).
    metadata = pr_data.get('pr_metadata') if isinstance(pr_data, dict) else None
    description = metadata.get('description') if isinstance(metadata, dict) else None
    description_length = len(description) if isinstance(description, str) else 0

    if description_length < short_description_limit:
        patterns = (
            # Simple #123 reference (most common in short descriptions)
            r'#(\d+)(?!\d)',
            url_pattern,
            # Keywords with flexible spacing; the leading \b stops words that
            # merely end in a keyword ("prefix 42", "debug 7") from matching.
            rf'\b(?:{keyword_alternation})\s*:?\s*#?(\d+)(?!\d)',
            # Action words commonly used in short descriptions
            r'\b(?:addresses?|references?|relates?\s+to|see)\s+#?(\d+)(?!\d)',
        )
    else:
        patterns = (
            url_pattern,
            # Keywords must be followed by an explicit '#'
            rf'\b(?:{keyword_alternation})\s*:?\s*#(\d+)\b',
            # Explicit issue/PR nouns; the '#' is optional here
            r'\b(?:issue|bug|ticket|pr|pull\s+request)\s*:?\s*#?(\d+)\b',
            # Same-repository reference written in cross-repo form
            rf'\b{repo}#(\d+)\b',
        )

    # Compile once per call instead of once per visited string.
    compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in patterns]

    issues = set()
    # Iterative walk over the nested structure; order is irrelevant because
    # results are collected in a set and sorted at the end.
    pending = [pr_data]
    while pending:
        value = pending.pop()
        if isinstance(value, dict):
            pending.extend(value.values())
        elif isinstance(value, (list, tuple)):
            pending.extend(value)
        elif isinstance(value, str):
            for regex in compiled_patterns:
                for digits in regex.findall(value):
                    # Keep only plausible issue numbers: positive and short.
                    if len(digits) <= max_issue_digits and int(digits) > 0:
                        issues.add(int(digits))

    return sorted(issues)
471512
472513
0 commit comments