@@ -48,7 +48,6 @@ def all_events(cls) -> list[EventAction]:
4848
4949 @classmethod
5050 def from_event (cls , event : sansio .Event ) -> MentionScope | None :
51- """Determine the scope of a GitHub event based on its type and context."""
5251 if event .event == "issue_comment" :
5352 issue = event .data .get ("issue" , {})
5453 is_pull_request = (
@@ -65,128 +64,134 @@ def from_event(cls, event: sansio.Event) -> MentionScope | None:
6564
6665
@dataclass
class RawMention:
    """An ``@username`` occurrence found in a comment, before any filtering.

    Instances are built from a regex match: ``username`` is the first capture
    group, ``position`` is the match start and ``end`` is the match end.
    """

    # Regex match object the mention was built from.
    match: re.Match[str]
    # Captured handle, without the leading "@".
    username: str
    # Offset of the start of the match (the "@" character).
    position: int
    # Offset just past the last character of the match.
    end: int
7772
7873
79- def check_pattern_match (
80- text : str , pattern : str | re .Pattern [str ] | None
81- ) -> re .Match [str ] | None :
82- """Check if text matches the given pattern (string or regex).
# Fenced code block: an opening ``` up to the nearest closing ``` (non-greedy);
# [\s\S] lets the body span newlines.
CODE_BLOCK_PATTERN = re.compile(r"```[\s\S]*?```", re.MULTILINE)
# Inline code span: one or more non-backtick characters between two backticks.
INLINE_CODE_PATTERN = re.compile(r"`[^`]+`")
# Blockquote line: optional leading whitespace, then ">" through end of line.
BLOCKQUOTE_PATTERN = re.compile(r"^\s*>.*$", re.MULTILINE)
8377
84- Returns Match object if pattern matches, None otherwise.
85- If pattern is None, returns a dummy match object.
86- """
87- if pattern is None :
88- return re .match (r"(.*)" , text , re .IGNORECASE | re .DOTALL )
8978
90- # Check if it's a compiled regex pattern
91- if isinstance (pattern , re .Pattern ):
92- # Use the pattern directly, preserving its flags
93- return pattern .match (text )
# GitHub username rules:
# - 1-39 characters long
# - Can only contain alphanumeric characters or hyphens
# - Cannot start or end with a hyphen
# - Cannot have multiple consecutive hyphens
#
# The "@" must be at the start of a line or directly after whitespace, so
# e-mail addresses such as "user@example.com" are not treated as mentions.
# The trailing negative lookahead rejects a candidate that runs straight into
# another word character or hyphen (e.g. "@bot_helper", or a handle longer
# than 39 characters); without it the regex would silently match a truncated
# prefix of the handle ("bot") instead of rejecting it.
GITHUB_MENTION_PATTERN = re.compile(
    r"(?:^|(?<=\s))@([a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38})(?![\w-])",
    re.MULTILINE | re.IGNORECASE,
)
9488
95- # For strings, do exact match (case-insensitive)
96- # Escape the string to treat it literally
97- escaped_pattern = re .escape (pattern )
98- return re .match (escaped_pattern , text , re .IGNORECASE )
9989
def extract_all_mentions(text: str) -> list[RawMention]:
    """Find every ``@username`` in *text* outside code and blockquotes.

    Fenced code blocks, inline code spans and blockquote lines are overwritten
    with spaces of the same length before searching. Character offsets of the
    returned mentions therefore still refer to the original *text*, while
    nothing inside those regions can match.
    """

    def _blank_out(found: re.Match[str]) -> str:
        # Same-length run of spaces keeps every later offset unchanged.
        return " " * len(found.group(0))

    masked = text
    # Order matters: fenced blocks first, then inline code, then blockquotes.
    for ignored in (CODE_BLOCK_PATTERN, INLINE_CODE_PATTERN, BLOCKQUOTE_PATTERN):
        masked = ignored.sub(_blank_out, masked)

    mentions: list[RawMention] = []
    for found in GITHUB_MENTION_PATTERN.finditer(masked):
        mentions.append(
            RawMention(
                match=found,
                username=found.group(1),
                position=found.start(),
                end=found.end(),
            )
        )
    return mentions
100110
101- CODE_BLOCK_PATTERN = re .compile (r"```[\s\S]*?```" , re .MULTILINE )
102- INLINE_CODE_PATTERN = re .compile (r"`[^`]+`" )
103- QUOTE_PATTERN = re .compile (r"^\s*>.*$" , re .MULTILINE )
104111
class LineInfo(NamedTuple):
    """1-based line number and text of a line inside a comment."""

    lineno: int
    text: str

    @classmethod
    def for_mention_in_comment(
        cls, comment: str, mention_position: int
    ) -> "LineInfo":
        """Return the line that contains offset *mention_position*.

        Both the line number and the line text are derived from ``"\\n"``
        boundaries. Mixing a ``"\\n"`` count with ``str.splitlines()`` (which
        also splits on ``\\r``, form feed, ``\\u2028`` and others) can pair a
        line number with the text of a different line.

        Args:
            comment: Full comment body.
            mention_position: Character offset into *comment*.

        Returns:
            A ``LineInfo`` whose ``text`` excludes the line terminator; it is
            ``""`` when the offset lies on an empty trailing line.
        """
        lineno = comment.count("\n", 0, mention_position) + 1

        # rfind returns -1 when there is no earlier newline, so +1 yields 0.
        line_start = comment.rfind("\n", 0, mention_position) + 1
        line_end = comment.find("\n", mention_position)
        if line_end == -1:
            line_end = len(comment)

        # Drop the "\r" left over from a "\r\n" line ending.
        line_text = comment[line_start:line_end].rstrip("\r")
        return cls(lineno=lineno, text=line_text)
120126
121- # Handle regex patterns vs literal strings
122- if isinstance (username_pattern , re .Pattern ):
123- # Use the pattern string directly, preserving any flags
124- username_regex = username_pattern .pattern
125- # Extract flags from the compiled pattern
126- flags = username_pattern .flags | re .MULTILINE | re .IGNORECASE
127- else :
128- # For strings, escape them to be treated literally
129- username_regex = re .escape (username_pattern )
130- flags = re .MULTILINE | re .IGNORECASE
131127
132- original_body = body
133- original_lines = original_body .splitlines ()
def extract_mention_text(
    body: str, current_index: int, all_mentions: list[RawMention], mention_end: int
) -> str:
    """Return the text that follows one mention, up to the next mention.

    Args:
        body: Full comment body.
        current_index: Index of the current mention within *all_mentions*.
        all_mentions: Every mention found in *body* (any username, not only
            the ones that matched a filter), in order of appearance.
        mention_end: Offset in *body* just past the current mention.

    Returns:
        ``body`` from *mention_end* to the start of the next mention, or to
        the end of *body* when this is the last mention, stripped of
        surrounding whitespace.
    """
    next_index = current_index + 1
    if next_index < len(all_mentions):
        text_end = all_mentions[next_index].position
    else:
        text_end = len(body)

    return body[mention_end:text_end].strip()
151145
152- mentions : list [Mention ] = []
153146
154- for match in mention_pattern .finditer (processed_text ):
155- position = match .start () # Position of @
156- username = match .group (1 ) # Captured username
@dataclass
class ParsedMention:
    """A mention that passed the username filter, with its surrounding context."""

    # Handle that was mentioned, without the leading "@".
    username: str
    # Text following the mention.
    text: str
    # Offset of the mention within the comment body.
    position: int
    # Line number and line text for the mention.
    line_info: LineInfo
    # Optional regex match; defaults to None.
    match: re.Match[str] | None = None
    # Neighbouring mentions; both default to None.
    previous_mention: ParsedMention | None = None
    next_mention: ParsedMention | None = None
157156
158- text_before = original_body [:position ]
159- line_number = text_before .count ("\n " ) + 1
160157
161- line_index = line_number - 1
162- line_text = (
163- original_lines [line_index ] if line_index < len (original_lines ) else ""
164- )
158+ def extract_mentions_from_event (
159+ event : sansio .Event , username_pattern : str | re .Pattern [str ] | None = None
160+ ) -> list [ParsedMention ]:
161+ comment_data = event .data .get ("comment" , {})
162+ if comment_data is None :
163+ comment_data = {}
164+ comment = comment_data .get ("body" , "" )
165165
166- text_start = match .end ()
166+ if not comment :
167+ return []
167168
168- # Find next @mention to know where this text ends
169- next_match = mention_pattern .search (processed_text , match .end ())
170- if next_match :
171- text_end = next_match .start ()
172- else :
173- text_end = len (original_body )
174-
175- text = original_body [text_start :text_end ].strip ()
176-
177- mention = Mention (
178- username = username ,
179- text = text ,
180- position = position ,
181- line_number = line_number ,
182- line_text = line_text ,
183- match = None ,
184- previous_mention = None ,
185- next_mention = None ,
186- )
169+ # If no pattern specified, use bot username (TODO: get from settings)
170+ if username_pattern is None :
171+ username_pattern = "bot" # Placeholder
187172
188- mentions .append (mention )
173+ mentions : list [ParsedMention ] = []
174+ potential_mentions = extract_all_mentions (comment )
175+ for i , raw_mention in enumerate (potential_mentions ):
176+ if not matches_pattern (raw_mention .username , username_pattern ):
177+ continue
178+
179+ text = extract_mention_text (comment , i , potential_mentions , raw_mention .end )
180+ line_info = LineInfo .for_mention_in_comment (comment , raw_mention .position )
181+
182+ mentions .append (
183+ ParsedMention (
184+ username = raw_mention .username ,
185+ text = text ,
186+ position = raw_mention .position ,
187+ line_info = line_info ,
188+ match = None ,
189+ previous_mention = None ,
190+ next_mention = None ,
191+ )
192+ )
189193
194+ # link mentions
190195 for i , mention in enumerate (mentions ):
191196 if i > 0 :
192197 mention .previous_mention = mentions [i - 1 ]
@@ -202,11 +207,10 @@ class Comment:
202207 author : str
203208 created_at : datetime
204209 url : str
205- mentions : list [Mention ]
210+ mentions : list [ParsedMention ]
206211
@property
def line_count(self) -> int:
    """Return the number of lines in ``self.body``; 0 when the body is empty."""
    return len(self.body.splitlines()) if self.body else 0
@@ -224,8 +228,7 @@ def from_event(cls, event: sansio.Event) -> Comment:
224228 if not comment_data :
225229 raise ValueError (f"Cannot extract comment from event type: { event .event } " )
226230
227- created_at_str = comment_data .get ("created_at" , "" )
228- if created_at_str :
231+ if created_at_str := comment_data .get ("created_at" , "" ):
229232 # GitHub timestamps are in ISO format: 2024-01-01T12:00:00Z
230233 created_at_aware = datetime .fromisoformat (
231234 created_at_str .replace ("Z" , "+00:00" )
@@ -253,9 +256,9 @@ def from_event(cls, event: sansio.Event) -> Comment:
253256
254257
255258@dataclass
256- class MentionEvent :
259+ class Mention :
257260 comment : Comment
258- triggered_by : Mention
261+ mention : ParsedMention
259262 scope : MentionScope | None
260263
261264 @classmethod
@@ -271,7 +274,7 @@ def from_event(
271274 if scope is not None and event_scope != scope :
272275 return
273276
274- mentions = parse_mentions_for_username (event , username )
277+ mentions = extract_mentions_from_event (event , username )
275278 if not mentions :
276279 return
277280
@@ -280,13 +283,36 @@ def from_event(
280283
281284 for mention in mentions :
282285 if pattern is not None :
283- match = check_pattern_match (mention .text , pattern )
286+ match = get_match (mention .text , pattern )
284287 if not match :
285288 continue
286289 mention .match = match
287290
288291 yield cls (
289292 comment = comment ,
290- triggered_by = mention ,
293+ mention = mention ,
291294 scope = event_scope ,
292295 )
296+
297+
def matches_pattern(text: str, pattern: str | re.Pattern[str] | None) -> bool:
    """Tell whether *text* satisfies *pattern*.

    - ``None`` accepts any text.
    - A compiled regex must match the whole of *text* (``fullmatch``), using
      the flags it was compiled with.
    - A plain string is compared for equality after stripping surrounding
      whitespace and lower-casing both sides.
    """
    if pattern is None:
        return True
    if isinstance(pattern, re.Pattern):
        return pattern.fullmatch(text) is not None
    if isinstance(pattern, str):
        return text.strip().lower() == pattern.strip().lower()
306+
307+
def get_match(text: str, pattern: str | re.Pattern[str] | None) -> re.Match[str] | None:
    """Match *pattern* against the beginning of *text*.

    - ``None`` yields a catch-all match whose group 1 is the entire text
      (``DOTALL`` lets it span newlines).
    - A compiled regex is applied with ``match`` as-is, so its own flags are
      preserved.
    - A plain string is escaped and matched literally, ignoring case. The
      match is anchored at the start only, so *text* may continue after it.

    Returns the match object, or ``None`` when *text* does not match.
    """
    if pattern is None:
        return re.match(r"(.*)", text, re.IGNORECASE | re.DOTALL)
    if isinstance(pattern, re.Pattern):
        return pattern.match(text)
    if isinstance(pattern, str):
        literal = re.escape(pattern)
        return re.match(literal, text, re.IGNORECASE)
0 commit comments