4141UPDATE_DATE = datetime (2019 , 7 , 9 , 18 , 18 , 36 , 480291 , tzinfo = timezone .utc )
4242
4343log = logging .getLogger ("sync2jira" )
44+ logging .getLogger ("snowflake.connector" ).setLevel (logging .WARNING )
4445
4546remote_link_title = "Upstream issue"
4647duplicate_issues_subject = "FYI: Duplicate Sync2jira Issues"
4748
48- jira_cache = {}
4949SNOWFLAKE_QUERY = f"""
5050SELECT
5151 CONCAT(p.PKEY, '-', a.issue_key) AS issue_key,
# Matches a canonical GitHub issue or pull-request URL, e.g.
# https://github.com/<owner>/<repo>/issues/<n> or .../pull/<n>.
# Used with fullmatch() below, so the whole URL must conform.
GH_URL_PATTERN = re .compile (r"https://github\.com/[^/]+/[^/]+/(issues|pull)/\d+" )
7171
7272
class UrlCache(dict):
    """A dict-like object, intended to be used as a cache, which contains a
    limited number of entries -- excess entries are deleted in FIFO order
    (relying on the insertion-order guarantee of ``dict``).
    """

    # Maximum number of entries retained; may be overridden per instance.
    MAX_SIZE = 1000

    def __setitem__(self, key, value):
        # Overwriting an existing key does not grow the dict, so only
        # evict when we are actually adding a NEW entry; the original
        # implementation would needlessly drop the oldest entry on a
        # plain overwrite of a key that was already cached.
        if key not in self:
            while len(self) >= self.MAX_SIZE:
                # next(iter(self)) is the oldest (first-inserted) key.
                del self[next(iter(self))]
        super().__setitem__(key, value)
85+
86+ jira_cache = UrlCache ()
87+
88+
def validate_github_url(url):
    """Return True when *url* is a well-formed GitHub issue/PR URL.

    The whole string must match GH_URL_PATTERN (fullmatch), not just a
    prefix or substring of it.
    """
    match = GH_URL_PATTERN.fullmatch(url)
    return match is not None
@@ -225,73 +241,73 @@ def _matching_jira_issue_query(client, issue, config):
225241 :param jira.client.JIRA client: JIRA client
226242 :param sync2jira.intermediary.Issue issue: Issue object
227243 :param Dict config: Config dict
228- :param Bool free: Free tag to add 'statusCategory != Done' to query
229244 :returns: results: Returns a list of matching JIRA issues if any are found
230245 :rtype: List
231246 """
232- # Searches for any remote link to the issue.url
233247
234- # Query the JIRA client and store the results
235- results = execute_snowflake_query (issue )
236- results_of_query = []
237- if len (results ) > 0 :
248+ # If there's an entry for the issue in our cache, fetch the issue key from it.
249+ if result := jira_cache .get (issue .url ):
250+ issue_keys = [result ]
251+ else :
252+ # Search for Jira issues with a "remote link" to the issue.url;
253+ # if we find none, return an empty list.
254+ results = execute_snowflake_query (issue )
255+ if not results :
256+ return []
257+
258+ # From the results returned by Snowflake, make an iterable of the
259+ # issues' keys.
238260 issue_keys = (row [0 ] for row in results )
239- jql = f"key in ({ ',' .join (issue_keys )} )"
240- results_of_query = client .search_issues (jql )
241- if len (results_of_query ) > 1 :
242- final_results = []
261+
262+ # Fetch the Jira issue objects using the key list.
263+ jql = f"key in ({ ',' .join (issue_keys )} )"
264+ results = client .search_issues (jql )
265+
266+ # If there is more than one issue, remove duplicates and filter the list
267+ # down to one.
268+ if len (results ) > 1 :
269+ filtered_results = []
243270 # TODO: there is pagure-specific code in here that handles the case where a dropped issue's URL is
244271 # re-used by an issue opened later. i.e. pagure re-uses IDs
245- for result in results_of_query :
272+ for result in results :
246273 description = result .fields .description or ""
247274 summary = result .fields .summary or ""
248- if issue .id in description or issue .title == summary :
249- search = check_comments_for_duplicate (
250- client , result , find_username (issue , config )
275+ if (
276+ issue .id in description
277+ or issue .title == summary
278+ or re .search (
279+ r"\[[a-zA-Z0-9!@#$%^&*()_+\-=\[\]{};':\\|,.<>/?]*] "
280+ + issue .upstream_title ,
281+ summary ,
251282 )
252- if search is True :
253- final_results .append (result )
254- else :
255- # Else search returned a linked issue
256- final_results .append (search )
257- # If that's not the case, check if they have the same upstream title.
258- # Upstream username/repo can change if repos are merged.
259- elif re .search (
260- r"\[[a-zA-Z0-9!@#$%^&*()_+\-=\[\]{};':\\|,.<>/?]*] "
261- + issue .upstream_title ,
262- result .fields .summary ,
263283 ):
264- search = check_comments_for_duplicate (
265- client , result , find_username (issue , config )
266- )
267- if search is True :
268- # We went through all the comments and didn't find anything
269- # that indicated it was a duplicate
270- log .warning (
271- "Matching downstream issue %s to upstream issue %s" ,
272- result .key ,
273- issue .url ,
274- )
275- final_results .append (result )
276- else :
277- # Else search returned a linked issue
278- final_results .append (search )
279- if not final_results :
280- # Only return the most updated issue
281- results_of_query .sort (
284+ username = find_username (issue , config )
285+ search = check_comments_for_duplicate (client , result , username )
286+ filtered_results .append (search if search else result )
287+
288+ # Unless the filtering removed _all_ the results, switch the results to
289+ # the filtered results; otherwise, continue with the original list.
290+ if filtered_results :
291+ results = filtered_results
292+
293+ # If there is more than one result, select only the most-recently updated one.
294+ if len (results ) > 1 :
295+ log .debug (
296+ "Found %i results for query with issue %r" ,
297+ len (results ),
298+ issue .url ,
299+ )
300+ results .sort (
282301 key = lambda x : datetime .strptime (
283302 x .fields .updated , "%Y-%m-%dT%H:%M:%S.%f+0000"
284- )
303+ ),
304+ reverse = True , # Biggest (most recent) first
285305 )
286- final_results . append ( results_of_query [ 0 ])
306+ results = [ results [ 0 ]] # A list of one item
287307
288- # Return the final_results
289- log .debug (
290- "Found %i results for query with issue %r" , len (final_results ), issue .url
291- )
292- return final_results
293- else :
294- return results_of_query
308+ # Cache the result for next time and return it.
309+ jira_cache [issue .url ] = results [0 ].key
310+ return results
295311
296312
297313def find_username (_issue , config ):
@@ -314,16 +330,15 @@ def check_comments_for_duplicate(client, result, username):
314330 :param jira.client.JIRA client: JIRA client
315331 :param jira.resource.Issue result: JIRA issue
316332 :param string username: Username of JIRA user
317- :returns: True if duplicate comment was not found or JIRA issue if \
318- we were able to find it
319- :rtype: Bool or jira.resource.Issue
333+ :returns: duplicate JIRA issue or None
334+ :rtype: jira.resource.Issue or None
320335 """
321336 for comment in client .comments (result ):
322337 search = re .search (r"Marking as duplicate of (\w*)-(\d*)" , comment .body )
323338 if search and comment .author .name == username :
324339 issue_id = search .groups ()[0 ] + "-" + search .groups ()[1 ]
325340 return client .issue (issue_id )
326- return True
341+ return None
327342
328343
329344def _find_comment_in_jira (comment , j_comments ):
@@ -335,6 +350,11 @@ def _find_comment_in_jira(comment, j_comments):
335350 :returns: Item/None
336351 :rtype: jira.resource.Comment/None
337352 """
353+ if comment ["date_created" ] < UPDATE_DATE :
354+ # If the comment date is prior to the update_date, we should not try to
355+ # touch the comment; return the item as is.
356+ return comment
357+
338358 formatted_comment = _comment_format (comment )
339359 legacy_formatted_comment = _comment_format_legacy (comment )
340360 for item in j_comments :
@@ -350,13 +370,6 @@ def _find_comment_in_jira(comment, j_comments):
350370 item .update (body = formatted_comment )
351371 log .info ("Updated one comment" )
352372 # Now we can just return the item
353- return item
354- else :
355- # Else they are equal and we can return the item
356- return item
357- if comment ["date_created" ] < UPDATE_DATE :
358- # If the comment date is prior to the update_date,
359- # we should not try to touch the comment
360373 return item
361374 return None
362375
@@ -689,6 +702,7 @@ def _create_jira_issue(client, issue, config):
689702 return None
690703
691704 downstream = client .create_issue (** kwargs )
705+ jira_cache [issue .url ] = [downstream .key ]
692706
693707 # Add values to the Epic link, QA, and EXD-Service fields if present
694708 if (
@@ -875,7 +889,7 @@ def _update_transition(client, existing, issue):
875889 # downstream JIRA ticket
876890
877891 # First get the closed status from the config file
878- t = filter (lambda d : "transition" in d , issue .downstream .get ("issue_updates" , {} ))
892+ t = filter (lambda d : "transition" in d , issue .downstream .get ("issue_updates" , [] ))
879893 closed_status = next (t )["transition" ]
880894 if (
881895 closed_status is not True
@@ -1154,7 +1168,7 @@ def _update_tags(updates, existing, issue):
11541168
11551169def _build_description (issue ):
11561170 # Build the description of the JIRA issue
1157- issue_updates = issue .downstream .get ("issue_updates" , {} )
1171+ issue_updates = issue .downstream .get ("issue_updates" , [] )
11581172 description = ""
11591173 if "description" in issue_updates :
11601174 description = f"Upstream description: {{quote}}{ issue .content } {{quote}}"
0 commit comments