
Commit bdf72ee

webbnh and ralphbean authored
Add cache for linking downstream to upstream issues (#398)
* Correct default value for issue_updates to empty list
* Reduce Snowflake log noise
* Streamline _find_comment_in_jira()
* Make signature and uses of check_comments_for_duplicate() more graceful
* Refactor _matching_jira_issue_query()
* Cache the upstream-downstream issue matches
* Add cache refresh after issue update

  Refresh the local cache after updating the issue.

* Appease black
* Cache the Jira issue key instead of the ORM object

---------

Co-authored-by: Ralph Bean <rbean@redhat.com>
1 parent 8c7985e commit bdf72ee

File tree

3 files changed: +186 -99 lines changed


sync2jira/downstream_issue.py

Lines changed: 80 additions & 66 deletions
@@ -41,11 +41,11 @@
 UPDATE_DATE = datetime(2019, 7, 9, 18, 18, 36, 480291, tzinfo=timezone.utc)
 
 log = logging.getLogger("sync2jira")
+logging.getLogger("snowflake.connector").setLevel(logging.WARNING)
 
 remote_link_title = "Upstream issue"
 duplicate_issues_subject = "FYI: Duplicate Sync2jira Issues"
 
-jira_cache = {}
 SNOWFLAKE_QUERY = f"""
 SELECT
     CONCAT(p.PKEY, '-', a.issue_key) AS issue_key,
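The added setLevel() call quiets the snowflake.connector logger, whose INFO-level chatter the commit message calls "Snowflake log noise". A minimal sketch of the effect (the messages are invented for illustration):

    import logging

    logging.basicConfig(level=logging.INFO)
    logging.getLogger("snowflake.connector").setLevel(logging.WARNING)

    logging.getLogger("sync2jira").info("still visible")
    logging.getLogger("snowflake.connector").info("suppressed now")
    logging.getLogger("snowflake.connector").warning("warnings still get through")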
@@ -70,6 +70,22 @@
 GH_URL_PATTERN = re.compile(r"https://github\.com/[^/]+/[^/]+/(issues|pull)/\d+")
 
 
+class UrlCache(dict):
+    """A dict-like object, intended to be used as a cache, which contains a
+    limited number of entries -- excess entries are deleted in FIFO order.
+    """
+
+    MAX_SIZE = 1000
+
+    def __setitem__(self, key, value):
+        while len(self) >= self.MAX_SIZE:
+            del self[next(iter(self))]
+        super().__setitem__(key, value)
+
+
+jira_cache = UrlCache()
+
+
 def validate_github_url(url):
     """URL validation"""
     return bool(GH_URL_PATTERN.fullmatch(url))
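Since UrlCache is the core addition of this commit, here is a quick demonstration of its FIFO eviction; the small MAX_SIZE override and the URLs/keys are purely illustrative:

    class TinyCache(UrlCache):
        MAX_SIZE = 3  # shrunk from 1000 so the eviction is visible

    cache = TinyCache()
    for n in range(5):
        cache[f"https://github.com/org/repo/issues/{n}"] = f"PROJ-{n}"

    # Dicts preserve insertion order, so next(iter(self)) in __setitem__
    # always names the oldest entry; the first two URLs have been evicted.
    print(list(cache.values()))  # ['PROJ-2', 'PROJ-3', 'PROJ-4']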
@@ -225,73 +241,73 @@ def _matching_jira_issue_query(client, issue, config):
     :param jira.client.JIRA client: JIRA client
     :param sync2jira.intermediary.Issue issue: Issue object
     :param Dict config: Config dict
-    :param Bool free: Free tag to add 'statusCategory != Done' to query
     :returns: results: Returns a list of matching JIRA issues if any are found
     :rtype: List
     """
-    # Searches for any remote link to the issue.url
 
-    # Query the JIRA client and store the results
-    results = execute_snowflake_query(issue)
-    results_of_query = []
-    if len(results) > 0:
+    # If there's an entry for the issue in our cache, fetch the issue key from it.
+    if result := jira_cache.get(issue.url):
+        issue_keys = [result]
+    else:
+        # Search for Jira issues with a "remote link" to the issue.url;
+        # if we find none, return an empty list.
+        results = execute_snowflake_query(issue)
+        if not results:
+            return []
+
+        # From the results returned by Snowflake, make an iterable of the
+        # issues' keys.
         issue_keys = (row[0] for row in results)
-        jql = f"key in ({','.join(issue_keys)})"
-        results_of_query = client.search_issues(jql)
-        if len(results_of_query) > 1:
-            final_results = []
+
+    # Fetch the Jira issue objects using the key list.
+    jql = f"key in ({','.join(issue_keys)})"
+    results = client.search_issues(jql)
+
+    # If there is more than one issue, remove duplicates and filter the list
+    # down to one.
+    if len(results) > 1:
+        filtered_results = []
         # TODO: there is pagure-specific code in here that handles the case where a dropped issue's URL is
         # re-used by an issue opened later. i.e. pagure re-uses IDs
-        for result in results_of_query:
+        for result in results:
             description = result.fields.description or ""
             summary = result.fields.summary or ""
-            if issue.id in description or issue.title == summary:
-                search = check_comments_for_duplicate(
-                    client, result, find_username(issue, config)
+            if (
+                issue.id in description
+                or issue.title == summary
+                or re.search(
+                    r"\[[a-zA-Z0-9!@#$%^&*()_+\-=\[\]{};':\\|,.<>/?]*] "
+                    + issue.upstream_title,
+                    summary,
                 )
-                if search is True:
-                    final_results.append(result)
-                else:
-                    # Else search returned a linked issue
-                    final_results.append(search)
-            # If that's not the case, check if they have the same upstream title.
-            # Upstream username/repo can change if repos are merged.
-            elif re.search(
-                r"\[[a-zA-Z0-9!@#$%^&*()_+\-=\[\]{};':\\|,.<>/?]*] "
-                + issue.upstream_title,
-                result.fields.summary,
             ):
-                search = check_comments_for_duplicate(
-                    client, result, find_username(issue, config)
-                )
-                if search is True:
-                    # We went through all the comments and didn't find anything
-                    # that indicated it was a duplicate
-                    log.warning(
-                        "Matching downstream issue %s to upstream issue %s",
-                        result.key,
-                        issue.url,
-                    )
-                    final_results.append(result)
-                else:
-                    # Else search returned a linked issue
-                    final_results.append(search)
-        if not final_results:
-            # Only return the most updated issue
-            results_of_query.sort(
+                username = find_username(issue, config)
+                search = check_comments_for_duplicate(client, result, username)
+                filtered_results.append(search if search else result)
+
+        # Unless the filtering removed _all_ the results, switch the results to
+        # the filtered results; otherwise, continue with the original list.
+        if filtered_results:
+            results = filtered_results
+
+        # If there is more than one result, select only the most-recently updated one.
+        if len(results) > 1:
+            log.debug(
+                "Found %i results for query with issue %r",
+                len(results),
+                issue.url,
+            )
+            results.sort(
                 key=lambda x: datetime.strptime(
                     x.fields.updated, "%Y-%m-%dT%H:%M:%S.%f+0000"
-                )
+                ),
+                reverse=True,  # Biggest (most recent) first
             )
-            final_results.append(results_of_query[0])
+            results = [results[0]]  # A list of one item
 
-        # Return the final_results
-        log.debug(
-            "Found %i results for query with issue %r", len(final_results), issue.url
-        )
-        return final_results
-    else:
-        return results_of_query
+    # Cache the result for next time and return it.
+    jira_cache[issue.url] = results[0].key
+    return results
 
 
 def find_username(_issue, config):
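To make the new control flow concrete, here is a sketch of the cache round trip across two sync passes; the URL and the FACTORY-1234 key are made up for illustration:

    url = "https://github.com/org/repo/issues/42"

    # First pass: cache miss, so the walrus expression is falsy and the
    # Snowflake query runs; the matched key is stored at the end.
    assert jira_cache.get(url) is None
    jira_cache[url] = "FACTORY-1234"  # done on the function's way out

    # Second pass: cache hit, so Snowflake is skipped entirely and only
    # the cheap JQL lookup remains.
    if result := jira_cache.get(url):
        issue_keys = [result]
        jql = f"key in ({','.join(issue_keys)})"  # "key in (FACTORY-1234)"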
@@ -314,16 +330,15 @@ def check_comments_for_duplicate(client, result, username):
     :param jira.client.JIRA client: JIRA client
     :param jira.resource.Issue result: JIRA issue
     :param string username: Username of JIRA user
-    :returns: True if duplicate comment was not found or JIRA issue if \
-        we were able to find it
-    :rtype: Bool or jira.resource.Issue
+    :returns: duplicate JIRA issue or None
+    :rtype: jira.resource.Issue or None
     """
     for comment in client.comments(result):
         search = re.search(r"Marking as duplicate of (\w*)-(\d*)", comment.body)
         if search and comment.author.name == username:
             issue_id = search.groups()[0] + "-" + search.groups()[1]
             return client.issue(issue_id)
-    return True
+    return None
 
 
 def _find_comment_in_jira(comment, j_comments):
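For reference, a small example of what the duplicate-marker regex in check_comments_for_duplicate() matches; the comment body and issue key are invented:

    import re

    body = "Marking as duplicate of FACTORY-1234"
    search = re.search(r"Marking as duplicate of (\w*)-(\d*)", body)
    if search:
        # search.groups() -> ("FACTORY", "1234")
        issue_id = search.groups()[0] + "-" + search.groups()[1]
        print(issue_id)  # FACTORY-1234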
@@ -335,6 +350,11 @@ def _find_comment_in_jira(comment, j_comments):
     :returns: Item/None
     :rtype: jira.resource.Comment/None
     """
+    if comment["date_created"] < UPDATE_DATE:
+        # If the comment date is prior to the update_date, we should not try to
+        # touch the comment; return the item as is.
+        return comment
+
     formatted_comment = _comment_format(comment)
     legacy_formatted_comment = _comment_format_legacy(comment)
     for item in j_comments:
@@ -350,13 +370,6 @@ def _find_comment_in_jira(comment, j_comments):
                 item.update(body=formatted_comment)
                 log.info("Updated one comment")
             # Now we can just return the item
-                return item
-            else:
-                # Else they are equal and we can return the item
-                return item
-    if comment["date_created"] < UPDATE_DATE:
-        # If the comment date is prior to the update_date,
-        # we should not try to touch the comment
             return item
     return None

@@ -689,6 +702,7 @@ def _create_jira_issue(client, issue, config):
         return None
 
     downstream = client.create_issue(**kwargs)
+    jira_cache[issue.url] = [downstream.key]
 
     # Add values to the Epic link, QA, and EXD-Service fields if present
     if (
@@ -875,7 +889,7 @@ def _update_transition(client, existing, issue):
     # downstream JIRA ticket
 
     # First get the closed status from the config file
-    t = filter(lambda d: "transition" in d, issue.downstream.get("issue_updates", {}))
+    t = filter(lambda d: "transition" in d, issue.downstream.get("issue_updates", []))
     closed_status = next(t)["transition"]
     if (
         closed_status is not True
@@ -1154,7 +1168,7 @@ def _update_tags(updates, existing, issue):
 
 def _build_description(issue):
     # Build the description of the JIRA issue
-    issue_updates = issue.downstream.get("issue_updates", {})
+    issue_updates = issue.downstream.get("issue_updates", [])
     description = ""
     if "description" in issue_updates:
         description = f"Upstream description: {{quote}}{issue.content}{{quote}}"

sync2jira/upstream_issue.py

Lines changed: 1 addition & 1 deletion
@@ -219,7 +219,7 @@ def add_project_values(issue, upstream, headers, config):
     """
     upstream_config = config["sync2jira"]["map"]["github"][upstream]
     project_number = upstream_config.get("github_project_number")
-    issue_updates = upstream_config.get("issue_updates", {})
+    issue_updates = upstream_config.get("issue_updates", [])
     if "github_project_fields" not in issue_updates:
         return
     issue["storypoints"] = None
