Skip to content

Commit e341f40

Browse files
committed
Fix: cleanup code to add partner element
1 parent d6724e7 commit e341f40

File tree

1 file changed

+64
-18
lines changed

1 file changed

+64
-18
lines changed

src/pyosmeta/parse_issues.py

Lines changed: 64 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -411,9 +411,11 @@ def _get_line_meta(self, line_item: list[str]) -> dict[str, object]:
411411
def parse_issue_header(
412412
self, issues: list[str], total_lines: int = 20
413413
) -> dict[str, str]:
414-
"""
415-
A function that parses through the header of an issue.
416-
It returns
414+
"""Parses through all header comments of selected reviews and returns
415+
metadata
416+
417+
This will go through all reviews and return:
418+
GitHub Issue meta: "created_at", "updated_at", "closed_at"
417419
418420
Parameters
419421
----------
@@ -435,32 +437,72 @@ def parse_issue_header(
435437
meta_dates = ["created_at", "updated_at", "closed_at"]
436438

437439
review = {}
440+
review_final = {}
438441
for issue in issues:
439442
pkg_name, body_data = self.parse_comment(issue)
440443
if not pkg_name:
441444
continue
442-
# Index of 15 should include date accepted in the review meta
445+
443446
review[pkg_name] = self.get_issue_meta(body_data, total_lines)
444447
# Add issue open and close date to package meta from GH response
445448
# Date cleaning happens via pydantic validator not here
446449
for a_date in meta_dates:
447450
review[pkg_name][a_date] = issue[a_date]
448-
# Get categories and issue review link
449-
review[pkg_name]["categories"] = self.get_categories(body_data)
451+
450452
review[pkg_name]["issue_link"] = issue["url"].replace(
451453
"https://api.github.com/repos/", "https://github.com/"
452454
)
453455

454-
review_clean = {
455-
key: value
456-
for key, value in review[pkg_name].items()
457-
if not key.startswith("##")
458-
and not key.startswith("---")
459-
and not key.startswith("-_[x]_i_agree")
456+
# Get categories and issue review link
457+
review[pkg_name]["categories"] = self.get_categories(
458+
body_data, "## Scope", 10
459+
)
460+
# NOTE: right now the numeric value is hard coded based on the
461+
# number of partners listed in the issue, so it assumes there are
462+
# 3 partners. That might not always be the case, so we should
463+
# add a check in case there are fewer or more partners (if someone
464+
# modifies the template which they tend to do OR if it's an older
465+
# template)
466+
# TODO: rather than an exact match, check that the line starts with the string (str.startswith)
467+
review[pkg_name]["partners"] = self.get_categories(
468+
body_data, "## Community Partnerships", 3
469+
)
470+
# review[pkg_name]["domains"] = self.get_categories(body_data,
471+
# '## Domains',
472+
# 3)
473+
474+
# Only return keys for metadata that we need
475+
final_keys = [
476+
"submitting_author",
477+
"all_current_maintainers",
478+
"package_name",
479+
"one-line_description_of_package",
480+
"repository_link",
481+
"version_submitted",
482+
"editor",
483+
"reviewer_1",
484+
"reviewer_2",
485+
"archive",
486+
"version_accepted",
487+
"joss_doi",
488+
"date_accepted",
489+
"categories",
490+
"partners",
491+
"domain",
492+
"created_at",
493+
"updated_at",
494+
"closed_at",
495+
"issue_link",
496+
"categories",
497+
]
498+
499+
review_final[pkg_name] = {
500+
key: review[pkg_name][key]
501+
for key in final_keys
502+
if key in review[pkg_name].keys()
460503
}
461-
review[pkg_name] = review_clean
462504

463-
return review
505+
return review_final
464506

465507
def get_issue_meta(
466508
self,
@@ -669,8 +711,10 @@ def get_last_commit(self, repo: str) -> str:
669711

670712
return date
671713

714+
# This works - I could just make it more generic and remove fmt since it's
715+
# not used and replace it with a number of values and a test string
672716
def get_categories(
673-
self, issue_list: list[list[str]], fmt: bool = True
717+
self, issue_list: list[list[str]], section_str: str, num_vals: int
674718
) -> list[str]:
675719
"""Parse through a pyOS review issue and grab categories associated
676720
with a package
@@ -686,11 +730,12 @@ def get_categories(
686730
required for the website.
687731
"""
688732
# Find the starting index of the category section
733+
# This would be more robust if we used str.startswith rather than `in`, I think
689734
try:
690735
index = next(
691736
i
692737
for i, sublist in enumerate(issue_list)
693-
if "## Scope" in sublist
738+
if section_str in sublist
694739
)
695740
# Iterate from scope index to first line starting with " - ["
696741
# To find list of category check boxes
@@ -699,10 +744,11 @@ def get_categories(
699744
cat_index = i
700745
break
701746
except StopIteration:
702-
print("'## Scope' not found in the list.")
747+
print(section_str, " not found in the list.")
748+
return None
703749

704750
# Get checked categories for package
705-
cat_list = issue_list[cat_index : cat_index + 10]
751+
cat_list = issue_list[cat_index : cat_index + num_vals]
706752
selected = [
707753
item[0] for item in cat_list if re.search(r"- \[[xX]\] ", item[0])
708754
]

0 commit comments

Comments
 (0)