@@ -411,9 +411,11 @@ def _get_line_meta(self, line_item: list[str]) -> dict[str, object]:
411411 def parse_issue_header (
412412 self , issues : list [str ], total_lines : int = 20
413413 ) -> dict [str , str ]:
414- """
415- A function that parses through the header of an issue.
416- It returns
414+ """Parses through all headers comments of selected reviews and returns
415+ metadata
416+
417+ This will go through all reviews and return:
418+ GitHub Issue meta: "created_at", "updated_at", "closed_at"
417419
418420 Parameters
419421 ----------
@@ -435,32 +437,72 @@ def parse_issue_header(
435437 meta_dates = ["created_at" , "updated_at" , "closed_at" ]
436438
437439 review = {}
440+ review_final = {}
438441 for issue in issues :
439442 pkg_name , body_data = self .parse_comment (issue )
440443 if not pkg_name :
441444 continue
442- # Index of 15 should include date accepted in the review meta
445+
443446 review [pkg_name ] = self .get_issue_meta (body_data , total_lines )
444447 # Add issue open and close date to package meta from GH response
445448 # Date cleaning happens via pydantic validator not here
446449 for a_date in meta_dates :
447450 review [pkg_name ][a_date ] = issue [a_date ]
448- # Get categories and issue review link
449- review [pkg_name ]["categories" ] = self .get_categories (body_data )
451+
450452 review [pkg_name ]["issue_link" ] = issue ["url" ].replace (
451453 "https://api.github.com/repos/" , "https://github.com/"
452454 )
453455
454- review_clean = {
455- key : value
456- for key , value in review [pkg_name ].items ()
457- if not key .startswith ("##" )
458- and not key .startswith ("---" )
459- and not key .startswith ("-_[x]_i_agree" )
456+ # Get categories and issue review link
457+ review [pkg_name ]["categories" ] = self .get_categories (
458+ body_data , "## Scope" , 10
459+ )
460+ # NOTE: right now the numeric value is hard coded based on the
461+ # number of partners listed in the issue. so it assumes there is
462+ # 3 partners. but that might not always be the case so we should
463+ # add a check in case there are fewer or more partners (if someone
464+ # modifies the template which they tend to do OR if it's an older
465+ # template)
466+ # TODO: rather than exact match have the line start_with the string
467+ review [pkg_name ]["partners" ] = self .get_categories (
468+ body_data , "## Community Partnerships" , 3
469+ )
470+ # review[pkg_name]["domains"] = self.get_categories(body_data,
471+ # '## Domains',
472+ # 3)
473+
474+ # Only return keys for metadata that we need
475+ final_keys = [
476+ "submitting_author" ,
477+ "all_current_maintainers" ,
478+ "package_name" ,
479+ "one-line_description_of_package" ,
480+ "repository_link" ,
481+ "version_submitted" ,
482+ "editor" ,
483+ "reviewer_1" ,
484+ "reviewer_2" ,
485+ "archive" ,
486+ "version_accepted" ,
487+ "joss_doi" ,
488+ "date_accepted" ,
489+ "categories" ,
490+ "partners" ,
491+ "domain" ,
492+ "created_at" ,
493+ "updated_at" ,
494+ "closed_at" ,
495+ "issue_link" ,
496+ "categories" ,
497+ ]
498+
499+ review_final [pkg_name ] = {
500+ key : review [pkg_name ][key ]
501+ for key in final_keys
502+ if key in review [pkg_name ].keys ()
460503 }
461- review [pkg_name ] = review_clean
462504
463- return review
505+ return review_final
464506
465507 def get_issue_meta (
466508 self ,
@@ -669,8 +711,10 @@ def get_last_commit(self, repo: str) -> str:
669711
670712 return date
671713
714+ # This works. It could be made more generic by removing `fmt` (it is not
715+ # used) and replacing it with a section header string and a number of values.
672716 def get_categories (
673- self , issue_list : list [list [str ]], fmt : bool = True
717+ self , issue_list : list [list [str ]], section_str : str , num_vals : int
674718 ) -> list [str ]:
675719 """Parse through a pyOS review issue and grab categories associated
676720 with a package
@@ -686,11 +730,12 @@ def get_categories(
686730 required for the website.
687731 """
688732 # Find the starting index of the category section
733+ # This will be more robust if we use starts_with rather than in i think
689734 try :
690735 index = next (
691736 i
692737 for i , sublist in enumerate (issue_list )
693- if "## Scope" in sublist
738+ if section_str in sublist
694739 )
695740 # Iterate from scope index to first line starting with " - ["
696741 # To find list of category check boxes
@@ -699,10 +744,11 @@ def get_categories(
699744 cat_index = i
700745 break
701746 except StopIteration :
702- print ("'## Scope' not found in the list." )
747+ print (section_str , " not found in the list." )
748+ return None
703749
704750 # Get checked categories for package
705- cat_list = issue_list [cat_index : cat_index + 10 ]
751+ cat_list = issue_list [cat_index : cat_index + num_vals ]
706752 selected = [
707753 item [0 ] for item in cat_list if re .search (r"- \[[xX]\] " , item [0 ])
708754 ]
0 commit comments