@@ -33,6 +33,7 @@ def expand_special_keys(key, value):
3333 {
3434 "nameIdentifier" : orcid ,
3535 "nameIdentifierScheme" : "ORCID" ,
36+ "schemeUri" : f"https://orcid.org/{ value } " ,
3637 }
3738 ]
3839 return value
@@ -54,14 +55,21 @@ def parse_readme_to_json(readme_path):
5455 raise ValueError ('README.md needs to start with "# Title"' )
5556 else :
5657 json_data ["titles" ] = [{"title" : title_line .replace ("# " , "" )}]
58+
59+ contributors = []
60+ identifiers = []
61+ item_list = []
5762
5863 section_pattern = re .compile (r"^##\s+(.*)$" )
5964 key_value_pattern = re .compile (r"^-\s+(.*?):\s+(.*)$" )
6065 link_pattern = re .compile (r"\[.*?\]\((.*?)\)" )
6166
6267 for line_number , line in enumerate (lines , 1 ):
6368 if not line .strip ():
64- if current_object and current_section :
69+ if item_list and current_section :
70+ json_data [current_section ] = item_list
71+ item_list = []
72+ elif current_object and current_section :
6573 if current_section == "types" :
6674 json_data [current_section ] = current_object
6775 elif len (current_object ) == 1 :
@@ -70,14 +78,43 @@ def parse_readme_to_json(readme_path):
7078 json_data [current_section ] = value
7179 else :
7280 json_data [current_section ].append (current_object )
81+ elif current_section in ["creators" , "contributors" ]:
82+ contributors .append (current_object )
83+ current_object = {}
84+ elif current_section == "identifiers" :
85+ identifiers .append (current_object )
86+ current_object = {}
7387 else :
7488 json_data [current_section ].append (current_object )
7589 current_object = {}
7690 continue
7791
7892 section_match = section_pattern .match (line )
7993 if section_match :
80- if current_section and current_object :
94+ if item_list :
95+ json_data [current_section ] = item_list
96+ elif current_object :
97+ if current_section in json_data :
98+ if isinstance (json_data [current_section ], list ):
99+ json_data [current_section ].append (current_object )
100+ elif isinstance (json_data [current_section ], dict ):
101+ json_data [current_section ].update (current_object )
102+ else :
103+ json_data [current_section ] = (
104+ [current_object ]
105+ if current_section != "types"
106+ else current_object
107+ )
108+ current_object = {}
109+
110+ elif contributors and current_section in ["creators" , "contributors" ]:
111+ json_data [current_section ] = contributors
112+ contributors = []
113+ elif identifiers and current_section == "identifiers" :
114+ json_data [current_section ] = identifiers
115+ identifiers = []
116+
117+ elif current_section and current_object :
81118 if current_section == "types" :
82119 json_data [current_section ] = current_object
83120 elif len (current_object ) == 1 :
@@ -100,19 +137,38 @@ def parse_readme_to_json(readme_path):
100137
101138 if key in ["affiliation" , "nameIdentifiers" ]:
102139 value = expand_special_keys (key , value )
140+ elif (
141+ key == "nameType"
142+ and current_object
143+ and current_section in ["creators" , "contributors" ]
144+ ):
145+ contributors .append (current_object )
146+ current_object = {}
147+ elif current_section in ["subjects" ]:
148+ item_list .append ({key : value })
149+ elif current_section == "dates" :
150+ if key == "date" :
151+ current_object ["date" ] = value
152+ elif key == "dateType" :
153+ current_object ["dateType" ] = value
154+ item_list .append (current_object )
155+ current_object = {}
103156 else :
104157 link_match = link_pattern .search (value )
105158 if link_match :
106159 value = link_match .group (1 )
107-
108- current_object [key ] = value
160+ current_object [key ] = value
109161
110162 elif line .strip () and not section_match :
111163 raise ReadmeFormatException (
112164 f"Incorrect format detected at line { line_number } : { line } "
113165 )
114166
115- if current_section and current_object :
167+ if contributors and current_section in ["creators" , "contributors" ]:
168+ json_data [current_section ] = contributors
169+ elif identifiers and current_section == "identifiers" :
170+ json_data [current_section ] = identifiers
171+ elif current_section and current_object :
116172 if current_section == "types" :
117173 json_data [current_section ] = current_object
118174 elif len (current_object ) == 1 :
@@ -126,9 +182,8 @@ def parse_readme_to_json(readme_path):
126182
127183 return json_data
128184
129-
130185if __name__ == "__main__" :
131- readme_path = "exampleREADME.md"
186+ readme_path = "/Users/elizabethwon/downloads/ exampleREADME.md"
132187 try :
133188 json_data = parse_readme_to_json (readme_path )
134189 output_json_path = "output1.json"
0 commit comments