11import re
22import json
3+ import requests
34
45
56class ReadmeFormatException (Exception ):
@@ -15,13 +16,17 @@ def camel_case(s):
def _identifier_after(value, marker):
    """Return the identifier that follows *marker* in *value*, or "" if absent.

    The identifier ends at the first markdown link terminator (``]`` or ``)``),
    so both ``[https://host/ID]`` and ``[label](https://host/ID)`` yield ``ID``.
    """
    _, found, tail = value.partition(marker)
    if not found:
        return ""
    for terminator in ("]", ")"):
        tail = tail.split(terminator)[0]
    return tail.strip()


def expand_special_keys(key, value):
    """Expand special keys into their structured format (affiliation, nameIdentifiers).

    Args:
        key: Metadata key taken from the README entry.
        value: Raw text for that key. For "affiliation" it must contain a
            ``ror.org/<id>`` URL; for "nameIdentifiers" it must contain an
            ``orcid.org/<id>`` URL (plain or inside a markdown link).

    Returns:
        For "affiliation": a one-element list holding the ROR identifier, the
        scheme "ROR" and the organization name fetched from the ROR API.
        For "nameIdentifiers": a one-element list holding the ORCID identifier
        and the scheme "ORCID".
        For any other key: *value* unchanged.

    Raises:
        ValueError: If the expected ROR or ORCID URL cannot be found in *value*.
        requests.RequestException: If the ROR API lookup fails or times out.
    """
    if key == "affiliation":
        if "ror.org" not in value:
            raise ValueError("Affiliation Identifier is not a ROR")
        ror = _identifier_after(value, "ror.org/")
        if not ror:
            # "ror.org" was present but no identifier followed it.
            raise ValueError("Affiliation Identifier is not a ROR")
        # A timeout keeps a stalled connection from hanging the conversion.
        api_response = requests.get(
            f"https://api.ror.org/organizations/{ror}", timeout=30
        )
        # Fail on 4xx/5xx here rather than with a KeyError on the body below.
        api_response.raise_for_status()
        record = api_response.json()
        # NOTE(review): assumes the ROR record exposes a top-level "name";
        # confirm against the API version this endpoint serves.
        return [
            {
                "affiliationIdentifier": ror,
                "affiliationIdentifierScheme": "ROR",
                "name": record["name"],
            }
        ]
    elif key == "nameIdentifiers":
        orcid = _identifier_after(value, "orcid.org/")
        if not orcid:
            raise ValueError("Name Identifier is not an ORCID")
        return [
            {
                "nameIdentifier": orcid,
                "nameIdentifierScheme": "ORCID",
            }
        ]
    return value
@@ -38,6 +43,12 @@ def parse_readme_to_json(readme_path):
3843 current_section = None
3944 current_object = {}
4045
46+ title_line = lines .pop (0 )
47+ if title_line .startswith ('#' ) == False :
48+ raise ValueError ('README.md needs to start with "# Title"' )
49+ else :
50+ json_data ['titles' ] = [{'title' :title_line .replace ("# " ,"" )}]
51+
4152 section_pattern = re .compile (r"^##\s+(.*)$" )
4253 key_value_pattern = re .compile (r"^-\s+(.*?):\s+(.*)$" )
4354 link_pattern = re .compile (r"\[.*?\]\((.*?)\)" )
@@ -50,7 +61,7 @@ def parse_readme_to_json(readme_path):
5061 elif len (current_object ) == 1 :
5162 key , value = next (iter (current_object .items ()))
5263 if key in ["language" , "publicationYear" , "publisher" , "version" ]:
53- json_data [current_section ]. append ( value )
64+ json_data [current_section ]= value
5465 else :
5566 json_data [current_section ].append (current_object )
5667 else :
@@ -83,7 +94,6 @@ def parse_readme_to_json(readme_path):
8394
8495 if key in ["affiliation" , "nameIdentifiers" ]:
8596 value = expand_special_keys (key , value )
86- print (value )
8797 else :
8898 link_match = link_pattern .search (value )
8999 if link_match :
@@ -110,13 +120,13 @@ def parse_readme_to_json(readme_path):
110120
111121 return json_data
112122
113-
114- readme_path = "/Users/elizabethwon/downloads/ exampleREADME.md"
115- try :
116- json_data = parse_readme_to_json (readme_path )
117- output_json_path = "output1.json"
118- with open (output_json_path , "w" ) as json_file :
119- json .dump (json_data , json_file , indent = 4 )
120- print (f"Converted JSON saved to { output_json_path } " )
121- except ReadmeFormatException as e :
122- print (f"Error parsing README file: { e } " )
if __name__ == '__main__':
    # Script entry point: convert the example README into a JSON metadata file.
    readme_path = "exampleREADME.md"
    output_json_path = "output1.json"
    try:
        json_data = parse_readme_to_json(readme_path)
    except (ReadmeFormatException, ValueError) as e:
        # The parser reports malformed input with ValueError as well as
        # ReadmeFormatException (missing title, non-ROR affiliation, ...).
        print(f"Error parsing README file: {e}")
    else:
        # Write only after a successful parse; explicit encoding keeps the
        # output independent of the platform default.
        with open(output_json_path, "w", encoding="utf-8") as json_file:
            json.dump(json_data, json_file, indent=4)
        print(f"Converted JSON saved to {output_json_path}")
0 commit comments