Skip to content

Commit 984b767

Browse files
committed
Fix #153 Fix #163
1 parent 31bf319 commit 984b767

File tree

1 file changed: +26 −20 lines changed

src/somef/cli.py

Lines changed: 26 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434

3535
import urllib
3636

37+
3738
## Markdown to plain text conversion: begin ##
3839
# code snippet from https://stackoverflow.com/a/54923798
3940
def unmark_element(element, stream=None):
@@ -226,7 +227,7 @@ def get_path(obj, path):
226227
license_info = {}
227228
if 'license' in filtered_resp:
228229
for k in ('name', 'url'):
229-
if k in filtered_resp['license']:
230+
if k in filtered_resp['license']:
230231
license_info[k] = filtered_resp['license'][k]
231232

232233
## If we didn't find it, look for the license
@@ -240,7 +241,8 @@ def get_path(obj, path):
240241
# license_text = license_text_resp.text
241242
license_info['url'] = possible_license_url
242243

243-
filtered_resp['license'] = license_info
244+
if license_info != '':
245+
filtered_resp['license'] = license_info
244246

245247
# get keywords / topics
246248
topics_headers = header
@@ -310,7 +312,7 @@ def get_path(obj, path):
310312
zip_ref.extractall(repo_extract_dir)
311313

312314
repo_folders = os.listdir(repo_extract_dir)
313-
assert(len(repo_folders) == 1)
315+
assert (len(repo_folders) == 1)
314316

315317
repo_dir = os.path.join(repo_extract_dir, repo_folders[0])
316318

@@ -334,7 +336,8 @@ def get_path(obj, path):
334336
else:
335337
docs_path = repo_relative_path + "/" + dirname
336338

337-
docs.append(f"https://github.com/{owner}/{repo_name}/tree/{urllib.parse.quote(repo_ref)}/{docs_path}")
339+
docs.append(
340+
f"https://github.com/{owner}/{repo_name}/tree/{urllib.parse.quote(repo_ref)}/{docs_path}")
338341
print(docs)
339342

340343
print("NOTEBOOKS:")
@@ -343,14 +346,12 @@ def get_path(obj, path):
343346
print("DOCKERFILES:")
344347
print(dockerfiles)
345348

346-
def convert_to_raw_usercontent(partial):
347-
348-
return f"https://raw.githubusercontent.com/{owner}/{repo_name}/{repo_ref}/{urllib.parse.quote(partial)}"
349-
350-
351-
filtered_resp["hasExecutableNotebook"] = [convert_to_raw_usercontent(x) for x in notebooks]
352-
filtered_resp["hasBuildFile"] = [convert_to_raw_usercontent(x) for x in dockerfiles]
353-
filtered_resp["hasDocumentation"] = docs
349+
if len(notebooks) > 0:
350+
filtered_resp["hasExecutableNotebook"] = [convert_to_raw_usercontent(x, owner, repo_name, repo_ref) for x in notebooks]
351+
if len(dockerfiles) > 0:
352+
filtered_resp["hasBuildFile"] = [convert_to_raw_usercontent(x, owner, repo_name, repo_ref) for x in dockerfiles]
353+
if len(docs) > 0:
354+
filtered_resp["hasDocumentation"] = docs
354355

355356
## get releases
356357
releases_list, date = rate_limit_get(repo_api_base_url + "/releases",
@@ -365,6 +366,9 @@ def convert_to_raw_usercontent(partial):
365366
return text, filtered_resp
366367

367368

369+
def convert_to_raw_usercontent(partial, owner, repo_name, repo_ref):
370+
return f"https://raw.githubusercontent.com/{owner}/{repo_name}/{repo_ref}/{urllib.parse.quote(partial)}"
371+
368372
## Function takes readme text as input and divides it into excerpts
369373
## Returns the extracted excerpts
370374
def create_excerpts(string_list):
@@ -558,7 +562,7 @@ def merge(header_predictions, predictions, citations, dois, binder_links, long_t
558562
print("Merge prediction using header information, classifier and bibtex and doi parsers")
559563
if long_title:
560564
predictions['long_title'] = {'excerpt': long_title, 'confidence': [1.0],
561-
'technique': 'Regular expression'}
565+
'technique': 'Regular expression'}
562566
for i in range(len(citations)):
563567
if 'citation' not in predictions.keys():
564568
predictions['citation'] = []
@@ -575,7 +579,7 @@ def merge(header_predictions, predictions, citations, dois, binder_links, long_t
575579
for notebook in binder_links:
576580
# The identifier is in position 1. Position 0 is the badge id, which we don't want to export
577581
predictions['executable_example'].insert(0, {'excerpt': notebook[1], 'confidence': [1.0],
578-
'technique': 'Regular expression'})
582+
'technique': 'Regular expression'})
579583
for headers in header_predictions:
580584
if headers not in predictions.keys():
581585
predictions[headers] = header_predictions[headers]
@@ -596,7 +600,10 @@ def format_output(git_data, repo_data):
596600
repo_data['description'] = []
597601
repo_data['description'].append({'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'})
598602
else:
599-
repo_data[i] = {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'}
603+
if i == 'hasExecutableNotebook' or i == 'hasBuildFile' or i == 'hasDocumentation':
604+
repo_data[i] = {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'File Exploration'}
605+
else:
606+
repo_data[i] = {'excerpt': git_data[i], 'confidence': [1.0], 'technique': 'GitHub API'}
600607

601608
return repo_data
602609

@@ -618,16 +625,15 @@ def save_json(git_data, repo_data, outfile):
618625
repo_data = format_output(git_data, repo_data)
619626
save_json_output(repo_data, outfile)
620627

621-
def save_codemeta_output(repo_data, outfile, pretty=False):
622628

629+
def save_codemeta_output(repo_data, outfile, pretty=False):
623630
def data_path(path):
624631
return DataGraph.resolve_path(repo_data, path)
625632

626633
def format_date(date_string):
627634
date_object = date_parser.parse(date_string)
628635
return date_object.strftime("%Y-%m-%d")
629636

630-
631637
latest_release = None
632638
releases = data_path(["releases", "excerpt"])
633639

@@ -659,9 +665,9 @@ def average_confidence(x):
659665
else:
660666
return 0
661667

662-
663668
descriptions = data_path(["description"])
664-
descriptions.sort(key=lambda x: (average_confidence(x) + (1 if x["technique"] == "GitHub API" else 0)), reverse=True)
669+
descriptions.sort(key=lambda x: (average_confidence(x) + (1 if x["technique"] == "GitHub API" else 0)),
670+
reverse=True)
665671
descriptions_text = [x["excerpt"] for x in descriptions]
666672

667673
codemeta_output = {
@@ -792,4 +798,4 @@ def run_cli(*,
792798
out_file.write(data_graph.g.serialize(format=graph_format))
793799

794800
if codemeta_out is not None:
795-
save_codemeta_output(repo_data, codemeta_out, pretty=pretty)
801+
save_codemeta_output(repo_data, codemeta_out, pretty=pretty)

0 commit comments

Comments
 (0)