|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Update and sort the creators list of the zenodo record.""" |
| 3 | +import sys |
| 4 | +from pathlib import Path |
| 5 | +import json |
| 6 | +from fuzzywuzzy import fuzz, process |
| 7 | + |
| 8 | +# These authors (listed by name, "Last, First") should go last |
| 9 | +CREATORS_LAST = ['Rokem, Ariel', 'Esteban, Oscar'] |
| 10 | +CONTRIBUTORS_LAST = ['Poldrack, Russell A.'] |
| 11 | + |
| 12 | + |
def _first_last(name):
    """Convert a ``'Last, First'`` name into ``'First Last'`` order."""
    return ' '.join(name.split(',')[::-1]).strip()


def sort_contributors(entries, git_lines, exclude=None, last=None):
    """Return a list of author dictionaries, ordered by contribution.

    Parameters
    ----------
    entries : list of dict
        Author records; each must have a ``'name'`` key formatted as
        ``'Last, First'``. The dictionaries are modified in place: a
        ``'position'`` key (1-based rank) is added to every one of them.
    git_lines : list of str
        Author names (``'First Last'``), in the order in which they should
        be ranked. Each name is fuzzy-matched against ``entries``.
    exclude : list of dict, optional
        Author records (same format as ``entries``) whose names must not be
        reported as unmatched.
    last : list of str, optional
        Names (exactly as in the ``'name'`` field) to be moved to the end
        of the list, in the given order.

    Returns
    -------
    author_matches : list of dict
        All ``entries``, sorted by their assigned ``'position'``: authors
        matched in ``git_lines`` first, then the remaining ones
        alphabetically, then those listed in ``last``.
    unmatched : list of str
        Names from ``git_lines`` that matched no entry and are not in
        ``exclude``.

    Raises
    ------
    ValueError
        If a name in ``last`` is not present in ``entries``.

    """
    last = last or []
    sorted_authors = sorted(entries, key=lambda i: i['name'])

    first_last = [_first_last(val['name']) for val in sorted_authors]
    first_last_excl = {_first_last(val['name']) for val in exclude or []}

    unmatched = []
    matched_idx = []  # indices into sorted_authors, in order of first match
    seen = set()
    for ele in git_lines:
        matches = process.extract(ele, first_last, scorer=fuzz.token_sort_ratio,
                                  limit=2)
        # matches is a list [('First match', % Match), ('Second match', % Match)]
        # and is empty when there are no entries to match against.
        if not matches or matches[0][1] <= 80:
            # skip unmatched names
            if ele not in first_last_excl:
                unmatched.append(ele)
            continue

        idx = first_last.index(matches[0][0])
        if idx not in seen:
            seen.add(idx)
            matched_idx.append(idx)

    # Track authors by index rather than by name, so that two entries that
    # share a display name are both kept (and neither is duplicated).
    author_matches = [sorted_authors[idx] for idx in matched_idx]
    author_matches += [author for idx, author in enumerate(sorted_authors)
                       if idx not in seen]

    for position, author in enumerate(author_matches, start=1):
        author['position'] = position
    position = len(author_matches) + 1

    all_names = [val['name'] for val in author_matches]
    for last_author in last:
        try:
            last_idx = all_names.index(last_author)
        except ValueError:
            raise ValueError(
                'Author "%s" was requested to go last, but is not found '
                'in the list of entries.' % last_author) from None
        author_matches[last_idx]['position'] = position
        position += 1

    author_matches = sorted(author_matches, key=lambda k: k['position'])

    return author_matches, unmatched
| 59 | + |
| 60 | + |
def get_git_lines(fname='line-contributors.txt'):
    """Run git-line-summary.

    If ``fname`` already exists and is not empty, its contents are reused
    instead of running the command. Otherwise, ``git-line-summary`` is
    executed (when found in the ``PATH``) and its output, minus any
    "Not Committed Yet" lines, is written to ``fname``.

    Parameters
    ----------
    fname : str
        Path of the file used to cache the output of ``git-line-summary``.

    Returns
    -------
    list of str
        One name per output line containing a ``%`` sign, obtained by
        dropping the first and last whitespace-separated fields of the line.

    Raises
    ------
    RuntimeError
        If no lines could be read from ``fname`` nor obtained from
        ``git-line-summary``.

    """
    import shutil
    import subprocess as sp
    contrib_file = Path(fname)

    lines = []
    if contrib_file.exists():
        print('WARNING: Reusing existing %s file.' % contrib_file, file=sys.stderr)
        lines = contrib_file.read_text().splitlines()

    git_line_summary_path = shutil.which('git-line-summary')
    if not lines and git_line_summary_path:
        print("Running git-line-summary on repo")
        lines = sp.check_output([git_line_summary_path]).decode().splitlines()
        lines = [line for line in lines if "Not Committed Yet" not in line]
        contrib_file.write_text('\n'.join(lines))

    if not lines:
        # Build the message explicitly: ``"a%s" % "b" * flag`` multiplies the
        # whole formatted message by the flag (``%`` and ``*`` share the same
        # precedence), which produced an empty error message whenever
        # git-line-summary was installed.
        message = "Could not find line-contributors from git repository."
        if git_line_summary_path is None:
            message += " git-line-summary not found, please install git-extras. "
        raise RuntimeError(message)
    return [' '.join(line.strip().split()[1:-1]) for line in lines if '%' in line]
| 84 | + |
| 85 | + |
if __name__ == '__main__':
    # Author names from git-line-summary, in the order it reports them
    data = get_git_lines()

    zenodo_file = Path('.zenodo.json')
    zenodo = json.loads(zenodo_file.read_text())

    # Former members are excluded from the "missing" report of both lists,
    # so the file is read only once.
    former = json.loads(Path('.maint/former.json').read_text())

    creators = json.loads(Path('.maint/developers.json').read_text())
    zen_creators, miss_creators = sort_contributors(
        creators, data,
        exclude=former,
        last=CREATORS_LAST)
    contributors = json.loads(Path('.maint/contributors.json').read_text())
    zen_contributors, miss_contributors = sort_contributors(
        contributors, data,
        exclude=former,
        last=CONTRIBUTORS_LAST)
    zenodo['creators'] = zen_creators
    zenodo['contributors'] = zen_contributors

    # A committer is missing only if found in neither list; report them in
    # a stable order, and only when there is somebody to report.
    missing = sorted(set(miss_creators).intersection(miss_contributors))
    if missing:
        print("Some people made commits, but are missing in .maint/ "
              "files: %s." % ', '.join(missing),
              file=sys.stderr)

    # Remove position
    for creator in zenodo['creators']:
        creator.pop('position', None)
        affiliation = creator.get('affiliation')
        if isinstance(affiliation, list):
            # Keep only the first of several affiliations
            creator['affiliation'] = affiliation[0]

    for creator in zenodo['contributors']:
        creator['type'] = 'Researcher'
        creator.pop('position', None)
        affiliation = creator.get('affiliation')
        if isinstance(affiliation, list):
            # Keep only the first of several affiliations
            creator['affiliation'] = affiliation[0]

    zenodo_file.write_text('%s\n' % json.dumps(zenodo, indent=2))
0 commit comments