-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathcombine_citations
More file actions
executable file
·35 lines (27 loc) · 1.06 KB
/
combine_citations
File metadata and controls
executable file
·35 lines (27 loc) · 1.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
# Combine multiple citations.CSV files from JSTOR DFR into a single CSV and
# get rid of any duplicate rows.
#
# Usage: combine_citations FILE... > combined.csv
#
# Works on any number of files passed as arguments. With no arguments only
# the header line is printed.
#
# Prints the result to STDOUT; you probably want to redirect to a file.
#
# NB scrambles jstor's original ordering of rows, which is quasi-arbitrary
# (by jstor id) anyway. The output is in the order of a shell "sort".

# The standard header line from jstor dfr.
readonly header_line="id,doi,title,author,journaltitle,volume,issue,pubdate,pagerange,publisher,type,reviewed-work"

#######################################
# Print the header line followed by the sorted, de-duplicated data rows of
# every input file.
# Globals:   header_line (read)
# Arguments: zero or more paths to citations.CSV files; the first line of
#            each file is taken to be its header and is dropped
# Outputs:   the combined CSV on stdout; diagnostics from tail (e.g. for an
#            unreadable file) on stderr
# Returns:   the exit status of the final pipeline stage (uniq)
#######################################
combine_citations() {
  local cit

  printf '%s\n' "$header_line"

  # The rows are streamed straight into the pipeline, so no temporary file
  # is created and nothing is left behind in /tmp.
  #
  # I wouldn't call this blazingly fast, but for 25000 lines it's only a
  # few seconds on my machine, so no worries.
  for cit in "$@"; do
    # Take all but the first (header) line of the file. The path is quoted
    # and preceded by "--" so names with spaces or a leading dash work.
    tail -n +2 -- "$cit"
    # Terminate the last row in case the file lacks a trailing newline;
    # otherwise it would be glued to the first row of the next file. When
    # the file does end in a newline this only adds a blank line, which
    # the grep below removes anyway.
    echo
  done | grep -v '^$' | sort | uniq
}

combine_citations "$@"