Skip to content

Commit 4e4655d

Browse files
authored
Merge pull request #215 from ErezBinyamin/master
Restructure for text parsing. Faster, more stable, more maintainable
2 parents cb64535 + 8a662d7 commit 4e4655d

File tree

1 file changed

+119
-94
lines changed

1 file changed

+119
-94
lines changed

share/adapters/oeis.sh

Lines changed: 119 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -8,148 +8,173 @@
88
# oeis <sequence ID> <language>
99
# oeis <val_a, val_b, val_c, ...>
1010
oeis() (
11-
local URL='https://oeis.org'
11+
local URL='https://oeis.org/search?q='
1212
local TMP=/tmp/oeis
1313
local DOC=/tmp/oeis/doc.html
14-
local MAX_TERMS=10
14+
local MAX_TERMS_LONG=30
15+
local MAX_TERMS_SHORT=10
1516
mkdir -p $TMP
16-
# -- get_desc --
17-
# @return print description of OEIS sequence
18-
get_desc() {
19-
grep -A 1 '<td valign=top align=left>' $DOC \
20-
| sed '/<td valign=top align=left>/d; /--/d; s/^[ \t]*//; s/<[^>]*>//g;' \
21-
| sed 's/&nbsp;/ /g; s/\&amp;/\&/g; s/&gt;/>/g; s/&lt;/</g; s/&quot;/"/g'
22-
return $?
23-
}
24-
# -- get_seq --
25-
# @param MAX_TERMS
26-
# @return Print the first MAX_TERMS terms of a sequence
27-
get_seq() {
28-
local MAX_TERMS=${1}
29-
grep -o '<tt>.*, .*[0-9]</tt>' $DOC \
30-
| sed 's/<[^>]*>//g' \
31-
| grep -v '[a-z]' \
32-
| grep -v ':' \
33-
| cut -d ',' -f 1-${MAX_TERMS}
34-
return $?
35-
}
36-
# -- parse_code --
37-
# @param GREP_REGEX
38-
# @return Code snippet that corresponds to GREP_REGEX
39-
parse_code() {
40-
local GREP_REGEX="${1}"
41-
cat $DOC \
42-
| tr '\n' '`' \
43-
| grep -o "${GREP_REGEX}" \
44-
| tr '`' '\n' \
45-
| sed 's/^[ \t]*//; s/<[^>]*>//g; /^\s*$/d;' \
46-
| sed 's/&nbsp;/ /g; s/\&amp;/\&/g; s/&gt;/>/g; s/&lt;/</g; s/&quot;/"/g'
47-
return $?
48-
}
17+
rm -f ${TMP}/authors ${TMP}/bibliograpy ${TMP}/section $TMP/code_snippet
4918
# -- MAIN --
5019
# Search sequence by ID (optional language arg)
5120
# . oeis <SEQ_ID>
52-
# . oeis <SEQ_ID> <LANGUAGE>
53-
# . oeis <LANGUAGE> <SEQ_ID>
21+
# . oeis <SEQ_ID> <SECTION>
22+
# . oeis <SECTION> <SEQ_ID>
5423
isNum='^[0-9]+$'
55-
if [ $# -lt 3 ] && [[ ${1:1} =~ $isNum || ${2:1} =~ $isNum || ${1} =~ $isNum || ${2} =~ $isNum ]] && ! echo $1 | grep -q '[0-9]' || ! echo $2 | grep -q '[0-9]'
24+
# Search for a specific sequence (and optionally a language or a :SECTION such as :list)
25+
if [ $# -ge 1 ] \
26+
&& [[ $(echo $1 | tr -d 'aA') =~ $isNum || $(echo $2 | tr -d 'aA') =~ $isNum ]] \
27+
&& [[ ! $(echo $1 | tr -d 'aA') =~ $isNum || ! $(echo $2 | tr -d 'aA') =~ $isNum ]]
5628
then
5729
# Arg-Parse ID, Generate URL
58-
if echo ${1^^} | grep -q '[B-Z]'
30+
if [[ $(echo $1 | tr -d 'aA') =~ $isNum ]]
5931
then
60-
ID=${2^^}
61-
LANGUAGE=$1
62-
else
6332
ID=${1^^}
64-
LANGUAGE=$2
33+
SECTION=$2
34+
else
35+
ID=${2^^}
36+
SECTION=$1
6537
fi
6638
[[ ${ID:0:1} == 'A' ]] && ID=${ID:1}
6739
ID=$(bc <<< "$ID")
6840
ID="A$(printf '%06d' ${ID})"
69-
URL+="/${ID}"
41+
URL+="id:${ID}&fmt=text"
7042
curl $URL 2>/dev/null > $DOC
71-
# Print Code Sample
72-
if [[ ${LANGUAGE^^} == ':LIST' ]]
43+
# :list available language code_snippets
44+
if [[ ${SECTION^^} == ':LIST' || ${SECTION^^} == ':PROG' ]]
7345
then
74-
rm -f ${TMP}/list
75-
grep -q 'MAPLE' $DOC && printf 'maple\n' >> $TMP/list
76-
grep -q 'MATHEMATICA' $DOC && printf 'mathematica\n' >> $TMP/list
77-
parse_code 'PROG.*CROSSREFS' \
78-
| grep -o '^(.*)' \
79-
| sed 's/ .*//g' \
80-
| tr -d '()' \
81-
| sort -u >> $TMP/list
82-
[ $(wc -c < $TMP/list) -ne 0 ] && cat ${TMP}/list || printf 'No code snippets available.\n'
46+
grep -q '%p' $DOC && echo 'maple' >> $TMP/section
47+
grep -q '%t' $DOC && echo 'mathematica' >> $TMP/section
48+
grep '%o' $DOC \
49+
| grep "${ID} (" \
50+
| sed "s/^.*${ID} (//; s/).*//" \
51+
| awk 'NF == 1' \
52+
>> $TMP/section
53+
[[ -f $TMP/section && $(wc -c < $TMP/section) -ne 0 ]] \
54+
&& cat ${TMP}/section | sort -u \
55+
|| printf 'No code snippets available.\n'
8356
return 0
8457
fi
85-
# Print ID, description, and sequence
58+
# Print ID
8659
printf "ID: ${ID}\n"
87-
get_desc
88-
printf '\n'
89-
get_seq ${MAX_TERMS}
60+
# Print Description (%N)
61+
grep '%N' $DOC | sed "s/^.*${ID} //"
9062
printf '\n'
63+
# Print Sequence (three sections: %S, %T and %U)
64+
grep '%S' $DOC | sed "s/^.*${ID} //" | tr -d '\n' > $TMP/seq
65+
grep '%T' $DOC | sed "s/^.*${ID} //" | tr -d '\n' >> $TMP/seq
66+
grep '%U' $DOC | sed "s/^.*${ID} //" | tr -d '\n' >> $TMP/seq
67+
cat $TMP/seq \
68+
| cut -d ',' -f 1-${MAX_TERMS_LONG} \
69+
| sed 's/,/, /g; s/$/ .../'
70+
# Generate code snippet (%p, %t, %o) (maple, mathematica, prog sections)
9171
if [ $# -gt 1 ]
9272
then
93-
if [[ ${LANGUAGE^^} == 'MAPLE' ]] && grep -q 'MAPLE' $DOC
73+
printf "\n\n"
74+
# MAPLE section (%p)
75+
if [[ ${SECTION^^} == 'MAPLE' ]] && grep -q '%p' $DOC
9476
then
95-
GREP_REGEX='MAPLE.*CROSSREFS'
96-
grep -q 'PROG' $DOC && GREP_REGEX='MAPLE.*PROG'
97-
grep -q 'MATHEMATICA' $DOC && GREP_REGEX='MAPLE.*MATHEMATICA'
98-
parse_code "${GREP_REGEX}" \
99-
| sed 's/MAPLE/(MAPLE)/; /MATHEMATICA/d; /PROG/d; /CROSSREFS/d' \
100-
> ${TMP}/code_snippet
101-
elif [[ ${LANGUAGE^^} == 'MATHEMATICA' ]] && grep -q 'MATHEMATICA' $DOC
77+
grep '%p' $DOC | sed "s/^.*${ID} //" > $TMP/code_snippet
78+
# MATHEMATICA section (%t)
79+
elif [[ ${SECTION^^} == 'MATHEMATICA' ]] && grep -q '%t' $DOC
80+
then
81+
grep '%t' $DOC | sed "s/^.*${ID} //" > $TMP/code_snippet
82+
# PROG section (%o)
83+
elif grep -qi '%o' $DOC && grep -qi $SECTION $DOC
10284
then
103-
GREP_REGEX='MATHEMATICA.*CROSSREFS'
104-
grep -q 'PROG' $DOC && GREP_REGEX='MATHEMATICA.*PROG'
105-
parse_code "${GREP_REGEX}" \
106-
| sed 's/MATHEMATICA/(MATHEMATICA)/; /PROG/d; /CROSSREFS/d' \
107-
> ${TMP}/code_snippet
108-
else
109-
# PROG section contains more code samples (Non Mathematica or Maple)
110-
parse_code 'PROG.*CROSSREFS' \
111-
| sed '/PROG/d; /CROSSREFS/d' \
112-
> ${TMP}/prog
11385
# Print out code sample for specified language
114-
rm -f ${TMP}/code_snippet
115-
awk -v tgt="${LANGUAGE^^}" -F'[()]' '/^\(/{f=(tgt==toupper($2))} f' ${TMP}/prog > ${TMP}/code_snippet
86+
grep '%o' $DOC \
87+
| sed "s/%o ${ID} //" \
88+
| awk -v tgt="${SECTION^^}" -F'[()]' '{act=$2} sub(/^\([^()]+\) */,""){f=(tgt==toupper(act))} f' \
89+
> ${TMP}/code_snippet
11690
fi
11791
# Print code snippet with 4-space indent to enable colorization
118-
if [ $(wc -c < $TMP/code_snippet) -ne 0 ]
92+
if [[ -f $TMP/code_snippet && $(wc -c < $TMP/code_snippet) -ne 0 ]]
11993
then
120-
printf "${LANGUAGE}"
94+
# Get authors
95+
cat ${TMP}/code_snippet \
96+
| grep -o ' _[A-Z].* [A-Z].*_, [A-Z].*[0-9]' \
97+
| sort -u \
98+
> ${TMP}/authors
99+
i=1
100+
# Replace authors with numbers
101+
while read author
102+
do
103+
author=$(<<<"$author" sed 's/[]\\\*\(\.[]/\\&/g')
104+
sed -i "s|${author}|[${i}]|" ${TMP}/code_snippet
105+
echo "[${i}] [${author}]" | tr -d '_' >> ${TMP}/bibliograpy
106+
let i++
107+
done <${TMP}/authors
108+
# Print snippet
121109
cat ${TMP}/code_snippet \
122-
| sed "s/(${LANGUAGE^^})/\n/; s/(${LANGUAGE})/\n/;" \
123110
| sed 's/^/ /'
124111
else
125-
printf "${LANGUAGE^^} unavailable. Use :list to view available languages.\n"
112+
printf "${SECTION^^} unavailable. Use :list to view available languages.\n"
126113
fi
127114
fi
128115
# Search unknown sequence
129-
else
116+
elif [ $# -gt 1 ] && ! echo $@ | grep -q -e [a-z] -e [A-Z]
117+
then
130118
# Build URL
131-
URL+="/search?q=signed:$(echo $@ | tr -sc '[:digit:]-' ',')"
119+
URL+="signed:$(echo $@ | tr -sc '[:digit:]-' ',')&fmt=short"
132120
curl $URL 2>/dev/null > $DOC
133121
# Sequence IDs
134-
grep -o '=id:.*&' $DOC \
135-
| sed 's/=id://; s/&//' > $TMP/id
136-
# Descriptions
137-
get_desc > $TMP/desc
138-
# Sequences
139-
get_seq ${MAX_TERMS} > $TMP/seq
140-
# Print data for all
122+
grep -o '"/A[0-9][0-9][0-9][0-9][0-9][0-9]">A[0-9][0-9][0-9][0-9][0-9][0-9]' $DOC \
123+
| sed 's/.*>//' \
124+
> $TMP/id
141125
readarray -t ID < $TMP/id
126+
# Descriptions
127+
grep -A 1 '<td valign=top align=left>' $DOC \
128+
| sed '/--/d; s/<[^>]*>//g; /^\s*$/d; s/^[ \t]*//' \
129+
| sed 's/&nbsp;/ /g; s/\&amp;/\&/g; s/&gt;/>/g; s/&lt;/</g; s/&quot;/"/g' \
130+
> $TMP/desc
142131
readarray -t DESC < $TMP/desc
132+
# Sequences
133+
grep 'style="color:black;font-size:120%' $DOC \
134+
| sed 's/<[^>]*>//g; s/^[ \t]*//' \
135+
| cut -d ',' -f 1-${MAX_TERMS_SHORT} \
136+
| sed 's/,/, /g; s/$/ .../' \
137+
> $TMP/seq
143138
readarray -t SEQ < $TMP/seq
139+
# Print all ID, DESC, SEQ
144140
for i in ${!ID[@]}
145141
do
146142
printf "${ID[$i]}: ${DESC[$i]}\n"
147143
printf "${SEQ[$i]}\n\n"
148144
done
145+
else
146+
printf "
147+
# oeis
148+
#
149+
# The On-Line Encyclopedia of Integer Sequences (OEIS),
150+
# also cited simply as Sloane's, is an online database of integer sequences.
151+
152+
# Find all possible OEIS sequences for some sequence (1,1,1,1...)
153+
curl cheat.sh/oeis/1+1+1+1
154+
155+
# Describe an OEIS sequence (A2)
156+
curl cheat.sh/oeis/A2
157+
158+
# Implementation of the A2 OEIS sequence in Python
159+
curl cheat.sh/oeis/A2/python
160+
161+
# List all available implementations of the A2 OEIS sequence
162+
curl cheat.sh/oeis/A2/:list
163+
"
164+
return 1
149165
fi
150-
grep 'results, too many to show. Please refine your search.' /tmp/oeis/doc.html | sed -e 's/<[^>]*>//g; s/^[ \t]*//'
166+
# Error statements
167+
grep 'results, too many to show. Please refine your search.' $DOC | sed -e 's/<[^>]*>//g; s/^[ \t]*//'
168+
grep -o 'Sorry, but the terms do not match anything in the table.' $DOC
169+
# print bibliography
170+
printf "\n\n"
171+
[ -f ${TMP}/bibliograpy ] && cat ${TMP}/bibliograpy
151172
# Print URL for user
152-
printf "\n[${URL}]\n" | rev | sed 's/,//' | rev
173+
printf "[${URL}]\n" \
174+
| rev \
175+
| sed 's/,//' \
176+
| rev \
177+
| sed 's/&.*/]/'
153178
)
154179
155180
oeis $@

0 commit comments

Comments
 (0)