|
#!/bin/bash
# for text similarity task
#
# Runs the SICK experiment on the train, test and trial splits: lists the
# problem files, runs ./en/similarity_en_mp_any.sh on each of them in
# parallel, writes one HTML report per split and finally calls
# scripts/randomforest_all.py.
#
# Usage:
#
# ./en/emnlp2017exp.sh 3 en/semantic_templates_en_event_sts.yaml (word2vec)
#
# Arguments:
#   $1  How many processes in parallel you want to run.
#       The maximum number should be lower than the number of cores in your
#       machine. Default: 3
#   $2  Semantic templates file, handed to ./en/similarity_en_mp_any.sh.
#   $3  Optional. The literal string "word2vec" runs ./word2vec.sh before
#       the experiment and stops the word2vec-api.py process afterwards.
#
# All three SICK splits are processed:
#   train (4439 problems),
#   test (4906 problems),
#   trial (495 problems).

#######################################
# Read the positional parameters into the script-wide settings.
# Globals:   cores, templates, word2vec (written)
# Arguments: the script's own "$@"
# Outputs:   an error message on stderr when the core count is invalid
# Returns:   0 on success; exits the script with status 1 on a bad core count
#######################################
parse_args() {
  cores=${1:-3}
  templates=${2:-}
  # The documented usage passes the word2vec switch as the third argument.
  # Earlier revisions read it from $4, so $4 still wins when it is given.
  word2vec=${4:-${3:-}}

  # The file lists are later cut into ${cores} chunks, so the value has to be
  # a positive integer.
  if ! [[ "${cores}" =~ ^[1-9][0-9]*$ ]]; then
    printf 'Number of processes must be a positive integer, got: %s\n' \
      "${cores}" >&2
    exit 1
  fi
}

parse_args "$@"

if [[ "${word2vec}" == "word2vec" ]]; then
  ./word2vec.sh
fi

# SICK source file (only referenced by the disabled extraction step).
sick=en/SICK.semeval.txt

plain_dir=plain
plain_dir2=plain2
results_dir=results

# Directory holding the per-problem files taken from the SICK dataset.
# mkdir -p is a no-op when the directory already exists.
mkdir -p "${plain_dir}"

# Disabled step: extract the individual problems from the SICK file,
# removing the header line.
# NOTE(review): this snippet writes into en_plain/, whereas the rest of the
# script reads the problems from ${plain_dir} -- confirm the target directory
# before re-enabling it.
#echo "Extracting problems from the SICK file."
#tail -n +2 $sick | \
#tr -d '\r' | \
#awk -F'\t' \
#  '{pair_id=$1;
#    sub(/\.$/,"",$2);
#    sub(/\.$/,"",$3);
#    premise=$2;
#    conclusion=$3;
#    if($4 == "CONTRADICTION"){
#      judgement="no";
#    } else if ($4 == "ENTAILMENT") {
#      judgement="yes";
#    } else if ($4 == "NEUTRAL") {
#      judgement="unknown";
#    }
#    set=$12;
#    printf "%s.\n%s.\n", premise, conclusion > "en_plain/sick_"tolower(set)"_"pair_id".txt";
#    printf "%s\n", judgement > "en_plain/sick_"tolower(set)"_"pair_id".answer";
#  }'

# Create files that list all filenames of training, testing and trial, and
# split every list into chunks (sick_<split>.files_aa, _ab, ...) so that each
# chunk can be handled by its own process.
if ! [[ "${cores}" =~ ^[1-9][0-9]*$ ]]; then
  printf 'Number of processes must be a positive integer, got: %s\n' \
    "${cores}" >&2
  exit 1
fi

for dset in train test trial; do
  file_list="${plain_dir}/sick_${dset}.files"

  # ls -v sorts numbers inside the names naturally (sick_train_2 comes before
  # sick_train_10), which keeps the problems in pair-id order.
  ls -v "${plain_dir}"/sick_"${dset}"_*.txt > "${file_list}"

  # Remove chunks left over from a previous run, which may have used another
  # number of processes. -f keeps a first run (no chunks yet) quiet.
  rm -f "${file_list}"_??

  nfiles=$(wc -l < "${file_list}")
  if (( nfiles == 0 )); then
    # split rejects "-l 0", so an empty split is reported and skipped.
    printf 'No problem files found for split "%s" in %s.\n' \
      "${dset}" "${plain_dir}" >&2
    continue
  fi

  # Ceiling of nfiles / cores: at most ${cores} chunks are produced.
  lines_per_split=$(( (nfiles + cores - 1) / cores ))
  split -l "${lines_per_split}" "${file_list}" "${file_list}_"
done

# Install the SICK-specific Coq resources under the fixed names used in the
# working directory, then compile the static library.
coq_library_src=en/coqlib_sick.v
coq_tactics_src=en/tactics_coq_sick.txt

cp "${coq_tactics_src}" tactics_coq.txt
cp "${coq_library_src}" coqlib.v
coqc coqlib.v

# Background colours of the problem links in the HTML report.
red_color="rgb(255,0,0)"
green_color="rgb(0,255,0)"
white_color="rgb(255,255,255)"
gray_color="rgb(136,136,136)"

# For every split: run the pipeline on all problems in parallel, print the
# gold and system answers, and write ${results_dir}/main_<split>.html.
for dataset in train test trial; do
  # Run pipeline for each problem, one background worker per chunk file.
  for ff in "${plain_dir}"/sick_"${dataset}".files_??; do
    # Without chunk files the glob stays a literal pattern; skip it.
    [[ -e "${ff}" ]] || continue
    # The list is read through descriptor 3 so that the pipeline script
    # cannot consume it through its standard input.
    while IFS= read -r f <&3; do
      [[ -n "${f}" ]] || continue
      # The word2vec switch is passed on only when it was given.
      ./en/similarity_en_mp_any.sh "${f}" "${templates}" \
        ${word2vec:+"${word2vec}"}
    done 3< "${ff}" &
  done

  # Wait for the parallel processes to finish.
  wait

  # Print "<gold file> <gold answer> <system answer>", tab-separated, for
  # every problem; a missing system answer is shown as "unknown".
  for f in ./"${plain_dir2}"/sick_"${dataset}"_*.answer; do
    [[ -e "${f}" ]] || continue
    base_filename=${f##*/}
    sys_filename=./${results_dir}/${base_filename}
    gold_answer=$(head -n 1 "${f}")
    if [[ -e "${sys_filename}" ]]; then
      sys_answer=$(head -n 1 "${sys_filename}")
    else
      sys_answer="unknown"
    fi
    printf '%s\t%s\t%s\n' "${f}" "${gold_answer}" "${sys_answer}"
  done

  # Write an HTML table with one row per problem (gold answer, system answer,
  # proving time), followed by the average proving time.
  echo "Evaluating."
  report="${results_dir}/main_${dataset}.html"
  # category_templates is not set anywhere in this script; the templates
  # file name is used for the title unless the environment provides it.
  echo "<!doctype html>
<html lang='en'>
<head>
  <meta charset='UTF-8'>
  <title>Evaluation results of ${category_templates:-${templates}}</title>
  <style>
    body {
      font-size: 1.5em;
    }
  </style>
</head>
<body>
<table border='1'>
<tr>
  <td>sick problem</td>
  <td>gold answer</td>
  <td>system answer</td>
  <td>proving time</td>
</tr>" > "${report}"

  # Per-split counters. total_number must restart at zero for every split,
  # otherwise the average of a later split is diluted by the earlier ones.
  # NOTE: total_observations, correct_recognitions and attempts are tallied
  # but are not written to the report.
  total_observations=0
  correct_recognitions=0
  attempts=0
  total_number=0
  total_proving_time=0

  while IFS= read -r gold_filename; do
    # Filename without the directory path, and without the extension.
    base_filename=${gold_filename##*/}
    problem_name=${base_filename%.answer}
    system_filename=${results_dir}/${base_filename}
    time_filename=${results_dir}/${problem_name}.time

    gold_answer=$(cat "${gold_filename}")
    if [[ -e "${system_filename}" ]]; then
      system_answer=$(cat "${system_filename}")
    else
      system_answer="unknown"
    fi
    if [[ -e "${time_filename}" ]]; then
      proving_time=$(cat "${time_filename}")
    else
      proving_time=""
    fi

    # A missing or empty time counts as 0 so that the running sum stays a
    # valid bc expression.
    total_proving_time=$(echo "${total_proving_time} + ${proving_time:-0}" | bc -l)
    total_number=$((total_number + 1))

    # Only problems whose gold answer is yes/no are coloured: green when the
    # system agrees, red when it does not, gray when the system answered
    # neither yes nor no.
    color=${white_color}
    if [[ "${gold_answer}" == "yes" || "${gold_answer}" == "no" ]]; then
      total_observations=$((total_observations + 1))
      if [[ "${gold_answer}" == "${system_answer}" ]]; then
        correct_recognitions=$((correct_recognitions + 1))
        color=${green_color}
      else
        color=${red_color}
      fi
      if [[ "${system_answer}" == "yes" || "${system_answer}" == "no" ]]; then
        attempts=$((attempts + 1))
      else
        color=${gray_color}
      fi
    fi

    printf '<tr>\n  <td><a style="background-color:%s;" href="%s">%s</a></td>\n  <td>%s</td>\n  <td>%s</td>\n  <td>%ss</td>\n</tr>\n' \
      "${color}" "${problem_name}.html" "${problem_name}" \
      "${gold_answer}" "${system_answer}" "${proving_time}" >> "${report}"
  done < <(ls -v "${plain_dir2}"/sick_"${dataset}"_*.answer)

  if (( total_number > 0 )); then
    average_proving_time=$(echo "scale=2; ${total_proving_time} / ${total_number}" | bc -l)
  else
    # No problems were evaluated; avoid a division by zero in bc.
    average_proving_time="n/a"
  fi
  echo "
</table>
<h4><font color='red'>Average proving time: ${average_proving_time} </font></h4>
</body>
</html>
" >> "${report}"
done

# Stop the word2vec API process (presumably started by ./word2vec.sh at the
# beginning of the run -- confirm) once all problems have been processed.
if [[ "${word2vec}" == "word2vec" ]]; then
  # pgrep -f matches against the full command line and never lists itself.
  processid=$(pgrep -f 'word2vec-api\.py')
  if [[ -n "${processid}" ]]; then
    # Unquoted on purpose: several PIDs arrive one per line and each has to
    # become its own argument of kill.
    kill ${processid}
  else
    # kill without a PID would only print its usage text.
    echo "No running word2vec-api.py process found." >&2
  fi
fi

# Final step of the experiment, run after all splits have been evaluated.
python scripts/randomforest_all.py
| 179 | + |
0 commit comments