|
| 1 | +#!/bin/bash |
| 2 | +# |
| 3 | +# Copyright 2016 Pascual Martinez-Gomez |
| 4 | +# |
| 5 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | +# you may not use this file except in compliance with the License. |
| 7 | +# You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, software |
| 12 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | +# See the License for the specific language governing permissions and |
| 15 | +# limitations under the License. |
| 16 | + |
| 17 | +# Script to Recognize Textual Entailment of problems in Japanese, using |
| 18 | +# multiple CCG parsers (Jigg and depccg at the moment). |
| 19 | +# This script receives a file with several sentences (one per line), where all |
| 20 | +# sentences are premises except the last one, which is a conclusion. It returns |
| 21 | +# 'yes' (the premises entail the conclusion), 'no' (there is a contradiction) or |
| 22 | +# 'unknown' (none of the former). |
| 23 | +# You can use it as: |
| 24 | +# |
| 25 | +# ./rte_ja_mp.sh <sentences.txt> <semantic_templates.yaml> |
| 26 | +# |
| 27 | +# E.g. |
| 28 | +# ./rte_ja_mp.sh ja/sample_ja.txt ja/semantic_templates_ja.yaml |
| 29 | + |
USAGE="Usage: ./rte_ja_mp.sh <sentences.txt> <semantic_templates.yaml>"

# Create a file named "parser_location_ja.txt" at the "ja" directory and
# write a list of CCG parsers installed, as in:
# $ cat ja/parser_location_ja.txt
# jigg:/path/to/ccg2lambda/ja/jigg-v-0.4
# depccg:/path/to/depccg/build

# Check that the number of arguments is correct.
# Diagnostics are written to stderr so that they never mix with the regular
# output of the script.
if [ "$#" -ne 2 ]; then
  echo "Error: Number of arguments invalid." >&2
  echo "$USAGE" >&2
  exit 1
fi
| 44 | + |
# Path of the file that contains the semantic (category) templates.
category_templates=$2
if [ ! -f "$category_templates" ]; then
  echo "Error: File with semantic templates does not exist." >&2
  echo "$USAGE" >&2
  exit 1
fi

# Path of the file with the plain sentences, and its name without the
# leading directories (used to name every intermediate and result file).
sentences_fname=$1
sentences_basename=${sentences_fname##*/}
if [ ! -f "$sentences_fname" ]; then
  echo "Error: File with plain sentences does not exist." >&2
  echo "$USAGE" >&2
  exit 1
fi
| 61 | + |
# These variables contain the names of the directories where intermediate
# results will be written.
plain_dir="ja_plain" # tokenized sentences.
parsed_dir="ja_parsed" # parsed sentences into XML or other formats.
results_dir="ja_results" # HTML semantic outputs, proving results, etc.
if ! mkdir -p "$plain_dir" "$parsed_dir" "$results_dir"; then
  echo "Error: Could not create the output directories." >&2
  exit 1
fi

# Copy the input text to plain_dir, unless the input file already is the
# file inside plain_dir (cp would refuse to copy a file onto itself).
if [ ! "$sentences_fname" -ef "${plain_dir}/${sentences_basename}" ]; then
  if ! cp "$sentences_fname" "${plain_dir}/${sentences_basename}"; then
    echo "Error: Could not copy ${sentences_fname} to ${plain_dir}." >&2
    exit 1
  fi
fi
| 71 | + |
| 72 | +function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; } |
| 73 | + |
# Set parser locations.
# Every non-empty line of ja/parser_location_ja.txt has the form
# "<parser_name>:<parser_directory>". The directories of the known parsers
# (jigg and depccg) are validated and stored in jigg_dir and depccg_dir;
# lines naming other parsers are not validated here.
if [ ! -f "ja/parser_location_ja.txt" ]; then
  echo "Error: File ja/parser_location_ja.txt does not exist." >&2
  exit 1
fi
# Read line by line (instead of word-splitting the output of cat) so that
# directories containing spaces are kept intact. The "|| [ -n ... ]" part
# handles a last line without a trailing newline.
while read -r parser || [ -n "${parser}" ]; do
  # Skip blank lines.
  [ -n "${parser}" ] || continue
  # Split on the first colon only, so that a directory may contain colons.
  parser_name=${parser%%:*}
  case "${parser}" in
    *:*) parser_dir=${parser#*:} ;;
    *) parser_dir="" ;;
  esac
  if [ "${parser_name}" = "jigg" ]; then
    jigg_dir=${parser_dir}
    if [ ! -d "${jigg_dir}/jar" ]; then
      echo "Parser directory does not exist. Exit." >&2
      exit 1
    fi
    # Expand the glob into an array: testing the first element works whether
    # the pattern matches zero, one or several model files.
    ccg_models=("${jigg_dir}"/jar/ccg-models-*.jar)
    if [ ! -e "${ccg_models[0]}" ]; then
      echo "Japanese CCG models not found. Refer to Jigg instructions to download them." >&2
      exit 1
    fi
  fi
  if [ "${parser_name}" = "depccg" ]; then
    depccg_dir=${parser_dir}
    if [ ! -d "${depccg_dir}" ] || [ ! -e "${depccg_dir}/src/run.py" ]; then
      echo "depccg parser directory incorrect. Exit." >&2
      exit 1
    fi
  fi
done < ja/parser_location_ja.txt
| 101 | + |
# Command lines used to invoke Jigg. They are kept as plain strings and are
# run through "eval" by the parse_* functions, which append the name of the
# file to process after the trailing "-file". The escaped double quotes stay
# inside the string, so the classpath is passed quoted when it is eval'ed.

# Full pipeline: sentence splitting, kuromoji and CCG parsing (-ccg.kBest 5).
parser_cmd="java -Xmx4g -cp \"${jigg_dir}/jar/*\" jigg.pipeline.Pipeline"
parser_cmd+=" -annotators ssplit,kuromoji,ccg"
parser_cmd+=" -ccg.kBest 5 -file"

# Tagging only: sentence splitting and kuromoji, without the ccg annotator.
tagging_cmd="java -Xmx4g -cp \"${jigg_dir}/jar/*\" jigg.pipeline.Pipeline"
tagging_cmd+=" -annotators ssplit,kuromoji -file"
| 109 | + |
# Parse a file of plain sentences using jigg.
# Globals:
#   parser_cmd (read) - command line that is eval'ed with the input appended.
#   plain_dir (read)  - directory holding the input file; the parser output
#                       "<input>.xml" is expected there as well.
#   parsed_dir (read) - directory receiving the logs and the parsed XML.
# Arguments:
#   $1 - base name of the file with the sentences inside plain_dir.
# Outputs:
#   ${parsed_dir}/<name>.log.std, ${parsed_dir}/<name>.log.err and
#   ${parsed_dir}/<name>.jigg.jigg.xml
# Returns:
#   0 on success, non-zero when the parser produced no XML or the XML could
#   not be moved.
function parse_jigg() {
  local base_fname=$1
  # The file argument is single-quoted here so that it is expanded (and
  # kept as one word) by eval itself, even when it contains spaces.
  eval "${parser_cmd}" '"${plain_dir}/${base_fname}"' \
    > "${parsed_dir}/${base_fname}.log.std" \
    2> "${parsed_dir}/${base_fname}.log.err"
  if [ ! -f "${plain_dir}/${base_fname}.xml" ]; then
    echo "Error: jigg produced no output for ${plain_dir}/${base_fname}" \
      "(see ${parsed_dir}/${base_fname}.log.err)." >&2
    return 1
  fi
  mv "${plain_dir}/${base_fname}.xml" \
    "${parsed_dir}/${base_fname}.jigg.jigg.xml"
}
| 118 | + |
# Parse a file of plain sentences using depccg.
# The sentences are first tagged with Jigg (tagging_cmd); the tagged XML is
# then given to ja/rte.py together with the ja_headfinal model directory.
# Globals:
#   tagging_cmd (read) - command line that is eval'ed with the input appended.
#   plain_dir (read)   - directory holding the input file; the tagger output
#                        "<input>.xml" is expected there as well.
#   parsed_dir (read)  - directory receiving logs, tagged and parsed XML.
#   depccg_dir (read)  - depccg directory; its "src" subdirectory is
#                        prepended to PYTHONPATH.
# Arguments:
#   $1 - base name of the file with the sentences inside plain_dir.
# Outputs:
#   ${parsed_dir}/<name>.log.std, ${parsed_dir}/<name>.log.err,
#   ${parsed_dir}/<name>.tagged.xml and ${parsed_dir}/<name>.depccg.jigg.xml
#   NOTE(review): the two log files have the same names as the ones written
#   by parse_jigg, so the later run overwrites the earlier logs.
# Returns:
#   Non-zero when the tagger produced no XML or the XML could not be moved;
#   otherwise the exit status of the python command.
function parse_depccg() {
  local base_fname=$1
  local tagged_xml="${parsed_dir}/${base_fname}.tagged.xml"
  # The file argument is single-quoted here so that it is expanded (and
  # kept as one word) by eval itself, even when it contains spaces.
  eval "${tagging_cmd}" '"${plain_dir}/${base_fname}"' \
    > "${parsed_dir}/${base_fname}.log.std" \
    2> "${parsed_dir}/${base_fname}.log.err"
  if [ ! -f "${plain_dir}/${base_fname}.xml" ]; then
    echo "Error: tagging produced no output for ${plain_dir}/${base_fname}" \
      "(see ${parsed_dir}/${base_fname}.log.err)." >&2
    return 1
  fi
  mv "${plain_dir}/${base_fname}.xml" "${tagged_xml}" || return
  PYTHONPATH="${depccg_dir}/src:${PYTHONPATH}" \
    python ja/rte.py \
    "${depccg_dir}/../models/ja_headfinal" \
    "${tagged_xml}" \
    > "${parsed_dir}/${base_fname}.depccg.jigg.xml"
}
| 132 | + |
# Run scripts/semparse.py on the CCG parses produced by one parser.
# Globals:
#   parsed_dir (read)         - directory with the parsed XML files.
#   category_templates (read) - file with the semantic templates.
# Arguments:
#   $1 - name of the parser whose output is used (e.g. jigg or depccg).
#   $2 - base name of the file with the sentences.
# Outputs:
#   semparse.py is given ${parsed_dir}/<name>.<parser>.sem.xml as its output
#   argument; its stderr is saved in ${parsed_dir}/<name>.<parser>.sem.err
# Returns:
#   The exit status of the python command.
function semantic_parsing() {
  # Local copies, so that the caller's variables of the same name are not
  # overwritten.
  local parser=$1
  local sentences_basename=$2
  local prefix="${parsed_dir}/${sentences_basename}.${parser}"
  python scripts/semparse.py \
    "${prefix}.jigg.xml" \
    "${category_templates}" \
    "${prefix}.sem.xml" \
    --arbi-types \
    2> "${prefix}.sem.err"
}
| 143 | + |
# Run scripts/prove.py on the semantic parses of one parser, with a time
# limit of 100 seconds, and record the answer and the elapsed time.
# Globals:
#   parsed_dir (read)  - directory with the .sem.xml files.
#   results_dir (read) - directory receiving the results.
# Arguments:
#   $1 - name of the parser whose semantic parses are used.
#   $2 - base name of the file with the sentences.
# Outputs:
#   ${results_dir}/<name>.<parser>.answer - stdout of prove.py.
#   ${results_dir}/<name>.<parser>.err    - stderr of prove.py.
#   ${results_dir}/<name>.<parser>.html   - passed to prove.py as --graph_out.
#   ${results_dir}/<name>.time            - elapsed seconds, two decimals
#                                           (the name does not include the
#                                           parser, so each call overwrites it).
#   Prints a "judging entailment ..." line with the answer to stdout.
function proving() {
  # Local copies, so that the caller's variables of the same name are not
  # overwritten.
  local parser=$1
  local sentences_basename=$2
  local sem_xml="${parsed_dir}/${sentences_basename}.${parser}.sem.xml"
  local out_prefix="${results_dir}/${sentences_basename}.${parser}"
  local start_time proof_end_time proving_time rte_answer
  start_time=$(python -c 'import time; print(time.time())')
  timeout 100 python scripts/prove.py \
    "${sem_xml}" \
    --graph_out "${out_prefix}.html" \
    > "${out_prefix}.answer" \
    2> "${out_prefix}.err"
  rte_answer=$(cat "${out_prefix}.answer")
  echo "judging entailment for ${sem_xml} $rte_answer"
  proof_end_time=$(python -c 'import time; print(time.time())')
  # awk alone computes and formats the difference; no need for bc.
  proving_time=$(awk -v start="${start_time}" -v end="${proof_end_time}" \
    'BEGIN { printf("%.2f\n", end - start) }')
  echo "${proving_time}" > "${results_dir}/${sentences_basename}.time"
}
| 160 | + |
# Merge the answer of one parser into the overall answer.
# Globals:
#   results_dir, parsed_dir, sentences_basename (read).
#   current_answer (read/written)   - overall answer so far: "yes", "no" or
#                                     "unknown".
#   prediction_fname (read/written) - "<name>.<parser>" prefix of the files
#                                     that are copied as the overall result.
# Arguments:
#   $1 - name of the parser whose answer file is examined.
# Behavior:
#   - If the answer file of the parser does not exist, it is created
#     containing an empty line.
#   - A "yes" that follows a "no" (or vice versa) is a contradiction between
#     parsers: the overall answer becomes "unknown".
#   - Otherwise a "yes" or "no" becomes the overall answer and the files of
#     this parser become the selected prediction.
#   - Any other answer leaves the overall state unchanged.
#   Finally the files of the selected prediction are copied to the
#   parser-independent names (<name>.xml, <name>.sem.xml, <name>.answer and
#   <name>.html).
#   NOTE(review): on a contradiction the combined <name>.answer is
#   overwritten with "unknown", while the other combined files keep showing
#   the previously selected prediction - confirm this is the desired output.
function select_answer() {
  local parser=$1
  local fname="${results_dir}/${sentences_basename}.${parser}.answer"
  local fname_answer
  local contradiction=0
  if [ ! -e "${fname}" ]; then
    echo "" > "${fname}"
  fi
  fname_answer=$(cat "${fname}")
  if [ "${current_answer}" = "no" ] && [ "${fname_answer}" = "yes" ]; then
    current_answer="unknown"
    contradiction=1
  elif [ "${current_answer}" = "yes" ] && [ "${fname_answer}" = "no" ]; then
    current_answer="unknown"
    contradiction=1
  elif [ "${fname_answer}" = "yes" ] || [ "${fname_answer}" = "no" ]; then
    current_answer=${fname_answer}
    # Same prefix as the answer file, without its ".answer" suffix.
    prediction_fname="${sentences_basename}.${parser}"
  fi
  if [ -n "${prediction_fname}" ]; then
    cp "${parsed_dir}/${prediction_fname}.jigg.xml" \
      "${parsed_dir}/${sentences_basename}.xml"
    cp "${parsed_dir}/${prediction_fname}.sem.xml" \
      "${parsed_dir}/${sentences_basename}.sem.xml"
    cp "${results_dir}/${prediction_fname}.answer" \
      "${results_dir}/${sentences_basename}.answer"
    cp "${results_dir}/${prediction_fname}.html" \
      "${results_dir}/${sentences_basename}.html"
  fi
  if [ "${contradiction}" -eq 1 ]; then
    echo "unknown" > "${results_dir}/${sentences_basename}.answer"
  fi
}
| 188 | + |
# Set the current answer
current_answer="unknown"
prediction_fname="${sentences_basename}.jigg"

# CCG parsing, semantic parsing and theorem proving.
# For every parser listed in ja/parser_location_ja.txt, each stage is run
# only when its output file does not exist yet, so results of previous runs
# are reused.
# The list is read through file descriptor 3 so that the commands run inside
# the loop keep the original stdin and cannot consume the list.
while read -r parser <&3 || [ -n "${parser}" ]; do
  # Skip blank lines.
  [ -n "${parser}" ] || continue
  parser_name=${parser%%:*}
  # The parsing function is selected by name; skip parsers without one
  # instead of calling a command that does not exist.
  if ! declare -F "parse_${parser_name}" > /dev/null; then
    echo "Warning: Unsupported parser '${parser_name}' in ja/parser_location_ja.txt. Skipped." >&2
    continue
  fi
  if [ ! -e "${parsed_dir}/${sentences_basename}.${parser_name}.jigg.xml" ]; then
    echo "${parser_name} parsing ${plain_dir}/${sentences_basename}"
    "parse_${parser_name}" "${sentences_basename}"
  fi
  if [ ! -e "${parsed_dir}/${sentences_basename}.${parser_name}.sem.xml" ]; then
    echo "semantic parsing $parsed_dir/${sentences_basename}.${parser_name}.sem.xml"
    semantic_parsing "${parser_name}" "${sentences_basename}"
  fi
  if [ ! -e "${results_dir}/${sentences_basename}.${parser_name}.answer" ]; then
    proving "${parser_name}" "${sentences_basename}"
    select_answer "${parser_name}"
  fi
done 3< ja/parser_location_ja.txt
0 commit comments