Skip to content

Commit d499b83

Browse files
committed
added a script for RTE with multiple parsers
1 parent abb485b commit d499b83

File tree

1 file changed

+209
-0
lines changed

1 file changed

+209
-0
lines changed

ja/rte_ja_mp.sh

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#!/bin/bash
2+
#
3+
# Copyright 2016 Pascual Martinez-Gomez
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
# Script to Recognize Textual Entailment of problems in Japanese, using
18+
# multiple CCG parsers (Jigg and depccg at the moment).
19+
# This script receives a file with several sentences (one per line), where all
20+
# sentences are premises except the last one, which is a conclusion. It returns
21+
# 'yes' (the premises entail the conclusion), 'no' (there is a contradiction) or
22+
# 'unknown' (none of the former).
23+
# You can use it as:
24+
#
25+
# ./rte_ja_mp.sh <sentences.txt> <semantic_templates.yaml>
26+
#
27+
# E.g.
28+
# ./rte_ja_mp.sh ja/sample_ja.txt ja/semantic_templates_ja.yaml
29+
30+
USAGE="Usage: ./rte_ja_mp.sh <sentences.txt> <semantic_templates.yaml>"

# Create a file named "parser_location_ja.txt" at the "ja" directory and
# write a list of CCG parsers installed, as in:
# $ cat ja/parser_location_ja.txt
# jigg:/path/to/ccg2lambda/ja/jigg-v-0.4
# depccg:/path/to/depccg/build

# The script takes exactly two positional arguments (the sentences file and
# the semantic templates file). Any other count prints the usage line and
# aborts with status 1.
if [[ $# -ne 2 ]]; then
  echo "Error: Number of arguments invalid".
  echo "${USAGE}"
  exit 1
fi
44+
45+
# Path to the file with the semantic (category) templates, taken from the
# second command-line argument.
category_templates=$2
# The operand is quoted so that a path containing whitespace, or an empty
# argument, is tested as one word. Unquoted, `[ ! -f $var ]` either errors out
# or evaluates to false in those cases, silently skipping this validation.
if [[ ! -f "${category_templates}" ]]; then
  echo "Error: File with semantic templates does not exist."
  echo "${USAGE}"
  exit 1
fi
52+
53+
# Path to the file with the plain sentences (first command-line argument),
# and its basename (everything up to the last "/" removed). The basename is
# used to name the intermediate and result files.
sentences_fname=$1
sentences_basename=${sentences_fname##*/}
# Quoted so that a path with whitespace or an empty argument is tested as a
# single operand instead of breaking (and thereby skipping) the check.
if [[ ! -f "${sentences_fname}" ]]; then
  echo "Error: File with plain sentences does not exist."
  echo "${USAGE}"
  exit 1
fi
61+
62+
# These variables contain the names of the directories where intermediate
# results will be written.
plain_dir="ja_plain" # copy of the input sentences (parser input).
parsed_dir="ja_parsed" # parsed sentences into XML or other formats.
results_dir="ja_results" # HTML semantic outputs, proving results, etc.
mkdir -p -- "${plain_dir}" "${parsed_dir}" "${results_dir}"

# Copy the input text to plain_dir. Operands are quoted (and preceded by --)
# so that file names with whitespace or a leading dash are copied correctly.
cp -- "${sentences_fname}" "${plain_dir}/${sentences_basename}"
71+
72+
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
73+
74+
# Set parser locations
#
# Every non-empty line of ja/parser_location_ja.txt has the form
#   <parser_name>:<parser_dir>
# For "jigg" and "depccg" the directory is remembered (jigg_dir, depccg_dir)
# and its layout is validated; the script aborts with status 1 on failure.
if [[ ! -f "ja/parser_location_ja.txt" ]]; then
  echo "Error: File ja/parser_location_ja.txt does not exist."
  exit 1
fi
# Read line by line (instead of word-splitting the output of cat) so that a
# directory containing spaces stays in one piece. `read` trims surrounding
# whitespace; the `|| [[ -n ... ]]` handles a last line without a newline.
while read -r parser || [[ -n "${parser}" ]]; do
  [[ -n "${parser}" ]] || continue
  # Name is the text before the first ":"; the directory is everything after
  # it (empty when the line has no ":").
  parser_name=${parser%%:*}
  parser_dir=""
  if [[ "${parser}" == *:* ]]; then
    parser_dir=${parser#*:}
  fi
  if [[ "${parser_name}" == "jigg" ]]; then
    jigg_dir=${parser_dir}
    if [[ ! -d "${jigg_dir}/jar" ]]; then
      echo "Parser directory does not exist. Exit."
      exit 1
    fi
    # Expand the glob into an array and test its first element. With no
    # match the pattern stays literal (and does not exist); with several
    # matches this avoids handing more than one operand to a single -e test.
    jigg_models=( "${jigg_dir}"/jar/ccg-models-*.jar )
    if [[ ! -e "${jigg_models[0]}" ]]; then
      echo "Japanese CCG models not found. Refer to Jigg instructions to download them."
      exit 1
    fi
  fi
  if [[ "${parser_name}" == "depccg" ]]; then
    depccg_dir=${parser_dir}
    if [[ ! -d "${depccg_dir}" || ! -e "${depccg_dir}/src/run.py" ]]; then
      echo "depccg parser directory incorrect. Exit."
      exit 1
    fi
  fi
done < "ja/parser_location_ja.txt"
101+
102+
# Command strings used to invoke Jigg. They are executed through `eval`, and
# the input file name is appended by the caller right after the trailing
# -file option. The escaped double quotes around the classpath keep
# "${jigg_dir}/jar/*" a single word when the string is eval'ed.

# Pipeline with the ssplit, kuromoji and ccg annotators (-ccg.kBest 5).
parser_cmd="java -Xmx4g -cp \"${jigg_dir}/jar/*\" jigg.pipeline.Pipeline"
parser_cmd+=" -annotators ssplit,kuromoji,ccg"
parser_cmd+=" -ccg.kBest 5 -file"

# Pipeline with only the ssplit and kuromoji annotators.
tagging_cmd="java -Xmx4g -cp \"${jigg_dir}/jar/*\" jigg.pipeline.Pipeline"
tagging_cmd+=" -annotators ssplit,kuromoji -file"
109+
110+
#######################################
# Parse a plain-text file using jigg.
# Globals:   parser_cmd, plain_dir, parsed_dir (read)
# Arguments: $1 - base name of the input file inside ${plain_dir}
# Outputs:   ${parsed_dir}/<name>.log.std and .log.err (parser stdout/stderr)
#            ${parsed_dir}/<name>.jigg.jigg.xml (moved from
#            ${plain_dir}/<name>.xml, where the parser output is expected)
# Returns:   exit status of the final mv
#######################################
function parse_jigg() {
  local base_fname=$1
  # parser_cmd is a command string with embedded quotes, hence the eval. The
  # file operand is written with escaped quotes and dollars so that eval
  # itself expands it as one word, even when the name contains whitespace.
  eval "${parser_cmd} \"\${plain_dir}/\${base_fname}\"" \
    > "${parsed_dir}/${base_fname}.log.std" \
    2> "${parsed_dir}/${base_fname}.log.err"
  mv -- "${plain_dir}/${base_fname}.xml" \
    "${parsed_dir}/${base_fname}.jigg.jigg.xml"
}
118+
119+
#######################################
# Parse a plain-text file using depccg.
# First runs the tagging command on the input, then feeds the tagged XML to
# ja/rte.py together with the ja_headfinal model directory.
# Globals:   tagging_cmd, plain_dir, parsed_dir, depccg_dir, PYTHONPATH (read)
# Arguments: $1 - base name of the input file inside ${plain_dir}
# Outputs:   ${parsed_dir}/<name>.log.std and .log.err (tagger stdout/stderr)
#            ${parsed_dir}/<name>.tagged.xml (moved from ${plain_dir}/<name>.xml)
#            ${parsed_dir}/<name>.depccg.jigg.xml (stdout of ja/rte.py)
# Returns:   exit status of the python invocation
#######################################
function parse_depccg() {
  local base_fname=$1
  local tagged_xml="${parsed_dir}/${base_fname}.tagged.xml"
  # tagging_cmd is a command string with embedded quotes, hence the eval. The
  # file operand is escaped so that eval expands it as a single word.
  eval "${tagging_cmd} \"\${plain_dir}/\${base_fname}\"" \
    > "${parsed_dir}/${base_fname}.log.std" \
    2> "${parsed_dir}/${base_fname}.log.err"
  mv -- "${plain_dir}/${base_fname}.xml" "${tagged_xml}"
  # depccg's src directory is prepended to PYTHONPATH for this call only.
  env PYTHONPATH="${depccg_dir}/src:${PYTHONPATH:-}" \
    python ja/rte.py \
    "${depccg_dir}/../models/ja_headfinal" \
    "${tagged_xml}" \
    > "${parsed_dir}/${base_fname}.depccg.jigg.xml"
}
132+
133+
#######################################
# Run scripts/semparse.py on the CCG output of one parser.
# Globals:   parsed_dir, category_templates (read)
#            sentences_basename (written)
# Arguments: $1 - parser name (e.g. jigg, depccg)
#            $2 - base name of the sentences file
# Outputs:   ${parsed_dir}/<name>.<parser>.sem.xml (passed as output path)
#            ${parsed_dir}/<name>.<parser>.sem.err (stderr of semparse.py)
# Returns:   exit status of semparse.py
#######################################
function semantic_parsing() {
  local parser=$1
  # Kept global on purpose: select_answer reads sentences_basename without
  # receiving it as an argument.
  sentences_basename=$2
  local stem="${parsed_dir}/${sentences_basename}.${parser}"
  # All paths are quoted; an unquoted redirection target containing
  # whitespace is an "ambiguous redirect" and the command would not run.
  python scripts/semparse.py \
    "${stem}.jigg.xml" \
    "${category_templates}" \
    "${stem}.sem.xml" \
    --arbi-types \
    2> "${stem}.sem.err"
}
143+
144+
#######################################
# Run scripts/prove.py (limited to 100 seconds through `timeout`) on the
# semantic parse of one parser, and record the answer and the elapsed time.
# Globals:   parsed_dir, results_dir (read)
#            sentences_basename (written)
# Arguments: $1 - parser name (e.g. jigg, depccg)
#            $2 - base name of the sentences file
# Outputs:   ${results_dir}/<name>.<parser>.answer (stdout of prove.py)
#            ${results_dir}/<name>.<parser>.err    (stderr of prove.py)
#            ${results_dir}/<name>.<parser>.html   (passed as --graph_out)
#            ${results_dir}/<name>.time            (elapsed seconds, 2 decimals)
#            one "judging entailment for ..." line on stdout
#######################################
function proving() {
  local parser=$1
  # Kept global on purpose: select_answer reads sentences_basename without
  # receiving it as an argument.
  sentences_basename=$2
  local sem_xml="${parsed_dir}/${sentences_basename}.${parser}.sem.xml"
  local out_stem="${results_dir}/${sentences_basename}.${parser}"
  local start_time proof_end_time proving_time rte_answer

  start_time=$(python -c 'import time; print(time.time())')
  timeout 100 python scripts/prove.py \
    "${sem_xml}" \
    --graph_out "${out_stem}.html" \
    > "${out_stem}.answer" \
    2> "${out_stem}.err"
  rte_answer=$(cat "${out_stem}.answer")
  echo "judging entailment for ${sem_xml} $rte_answer"
  proof_end_time=$(python -c 'import time; print(time.time())')
  # A single awk call computes and formats the difference (no bc needed).
  proving_time=$(awk -v s="${start_time}" -v e="${proof_end_time}" \
    'BEGIN { printf("%.2f\n", e - s) }')
  # NOTE(review): the .time file name has no parser component, so each
  # parser overwrites the previous one's timing -- confirm this is intended.
  echo "${proving_time}" > "${results_dir}/${sentences_basename}.time"
}
160+
161+
#######################################
# Merge the answer of one parser into the running overall answer.
#   - "yes" after "no" (or "no" after "yes") is a contradiction between
#     parsers: the overall answer becomes "unknown".
#   - otherwise a "yes" or "no" becomes the overall answer and that parser
#     becomes the selected prediction.
#   - any other content leaves the state untouched.
# Afterwards the files of the selected prediction (if any) are copied to the
# parser-independent names <name>.xml, <name>.sem.xml, <name>.answer and
# <name>.html.
# Globals:   results_dir, parsed_dir, sentences_basename (read)
#            current_answer, prediction_fname (read and written)
# Arguments: $1 - parser name (e.g. jigg, depccg)
#######################################
function select_answer() {
  local parser=$1
  local fname="${results_dir}/${sentences_basename}.${parser}.answer"
  local fname_answer answer_basename
  # A parser that produced no answer file is treated as an empty answer.
  if [[ ! -e "${fname}" ]]; then
    echo "" > "${fname}"
  fi
  fname_answer=$(cat "${fname}")
  answer_basename=${fname##*/}
  # current_answer must be expanded here: comparing the literal word
  # "current_answer" is never true, which disabled contradiction detection.
  if [[ "${current_answer}" == "no" && "${fname_answer}" == "yes" ]] \
    || [[ "${current_answer}" == "yes" && "${fname_answer}" == "no" ]]; then
    current_answer="unknown"
  elif [[ "${fname_answer}" == "yes" || "${fname_answer}" == "no" ]]; then
    current_answer=${fname_answer}
    # Strip only the trailing ".answer" suffix. A regex with an unescaped dot
    # and a global flag would also eat e.g. "_answer" inside the file name.
    prediction_fname=${answer_basename%.answer}
  fi
  # NOTE(review): after a contradiction prediction_fname still names the
  # earlier prediction, so the copied <name>.answer keeps that parser's
  # answer rather than "unknown" -- confirm whether that is intended.
  if [[ -n "${prediction_fname}" ]]; then
    cp -- "${parsed_dir}/${prediction_fname}.jigg.xml" \
      "${parsed_dir}/${sentences_basename}.xml"
    cp -- "${parsed_dir}/${prediction_fname}.sem.xml" \
      "${parsed_dir}/${sentences_basename}.sem.xml"
    cp -- "${results_dir}/${prediction_fname}.answer" \
      "${results_dir}/${sentences_basename}.answer"
    cp -- "${results_dir}/${prediction_fname}.html" \
      "${results_dir}/${sentences_basename}.html"
  fi
}
188+
189+
# Set the current answer. Until some parser yields "yes" or "no", the overall
# answer is "unknown" and the jigg output is the default selected prediction.
current_answer="unknown"
prediction_fname="${sentences_basename}.jigg"

# CCG parsing, semantic parsing and theorem proving
#
# For every "<parser_name>:<parser_dir>" entry, each stage runs only when its
# output file does not exist yet, so earlier results are reused on re-runs.
# The list is read on file descriptor 3 so that the commands run inside the
# loop (java, python) cannot consume it through their standard input.
# `read` trims surrounding whitespace and keeps paths with spaces intact;
# the `|| [[ -n ... ]]` handles a last line without a trailing newline.
while read -r parser <&3 || [[ -n "${parser}" ]]; do
  [[ -n "${parser}" ]] || continue
  # Name is the text before the first ":"; the directory is everything after
  # it (empty when the entry has no ":").
  parser_name=${parser%%:*}
  parser_dir=""
  if [[ "${parser}" == *:* ]]; then
    parser_dir=${parser#*:}
  fi
  if [[ ! -e "${parsed_dir}/${sentences_basename}.${parser_name}.jigg.xml" ]]; then
    echo "${parser_name} parsing ${plain_dir}/${sentences_basename}"
    # Dispatch to parse_jigg / parse_depccg according to the parser name.
    "parse_${parser_name}" "${sentences_basename}"
  fi
  if [[ ! -e "${parsed_dir}/${sentences_basename}.${parser_name}.sem.xml" ]]; then
    echo "semantic parsing $parsed_dir/${sentences_basename}.${parser_name}.sem.xml"
    semantic_parsing "${parser_name}" "${sentences_basename}"
  fi
  if [[ ! -e "${results_dir}/${sentences_basename}.${parser_name}.answer" ]]; then
    proving "${parser_name}" "${sentences_basename}"
    select_answer "${parser_name}"
  fi
done 3< "ja/parser_location_ja.txt"

0 commit comments

Comments
 (0)